# In [4]:
import logging
import warnings

import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Ignore every UserWarning and RuntimeWarning from here on. The aim is to
# hide optimizer convergence noise (expected with business data), but the
# filters are process-wide and hide all other warnings in these categories too.
for _category in (UserWarning, RuntimeWarning):
    warnings.filterwarnings("ignore", category=_category)

# ===============================================================
# 1. LOAD RAW DATA
# ===============================================================

# Read the raw KPI extract from the working directory.
df = pd.read_csv("vodafone_churn_kpi_raw_2025.csv")

# Normalise headers: trim surrounding whitespace, lower-case, and turn
# spaces into underscores so later code can rely on snake_case names.
df.columns = [
    header.strip().lower().replace(" ", "_")
    for header in df.columns
]

# ===============================================================
# 2. HANDLE COLUMN VARIATIONS
# ===============================================================

# Different extracts label the same two fields differently. For each canonical
# column, the first alias present in the file is renamed to it. The canonical
# name leads every alias tuple so that a file already using it is left alone;
# otherwise a second alias would be renamed on top of it and the frame would
# end up with duplicate column labels.
_COLUMN_ALIASES = {
    "customer_type": (
        "customer_type",
        "churn_classification",
        "churn_type",
        "customer_segment",
    ),
    "raw_churn_value": (
        "raw_churn_value",
        "churn_value",
        "actual_values",
        "actual_value",
        "value",
    ),
}

for _canonical, _aliases in _COLUMN_ALIASES.items():
    _present = next((name for name in _aliases if name in df.columns), None)
    if _present is None:
        # Fail here, listing the accepted headers, rather than with a bare
        # KeyError when the column is first used further down the script.
        raise KeyError(
            f"No column for '{_canonical}' found; expected one of {list(_aliases)}"
        )
    if _present != _canonical:
        df = df.rename(columns={_present: _canonical})

# ===============================================================
# 3. PARSE INGESTION DATE
# ===============================================================

# Parse the ingestion dates, reading ambiguous values day-first
# (e.g. 03/04/2025 is 3 April).
df["ingestion_date"] = pd.to_datetime(df["ingestion_date"], dayfirst=True)

# Keep only rows ingested before 2026; rows whose date is NaT fail the
# comparison and are dropped as well. The explicit .copy() makes the filtered
# frame independent of the unfiltered one, so the column assignments later in
# the script write to this frame without triggering SettingWithCopyWarning.
df = df[df["ingestion_date"] < pd.Timestamp("2026-01-01")].copy()

# ===============================================================
# 4. CREATE REPORTING MONTH (MONTH START)
# ===============================================================

# Collapse each ingestion date to midnight on the first day of its month.
# Converting to a monthly period and back already lands on the month start,
# so no further offset arithmetic is needed.
df["reporting_month"] = (
    df["ingestion_date"].dt.to_period("M").dt.to_timestamp(how="start")
)

# ===============================================================
# 5. ACTUALS LAYER
# ===============================================================

# Total the raw churn value for every combination of reporting month,
# ingestion date and the three reporting dimensions.
actuals = (
    df.groupby(
        [
            "reporting_month",
            "ingestion_date",
            "product_group",
            "footprint",
            "customer_type",
        ]
    )["raw_churn_value"]
    .sum()
    .reset_index(name="churn_value")
)

# Label these rows as actuals; they have no forecast date.
actuals = actuals.assign(reporting_kpi="ACTUAL", forecast_date=pd.NaT)

# ===============================================================
# 6. ROLLING 3-MONTH FORECAST (ROBUST)
# ===============================================================

forecast_rows = []
dimension_keys = ["product_group", "footprint", "customer_type"]

_FORECAST_HORIZON = 3  # months forecast beyond each ingestion month
_MIN_OBSERVED_MONTHS = 4  # groups with fewer observed months get no forecast

_logger = logging.getLogger(__name__)


def _month_start(date):
    """Return midnight on the first day of the calendar month of *date*."""
    return pd.Timestamp(date).to_period("M").to_timestamp()


def _forecast_group(group, ingestion_month):
    """Forecast churn for the months that follow *ingestion_month*.

    Args:
        group: Rows of ``actuals`` for one dimension combination, limited to
            reporting months up to and including *ingestion_month*.
        ingestion_month: Month-start timestamp the forecast is made from.

    Returns:
        A list of ``_FORECAST_HORIZON`` non-negative integers, one per month
        after *ingestion_month*, or ``None`` when the group has fewer than
        ``_MIN_OBSERVED_MONTHS`` observed months.

    Raises:
        ValueError: If the fitted model produces a non-finite forecast.
        Exception: Anything ``ExponentialSmoothing`` raises while fitting or
            forecasting is propagated; the caller decides how to handle it.
    """
    ts = group.groupby("reporting_month")["churn_value"].sum()
    ts.index = pd.DatetimeIndex(ts.index)
    # Give the index an explicit month-start frequency; months with no rows
    # between the first and last observed month appear as NaN.
    ts = ts.asfreq("MS")

    if ts.notna().sum() < _MIN_OBSERVED_MONTHS:
        return None

    # The smoothing model cannot work with NaN observations, so interior gaps
    # are filled before fitting.
    # NOTE(review): linear interpolation treats a missing month as a data gap;
    # if a missing month really means zero churn, use fillna(0) instead.
    ts = ts.interpolate(method="linear")

    # The series ends at this group's last observed month, which may be
    # earlier than the ingestion month. Forecast across that lag as well so
    # the values kept below line up with the months after the ingestion month.
    last_month = ts.index[-1]
    months_behind = (
        (ingestion_month.year - last_month.year) * 12
        + (ingestion_month.month - last_month.month)
    )

    fitted = ExponentialSmoothing(
        ts,
        trend="add",
        seasonal=None,
        initialization_method="estimated",
    ).fit()

    steps = months_behind + _FORECAST_HORIZON
    values = np.asarray(fitted.forecast(steps), dtype=float)[-_FORECAST_HORIZON:]

    # A NaN/inf forecast means the fit failed numerically; reporting it as
    # zero churn would be misleading, so signal the failure instead.
    if not np.isfinite(values).all():
        raise ValueError("model produced a non-finite forecast")

    # Churn counts cannot be negative and are reported as whole numbers.
    return np.clip(values, 0, None).round().astype(int).tolist()


def _forecast_snapshot(snapshot, ingestion_month, keys):
    """Return ``(dims, preds)`` for every forecastable group in *snapshot*.

    Groups whose model fails are skipped with a logged warning so one bad
    series does not abort the whole run.
    """
    results = []
    for dims, group in snapshot.groupby(keys):
        try:
            preds = _forecast_group(group, ingestion_month)
        except Exception as exc:  # best-effort: skip the group, keep going
            _logger.warning(
                "Skipping forecast for %s as of %s: %s",
                dims,
                ingestion_month.date(),
                exc,
            )
            continue
        if preds is not None:
            results.append((dims, preds))
    return results


# The snapshot depends only on the ingestion month (all actuals with a
# reporting month up to and including it), so every ingestion date within the
# same month shares one set of forecasts. They are fitted once per month and
# reused, rather than refitting identical models for each ingestion date.
_forecasts_by_month = {}

for ingestion_date in sorted(actuals["ingestion_date"].unique()):

    ingestion_month = _month_start(ingestion_date)

    if ingestion_month not in _forecasts_by_month:
        snapshot = actuals[actuals["reporting_month"] <= ingestion_month]
        _forecasts_by_month[ingestion_month] = _forecast_snapshot(
            snapshot, ingestion_month, dimension_keys
        )

    # The months being forecast: the ones immediately after the ingestion month.
    future_months = pd.date_range(
        start=ingestion_month + pd.offsets.MonthBegin(1),
        periods=_FORECAST_HORIZON,
        freq="MS",
    )

    for dims, preds in _forecasts_by_month[ingestion_month]:
        for m, v in zip(future_months, preds):
            forecast_rows.append({
                "reporting_month": m,
                "ingestion_date": ingestion_date,
                "product_group": dims[0],
                "footprint": dims[1],
                "customer_type": dims[2],
                "churn_value": v,
                "reporting_kpi": "FORECAST",
                # forecast_date mirrors the month being forecast.
                "forecast_date": m,
            })

forecast_df = pd.DataFrame(forecast_rows)

# ===============================================================
# 7. FINAL REPORTING TABLE
# ===============================================================

# Stack actual and forecast rows into one table with a fresh index, then
# order it by ingestion date and reporting month.
final_df = pd.concat([actuals, forecast_df], ignore_index=True)
final_df = final_df.sort_values(by=["ingestion_date", "reporting_month"])

# ===============================================================
# 8. EXPORT
# ===============================================================

# Write the combined table to disk without the DataFrame index, then confirm
# on stdout.
final_df.to_csv(path_or_buf="churn_forecast_reporting.csv", index=False)
print("churn_forecast_reporting.csv created successfully")


# Output: churn_forecast_reporting.csv created successfully
