In [2]:
# 02_forecasting.ipynb
# --------------------
# Forecast monthly sales per territory with Prophet (fallback: ARIMA)
# Evaluation: RMSE + MAPE (cross-validation)

import pandas as pd, numpy as np
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from math import sqrt

# ARIMA fallback
import warnings
warnings.filterwarnings("ignore")
from statsmodels.tsa.arima.model import ARIMA

# ---------------- Helper metrics ----------------
def mape(y_true, y_pred):
    """Return the mean absolute percentage error (in percent).

    Positions where the actual value is exactly zero are excluded from the
    average, since the percentage error is undefined there. When no
    non-zero actuals remain (including empty input), NaN is returned.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    # Only non-zero actuals can serve as a denominator.
    nonzero = actual != 0
    if not nonzero.any():
        return np.nan

    pct_error = np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
    return np.mean(pct_error) * 100

# ---------------- Load & prepare ----------------
# Read the cleaned sales export and normalise the header casing so the
# rest of the notebook can rely on lower-case column names.
df = pd.read_csv("Data1/cleaned_sales.csv", parse_dates=["orderdate"])
df.columns = list(map(str.lower, df.columns))

# Snap every order date to the first day of its month, then total the
# sales per (territory, month). Columns are named ds / y because that is
# what the forecasting code below feeds to Prophet.
month_start = df["orderdate"].dt.to_period("M").dt.to_timestamp()
monthly = (
    df.assign(ds=month_start)
      .groupby(["territory", "ds"], as_index=False)["sales"]
      .sum()
      .rename(columns={"sales": "y"})
)

# Number of months to forecast ahead, and the territories to model.
horizon_months = 12
territories = monthly["territory"].dropna().unique().tolist()

# ---------------- Storage ----------------
# all_cv: one dict per CV window (territory, window_end, rmse, mape)
# all_fc: one forecast DataFrame per successfully modelled territory
all_cv, all_fc = [], []

# ---------------- CV / model helpers ----------------
cv_span_months = 12   # evaluate on the last 12 observed months ...
cv_step_months = 3    # ... in consecutive 3-month hold-out windows


def _cv_split_points(n_obs):
    """Return the train/test split positions for the rolling-origin CV.

    Each split position ``s`` trains on rows ``[:s]`` and tests on rows
    ``[s:s + cv_step_months]``. The range runs up to ``n_obs`` so that the
    final window covers the most recent months as well.
    """
    return range(n_obs - cv_span_months, n_obs, cv_step_months)


def _cv_row(terr, window_end, actual, predicted):
    """Build one CV result record; ``window_end`` is the last training date."""
    actual = np.asarray(actual)
    predicted = np.asarray(predicted)
    return {"territory": terr, "window_end": window_end,
            "rmse": sqrt(mean_squared_error(actual, predicted)),
            "mape": mape(actual, predicted)}


def _new_prophet():
    """Create a Prophet model with the settings shared by the CV and final fits."""
    return Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False,
                   n_changepoints=5, changepoint_prior_scale=0.5)


def _prophet_run(terr, tdf):
    """Fit Prophet for one territory.

    Returns ``(cv_rows, forecast_frame)``. Any exception from fitting or
    predicting propagates so the caller can fall back to ARIMA.
    """
    history = tdf[["ds", "y"]]
    m = _new_prophet()
    m.fit(history, algorithm="Newton")  # explicitly request the Newton optimizer

    cv_rows = []
    for split_end in _cv_split_points(len(history)):
        train = history.iloc[:split_end]
        test = history.iloc[split_end:split_end + cv_step_months]

        m_cv = _new_prophet()
        m_cv.fit(train, algorithm="Newton")
        yhat = m_cv.predict(test[["ds"]])["yhat"]
        cv_rows.append(_cv_row(terr, train["ds"].iloc[-1], test["y"], yhat))

    # ds values are month starts, so step with "MS"; "M" would produce
    # month-end dates, the first of which lies inside the last observed month.
    # History is excluded so the frame holds only the forecast horizon.
    future = m.make_future_dataframe(periods=horizon_months, freq="MS",
                                     include_history=False)
    fc = m.predict(future)[["ds", "yhat", "yhat_lower", "yhat_upper"]]
    fc["territory"] = terr
    return cv_rows, fc.assign(model="Prophet")


def _arima_run(terr, tdf):
    """Fit the ARIMA(1,1,1) fallback for one territory.

    Returns ``(cv_rows, forecast_frame)``. A failing CV window is reported
    and skipped; a failure of the full-history fit or forecast propagates.
    """
    series = tdf.set_index("ds")["y"]
    model_fit = ARIMA(series, order=(1, 1, 1)).fit()

    cv_rows = []
    for split_end in _cv_split_points(len(series)):
        train = series.iloc[:split_end]
        test = series.iloc[split_end:split_end + cv_step_months]
        try:
            pred = ARIMA(train, order=(1, 1, 1)).fit().forecast(steps=len(test))
            cv_rows.append(_cv_row(terr, train.index[-1], test, pred))
        except Exception as cv_err:
            # Best effort: one bad window must not cost us the forecast,
            # but it should not disappear silently either.
            print(f"ARIMA CV window skipped for {terr} (split {split_end}): {cv_err}")
            continue

    # Model-based 80% interval (alpha=0.2) instead of a flat +/-10% band,
    # which has no statistical meaning and inverts for negative forecasts.
    outlook = model_fit.get_forecast(steps=horizon_months)
    bands = np.asarray(outlook.conf_int(alpha=0.2))
    first_future_month = series.index[-1] + pd.offsets.MonthBegin()
    fc = pd.DataFrame({
        "ds": pd.date_range(first_future_month, periods=horizon_months, freq="MS"),
        "yhat": np.asarray(outlook.predicted_mean),
        "yhat_lower": bands[:, 0],
        "yhat_upper": bands[:, 1],
        "territory": terr,
        "model": "ARIMA",
    })
    return cv_rows, fc


# ---------------- Loop over territories ----------------
for terr in territories:
    tdf = monthly[monthly["territory"] == terr].sort_values("ds")

    # skip territories with too little history
    if len(tdf) < 15 or tdf["y"].sum() <= 0 or tdf["y"].nunique() <= 1:
        print(f"Skipping {terr}: insufficient or constant sales history.")
        continue

    try:
        cv_rows, fc = _prophet_run(terr, tdf)
    except Exception as e:
        print(f"Prophet failed for {terr}, fallback to ARIMA. Error: {e}")
        try:
            cv_rows, fc = _arima_run(terr, tdf)
        except Exception as e2:
            print(f"ARIMA also failed for {terr}: {e2}")
            continue

    # Record results only once a model has fully succeeded, so a Prophet run
    # that dies mid-CV cannot leave partial rows mixed with the ARIMA ones.
    all_cv.extend(cv_rows)
    all_fc.append(fc)

# ---------------- Metrics summary ----------------
# Give both frames explicit columns so that an empty run (every territory
# skipped or failed) still yields well-formed frames instead of a KeyError
# in the groupby / merge below.
cv_df = pd.DataFrame(all_cv, columns=["territory", "window_end", "rmse", "mape"])
if all_fc:
    fc_df = pd.concat(all_fc, ignore_index=True)
else:
    fc_df = pd.DataFrame(columns=["ds", "yhat", "yhat_lower", "yhat_upper",
                                  "territory", "model"])

# Average the per-window CV errors for each territory, best MAPE first.
if cv_df.empty:
    metrics = pd.DataFrame(columns=["territory", "cv_rmse", "cv_mape"])
else:
    metrics = (cv_df.groupby("territory", as_index=False)
                     .agg(cv_rmse=("rmse", "mean"), cv_mape=("mape", "mean"))
                     .sort_values("cv_mape"))

# Attach each territory's CV metrics to its forecast rows; the left join
# keeps forecasts for territories that produced no CV rows.
fc_eval = fc_df.merge(metrics, on="territory", how="left")

# ---------------- Save outputs ----------------
metrics_path = "forecast_cv_metrics_by_territory.csv"
forecast_path = "forecast_next12m_by_territory.csv"
metrics.to_csv(metrics_path, index=False)
fc_eval.to_csv(forecast_path, index=False)

if fc_df.empty:
    print("⚠️ No territory could be forecast; the saved files contain headers only.")
# Report the paths actually written (the files go to the working directory).
print(f"✅ Forecasting complete. Metrics saved to '{metrics_path}', "
      f"forecasts saved to '{forecast_path}'.")


07:50:55 - cmdstanpy - INFO - Chain [1] start processing
07:50:55 - cmdstanpy - INFO - Chain [1] done processing
07:50:55 - cmdstanpy - ERROR - Chain [1] error: terminated by signal 3221225657 


Prophet failed for APAC, fallback to ARIMA. Error: Error during optimization! Command 'C:\Users\saubh\AppData\Local\Programs\Python\Python310\Lib\site-packages\prophet\stan_model\prophet_model.bin random seed=94394 data file=C:\Users\saubh\AppData\Local\Temp\tmpbnuo2gob\ir1_ggdu.json init=C:\Users\saubh\AppData\Local\Temp\tmpbnuo2gob\_stbdx5w.json output file=C:\Users\saubh\AppData\Local\Temp\tmpbnuo2gob\prophet_modelpwjmhvg2\prophet_model-20250819075055.csv method=optimize algorithm=newton iter=10000' failed: 


07:50:55 - cmdstanpy - INFO - Chain [1] start processing
07:50:55 - cmdstanpy - INFO - Chain [1] done processing
07:50:55 - cmdstanpy - ERROR - Chain [1] error: terminated by signal 3221225657 


Prophet failed for EMEA, fallback to ARIMA. Error: Error during optimization! Command 'C:\Users\saubh\AppData\Local\Programs\Python\Python310\Lib\site-packages\prophet\stan_model\prophet_model.bin random seed=12608 data file=C:\Users\saubh\AppData\Local\Temp\tmpbnuo2gob\lv5mehym.json init=C:\Users\saubh\AppData\Local\Temp\tmpbnuo2gob\7ss5odjq.json output file=C:\Users\saubh\AppData\Local\Temp\tmpbnuo2gob\prophet_modelr_fqia3t\prophet_model-20250819075055.csv method=optimize algorithm=newton iter=10000' failed: 
Skipping Japan: insufficient or constant sales history.
✅ Forecasting complete. Metrics and forecasts saved in 'outputs/' folder.
