In [2]:
# ============================================================
# ADVANCED TIME SERIES FORECASTING
# Prophet with External Regressors vs SARIMA
# SINGLE CELL – ERROR FREE
# ============================================================

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

from prophet import Prophet
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error

# ------------------------------------------------------------
# 1. SYNTHETIC DATA GENERATION (4 YEARS, DAILY)
# ------------------------------------------------------------

np.random.seed(42)

dates = pd.date_range(start="2019-01-01", end="2022-12-31", freq="D")
n = len(dates)

trend = np.linspace(50, 120, n)
weekly = 8 * np.sin(2 * np.pi * dates.dayofweek / 7)
yearly = 15 * np.sin(2 * np.pi * dates.dayofyear / 365)
noise = np.random.normal(0, 4, n)

marketing_spend = np.random.gamma(5, 2, n)
temperature = 20 + 10 * np.sin(2 * np.pi * dates.dayofyear / 365)
promotion = np.random.binomial(1, 0.1, n)

y = (
    trend
    + weekly
    + yearly
    + 0.6 * marketing_spend
    + 0.4 * temperature
    + 12 * promotion
    + noise
)

df = pd.DataFrame({
    "ds": dates,
    "y": y,
    "marketing_spend": marketing_spend,
    "temperature": temperature,
    "promotion": promotion
})

# ------------------------------------------------------------
# 2. TRAIN–TEST SPLIT (TIME AWARE)
# ------------------------------------------------------------

split = int(len(df) * 0.8)
train_df = df.iloc[:split]
test_df = df.iloc[split:]

# ------------------------------------------------------------
# 3. SARIMA BASELINE
# ------------------------------------------------------------

sarima = SARIMAX(
    train_df["y"],
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 7),
    enforce_stationarity=False,
    enforce_invertibility=False
)

sarima_fit = sarima.fit(disp=False)
sarima_forecast = sarima_fit.forecast(len(test_df))

sarima_mae = mean_absolute_error(test_df["y"], sarima_forecast)
sarima_mse = mean_squared_error(test_df["y"], sarima_forecast)
sarima_rmse = np.sqrt(sarima_mse)
sarima_mape = np.mean(np.abs((test_df["y"] - sarima_forecast) / test_df["y"])) * 100

# ------------------------------------------------------------
# 4. PROPHET WITH EXTERNAL REGRESSORS
# ------------------------------------------------------------

prophet = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    changepoint_prior_scale=0.15,
    seasonality_prior_scale=10,
    seasonality_mode="additive"
)

prophet.add_regressor("marketing_spend")
prophet.add_regressor("temperature")
prophet.add_regressor("promotion")

prophet.fit(train_df)

future = test_df.drop(columns=["y"])
forecast = prophet.predict(future)

prophet_preds = forecast["yhat"]

prophet_mae = mean_absolute_error(test_df["y"], prophet_preds)
prophet_mse = mean_squared_error(test_df["y"], prophet_preds)
prophet_rmse = np.sqrt(prophet_mse)
prophet_mape = np.mean(np.abs((test_df["y"] - prophet_preds) / test_df["y"])) * 100

# ------------------------------------------------------------
# 5. COMPARISON RESULTS
# ------------------------------------------------------------

results = pd.DataFrame({
    "Model": ["SARIMA", "Prophet + External Regressors"],
    "MAE": [sarima_mae, prophet_mae],
    "RMSE": [sarima_rmse, prophet_rmse],
    "MAPE (%)": [sarima_mape, prophet_mape]
})

print("\nMODEL COMPARISON RESULTS\n")
print(results)

# ------------------------------------------------------------
# 6. REGRESSOR IMPACT ANALYSIS
# ------------------------------------------------------------

regressor_impact = forecast[
    ["marketing_spend", "temperature", "promotion"]
].corrwith(forecast["yhat"])

print("\nEXTERNAL REGRESSOR INFLUENCE\n")
print(regressor_impact)



MODEL COMPARISON RESULTS

                           Model        MAE       RMSE   MAPE (%)
0                         SARIMA  34.947115  39.907088  29.439774
1  Prophet + External Regressors   3.133687   3.933316        NaN

EXTERNAL REGRESSOR INFLUENCE

marketing_spend    0.208117
temperature        0.821662
promotion          0.295978
dtype: float64
