# Libraries

In [1]:
import pandas as pd
import numpy as np
from prophet import Prophet
from prophet.plot import plot_components_plotly
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

  from .autonotebook import tqdm as notebook_tqdm


# 1) Helper error metrics

In [2]:
def mape(y_true, y_pred):
    """Mean absolute percentage error, expressed in percent.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values; must broadcast against ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` taken over the points
        whose actual value is non-zero. ``nan`` when no such point exists
        (including empty input).

    Notes
    -----
    The percentage error is undefined where the actual value is zero, so
    those points are left out instead of turning the whole score into
    ``inf``/``nan``.
    """
    # Coerce first so plain lists work and pandas index alignment cannot
    # silently re-pair observations.
    actual, predicted = np.broadcast_arrays(
        np.asarray(y_true, dtype=float),
        np.asarray(y_pred, dtype=float),
    )
    nonzero = actual != 0
    if not nonzero.any():
        return float('nan')
    pct_error = np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
    return np.mean(pct_error) * 100

def smape(y_true, y_pred):
    """Symmetric mean absolute percentage error, expressed in percent.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values; must broadcast against ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)) * 100``.
        A point where both the actual and the prediction are zero counts as
        zero error. ``nan`` for empty input.
    """
    actual, predicted = np.broadcast_arrays(
        np.asarray(y_true, dtype=float),
        np.asarray(y_pred, dtype=float),
    )
    abs_error = np.abs(actual - predicted)
    denom = (np.abs(actual) + np.abs(predicted)) / 2
    # denom == 0 only when both values are 0, i.e. a perfect prediction;
    # leave those entries at 0 instead of evaluating 0/0.
    ratio = np.divide(abs_error, denom, out=np.zeros_like(abs_error), where=denom != 0)
    return np.mean(ratio) * 100

# 2) Prepare your time series + regressors

## Google

In [None]:
def load_data(selected_file):
    """Read the CSV registered under a dataset label and parse its dates.

    Parameters
    ----------
    selected_file : str
        A label from the lookup table below, e.g. ``"Google (ROI Test)"``.

    Returns
    -------
    tuple
        ``(df, label)``: the DataFrame read from the mapped CSV path, with
        its ``'Date'`` column converted by ``pd.to_datetime`` (values that
        cannot be parsed become ``NaT``), and the label exactly as passed in.

    Raises
    ------
    KeyError
        If the label is not in the lookup table, or the CSV has no
        ``'Date'`` column.
    """
    # Label -> CSV path (paths are relative to the working directory).
    csv_by_label = {
        "Google (Cleaned)": "df_google_nonlog.csv",
        "Meta (Cleaned)": "df_meta_nonlog.csv",
        "Google (Log-transformed)": "df_google_log.csv",
        "Meta (Log-transformed)": "df_meta_log.csv",
        "Google (ROI Test)": "df_google_log_ROI.csv",
        "Meta (ROI Test)": "df_meta_log_ROI.csv",
    }

    csv_path = csv_by_label[selected_file]
    frame = pd.read_csv(csv_path, low_memory=False)
    frame['Date'] = pd.to_datetime(frame['Date'], errors="coerce")
    return frame, selected_file

# Load one dataset; the returned label decides which KPI columns apply below.
# load_data already converts 'Date' to datetime, so no second conversion here.
df, selected_label = load_data("Google (ROI Test)")

# Cost columns used as Prophet extra regressors and as XGBoost features.
cost_metrics = [
    "Cost",
    "CPM",
    "CPC",
    "CPV",
    "CPVC",
    "CPE",
    "CPCV"
]

# Which column supplies the KPI for each funnel stage. The stage names are
# the same for both platforms; only the source columns differ.
if "Google" in selected_label:
    stage_kpi_columns = {
        'Top Funnel': 'Impressions',
        'Mid Funnel': 'Clicks',
        'Bottom Funnel': 'Conversions',
    }
else:
    stage_kpi_columns = {
        'Top Funnel': 'Impressions',
        'Mid Funnel': 'Link Clicks',
        'Bottom Funnel': 'Landing Page Views',
    }

conditions = [df['Campaign Stage'] == stage for stage in stage_kpi_columns]
kpi = [df[column] for column in stage_kpi_columns.values()]

# Rows whose stage matches none of the three funnel stages get NaN.
df['KPI'] = np.select(conditions, kpi, default=np.nan)

# `metric` is used downstream as a column *label* (train[metric],
# rename({... metric: 'y'}), plot titles), so it must be the column name,
# not the Series itself.
metric = 'KPI'

# Daily modelling frame consumed by every later cell as `ts`:
# one row per date, holding the regressors and the target.
# NOTE(review): the original definition of `ts` is not in this notebook.
# Averaging rows that share a date is an assumption - confirm whether the
# series should instead be summed, or filtered to one campaign/stage first.
ts = (
    df.dropna(subset=['Date', metric])
      .groupby('Date', as_index=False)[cost_metrics + [metric]]
      .mean()
      .sort_values('Date')
      .reset_index(drop=True)
)

# 3) Train–test split (hold out the most recent days for back-testing)

In [116]:
# Back-test window: the final `test_days` rows of `ts` are held out as the
# test set and everything before them is used for fitting. Copies are taken
# so later edits to either part cannot write through to `ts`.
test_days = 30
train, test = (
    part.copy()
    for part in (ts.iloc[:-test_days], ts.iloc[-test_days:])
)

# 4) Fit Prophet with extra regressors

In [117]:
# Prophet with yearly and weekly seasonality plus every cost metric as an
# extra regressor. `interval_width` is not set, so the yhat_lower/yhat_upper
# band uses Prophet's default width.
m = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    changepoint_prior_scale=0.05,
)
for reg in cost_metrics:
    m.add_regressor(reg)

# Prophet expects the timestamp in 'ds' and the target in 'y'; the regressor
# columns keep their own names.
prophet_train = train.rename(columns={'Date': 'ds', metric: 'y'})
m.fit(prophet_train)

18:33:34 - cmdstanpy - INFO - Chain [1] start processing
18:33:34 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x2324c115050>

# 5) Forecast

In [118]:
# Predict on exactly the dates present in `ts` (training rows + held-out
# rows), carrying their observed regressor values.
#
# Building the frame from `ts` rather than m.make_future_dataframe() avoids
# assuming the series has one row per calendar day: with any gap, the
# generated calendar dates would not match the held-out dates, the left merge
# would leave NaN regressors, and the `.loc[test['Date']]` lookup below would
# fail. For a gap-free daily series the two frames are identical.
future = ts.rename(columns={'Date': 'ds'})[['ds'] + cost_metrics]
fc = m.predict(future)

# Point forecasts for the held-out window, in the same order as `test`.
pred_prophet = fc.set_index('ds').loc[test['Date'], 'yhat'].to_numpy()

# 6) Build XGBoost on raw features

In [119]:
#	here: cost_metrics + simple calendar features
def make_features(df_):
    """Build the XGBoost design matrix from a frame with a datetime 'Date'.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain every column listed in the module-level ``cost_metrics``
        and a datetime-typed ``'Date'`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the ``cost_metrics`` columns followed by ``'dow'``
        (day of week, Monday = 0) and ``'month'`` (1-12). ``df_`` itself is
        not modified.
    """
    dates = df_['Date'].dt
    return df_[cost_metrics].assign(
        dow=dates.dayofweek,
        month=dates.month,
    )

# Design matrices for the fit window and the held-out window.
X_train, X_test = make_features(train), make_features(test)
y_train = train[metric]

# Gradient-boosted trees on the raw cost and calendar features;
# fit() returns the fitted estimator itself.
xgb = XGBRegressor(n_estimators=200, learning_rate=0.05).fit(X_train, y_train)
pred_xgb = xgb.predict(X_test)

# 7) Compute metrics

In [120]:
# Score both models against the same held-out actuals.
y_true = test[metric].values
model_predictions = [('Prophet', pred_prophet), ('XGBoost', pred_xgb)]

# One record per model; the column names are reused by the model-selection
# step later in the notebook.
results = [
    {
        'Model': name,
        'MAE': mean_absolute_error(y_true, yhat),
        'MAPE (%)': mape(y_true, yhat),
        'SMAPE (%)': smape(y_true, yhat),
        'R2': r2_score(y_true, yhat),
    }
    for name, yhat in model_predictions
]
metrics_df = pd.DataFrame(results)
print(metrics_df.to_string(index=False))

  Model       MAE  MAPE (%)  SMAPE (%)       R2
Prophet 14.247054  5.569853   5.351686 0.601378
XGBoost 16.833588  6.364028   6.369777 0.457867


# 8) Plot 1: Actual vs. Prophet forecast + CI

In [121]:
# Prophet's uncertainty band for the held-out dates.
ci = fc.set_index('ds').loc[test['Date'], ['yhat_lower', 'yhat_upper']]

# Label the band with the width the model was actually built with. The model
# is created without `interval_width`, so hard-coding '95% CI' misstates the
# band; reading the attribute keeps the legend correct whatever the setting.
ci_label = f'{m.interval_width:.0%} CI'

# Closed polygon for the band: upper bound left-to-right, then lower bound
# right-to-left, so that fill='toself' shades the area in between.
band_x = list(test['Date']) + list(test['Date'][::-1])
band_y = list(ci['yhat_upper']) + list(ci['yhat_lower'][::-1])

fig_forecast = go.Figure([
    go.Scatter(x=train['Date'], y=train[metric], mode='lines', name='Train Actual'),
    go.Scatter(x=test['Date'], y=test[metric], mode='lines', name='Test Actual'),
    go.Scatter(x=test['Date'], y=pred_prophet, mode='lines', name='Prophet Forecast'),
    go.Scatter(
        x=band_x,
        y=band_y,
        fill='toself',
        fillcolor='rgba(0,100,80,0.2)',
        line=dict(color='rgba(255,255,255,0)'),  # hide the polygon outline
        name=ci_label
    )
])
fig_forecast.update_layout(
    title=f'{metric}: Prophet Forecast vs Actual',
    xaxis_title='Date', yaxis_title=metric
)
fig_forecast.show()

# 9) Plot 2: Prophet components (trend, weekly, yearly, holidays, regressors)

In [122]:
# Decompose the forecast `fc` into the fitted model's components and render
# them as an interactive Plotly figure.
fig_components = plot_components_plotly(m=m, fcst=fc)
fig_components.show()

# 10) Plot 3: Actual vs. XGBoost

In [123]:
# Same layout as the Prophet back-test chart, without an uncertainty band
# (the XGBoost regressor only returns point predictions).
xgb_traces = [
    go.Scatter(x=frame_x, y=series_y, mode='lines', name=trace_name)
    for frame_x, series_y, trace_name in (
        (train['Date'], train[metric], 'Train Actual'),
        (test['Date'], test[metric], 'Test Actual'),
        (test['Date'], pred_xgb, 'XGBoost Forecast'),
    )
]
fig_xgb = go.Figure(xgb_traces)
fig_xgb.update_layout(
    title=f'{metric}: XGBoost Forecast vs Actual',
    xaxis_title='Date',
    yaxis_title=metric,
)
fig_xgb.show()

# 11) True forward forecast

In [None]:
# --- run after `metrics_df` has been built ---
# Select the model with the lowest back-test MAPE (%). The winner is computed
# once and shared by `best` and `best_model`, so the printed summary and the
# branch taken below can never disagree (e.g. on a tie).
best_row = metrics_df.loc[metrics_df['MAPE (%)'].idxmin()]
best_model = best_row['Model']
best_MAPE = best_row['MAPE (%)']
best = best_model

print(f"Best back‑test model: {best_model} with MAPE (%) = {best_MAPE:.3f}")

# Forecast horizon, counted from the last observed date in `ts`.
future_days = 180
last_date = ts['Date'].max()
future_dates = pd.date_range(last_date + pd.Timedelta(1, 'D'), periods=future_days)

# NOTE(review): `m` and `xgb` are still the back-test fits, trained without
# the final `test_days` rows. Consider refitting both on all of `ts` before
# publishing a forward forecast.

# Prediction frame = every observed date + the full horizon.
# m.make_future_dataframe() is not used here because it extends from the end
# of the data `m` was fit on (the train split), which would leave the horizon
# beyond `last_date` short by the length of the held-out window.
history_regs = ts.rename(columns={'Date': 'ds'})[['ds'] + cost_metrics]
future_all = (
    pd.concat([history_regs, pd.DataFrame({'ds': future_dates})], ignore_index=True)
    .sort_values('ds')
    .reset_index(drop=True)
)
# Future regressor values are unknown: carry the last known value forward,
# and fall back to 0 where nothing precedes a gap.
future_all[cost_metrics] = future_all[cost_metrics].ffill().fillna(0)

# Prophet forecast; keep only the rows after the last observed date.
# (The `_60` suffix is historical - the horizon is `future_days`.)
fc_all = m.predict(future_all)
fc_60 = fc_all[fc_all['ds'] > last_date].copy()

# XGBoost forecast on the same carried-forward regressors, so both models
# are given identical assumptions about future costs.
df_feat_60 = (
    future_all.loc[future_all['ds'] > last_date]
    .rename(columns={'ds': 'Date'})
    .reset_index(drop=True)
)
X_feat_60 = make_features(df_feat_60)

pred_xgb_60 = xgb.predict(X_feat_60)

# Assemble the forecast of whichever model won the back-test.
if best == "Prophet":
    # Prophet provides a point forecast plus lower/upper bounds.
    forecast_df = fc_60.rename(columns={
        "yhat":       "Forecast",
        "yhat_lower": "Lower CI",
        "yhat_upper": "Upper CI"
    })[['ds', 'Forecast', 'Lower CI', 'Upper CI']]
else:
    # XGBoost provides point forecasts only - no interval.
    forecast_df = pd.DataFrame({
        'ds':       df_feat_60['Date'],
        'Forecast': pred_xgb_60
    })

# Final chart: history followed by the forecast.
fig = go.Figure([
    go.Scatter(x=ts['Date'], y=ts[metric], mode='lines', name='Historical'),
    go.Scatter(x=forecast_df['ds'], y=forecast_df['Forecast'],
               mode='lines', name=f'{future_days}‑Day {best} Forecast')
])

if best == "Prophet":
    # Shaded band: upper bound left-to-right, then lower bound right-to-left.
    # The legend label is taken from the model's actual interval width
    # instead of a hard-coded '95% CI'.
    fig.add_trace(go.Scatter(
        x=list(forecast_df['ds']) + list(forecast_df['ds'][::-1]),
        y=list(forecast_df['Upper CI']) + list(forecast_df['Lower CI'][::-1]),
        fill='toself', fillcolor='rgba(0,100,80,0.2)',
        line=dict(color='rgba(255,255,255,0)'),
        name=f'{m.interval_width:.0%} CI'
    ))

fig.update_layout(
    title=f"{metric}: {future_days}‑Day Forecast by {best}",
    xaxis_title='Date', yaxis_title=metric
)
fig.show()

Best back‑test model: Prophet with MAPE (%) = 5.570
