In [None]:

# 0. Google Drive & library installs


!pip install -q wandb prophet scikit-learn pandas numpy matplotlib holidays optuna

from google.colab import drive
import os, joblib, warnings, math, holidays, optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

import wandb
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import logging
# Raise the log threshold of the Stan backend and of Prophet itself to WARNING
# so that per-model fitting chatter does not flood the notebook output.
for _noisy_logger in ("cmdstanpy", "prophet"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

# Suppress every Python warning category for the rest of the session.
warnings.simplefilter("ignore")

# 1. Drive + env setup

drive.mount('/content/drive')
%cd /content/drive/MyDrive/ML_FInal_Project
!pip install -q wandb prophet scikit-learn pandas numpy matplotlib holidays optuna
os.makedirs("models", exist_ok=True)

SEED = 42
np.random.seed(SEED)


# 2. WandB initialisation

# Authenticate against Weights & Biases, then open the run that every metric
# logged below is attached to.
wandb.login()

# Prophet hyperparameters, stored in the run config so the training code reads
# them back from `config` instead of hard-coding them a second time.
prophet_hyperparameters = {
    "random_seed": SEED,
    "changepoint_prior_scale": 0.15,
    "seasonality_prior_scale": 10.0,
    "holidays_prior_scale": 10.0,
    "yearly_seasonality": True,
    "weekly_seasonality": True,
    "daily_seasonality": False,
    "seasonality_mode": "multiplicative",
}

run = wandb.init(
    project="walmart-sales-forecasting",
    entity="lkata22-free-university-of-tbilisi-",
    name="Prophet_train_test_v1",
    group="Prophet",
    config=prophet_hyperparameters,
)
config = wandb.config


# 3. Data loading & merge

# Read the three raw CSV tables from the data folder.
DATA_PATH = "data"
train = pd.read_csv(f"{DATA_PATH}/train.csv")
features = pd.read_csv(f"{DATA_PATH}/features.csv")
stores = pd.read_csv(f"{DATA_PATH}/stores.csv")

# Left-join so that every row of `train` is kept: first attach the per-store,
# per-date feature columns, then the per-store columns.
_train_with_features = train.merge(
    features,
    on=["Store", "Date", "IsHoliday"],
    how="left",
)
raw_df = _train_with_features.merge(stores, on="Store", how="left")


# 4. Feature engineering helper (minimal for Prophet)

# US public-holiday calendar from the `holidays` package.
# NOTE(review): no `years` argument is passed; the `holidays` docs describe the
# calendar as being filled lazily when dates are looked up, so it is presumably
# empty if it is iterated before any lookup - confirm.
us_holidays = holidays.US()

def create_prophet_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a Prophet-ready copy of *df*; the input frame is not modified.

    The ``Date`` column is parsed to datetimes and renamed to ``ds``,
    ``Weekly_Sales`` is renamed to ``y`` (the column names Prophet expects),
    and ``IsHoliday`` is cast to integers. All other columns pass through
    unchanged and the column order is preserved.
    """
    # `assign` works on a copy, and overwriting existing columns keeps them
    # in their original positions.
    prepared = df.assign(
        Date=pd.to_datetime(df["Date"]),
        IsHoliday=df["IsHoliday"].astype(int),
    )
    prophet_names = {"Date": "ds", "Weekly_Sales": "y"}
    return prepared.rename(columns=prophet_names)


# 5. Time-based 80/20 train-test split

# Chronological split: rows dated up to and including the 80th-percentile
# date form the training set, strictly later rows form the test set.
df = create_prophet_features(raw_df)
cutoff = df["ds"].quantile(0.8)

_on_or_before_cutoff = df["ds"] <= cutoff
_after_cutoff = df["ds"] > cutoff
train_df = df.loc[_on_or_before_cutoff]
test_df = df.loc[_after_cutoff]


# 6. Prophet training with live logging


# Fit one Prophet model per (Store, Dept) series, score it on the held-out
# weeks, log the metrics to WandB and persist the fitted model.
results = []
stores_depts = train_df[["Store", "Dept"]].drop_duplicates().values

# Build the holiday table once, up front. A `holidays.US()` calendar created
# without `years` starts out empty (years are only filled in when dates are
# looked up), so iterating it produced no rows and every model was silently
# fitted without holidays. Populate it explicitly for every calendar year
# spanned by the data (train and test).
holiday_years = list(range(df["ds"].min().year, df["ds"].max().year + 1))
holiday_calendar = holidays.US(years=holiday_years)
all_holidays_df = pd.DataFrame(
    [
        {"holiday": "us_holiday", "ds": pd.Timestamp(d), "lower_window": 0, "upper_window": 1}
        for d in sorted(holiday_calendar)
    ],
    columns=["holiday", "ds", "lower_window", "upper_window"],
)

for store, dept in stores_depts:
    print(f"Training Prophet for Store {store}, Dept {dept}")
    # Sort by date so that positions in `y_true` line up with Prophet's
    # forecast rows, which come back ordered by `ds`.
    train_sd = train_df[(train_df["Store"] == store) & (train_df["Dept"] == dept)].sort_values("ds")
    test_sd = test_df[(test_df["Store"] == store) & (test_df["Dept"] == dept)].sort_values("ds")
    # Skip series with fewer than 52 training rows.
    if len(train_sd) < 52:
        continue
    # Nothing to evaluate against.
    if test_sd.empty:
        continue

    # Prophet needs the holiday occurrences for the history AND for the period
    # being forecast, so keep everything from the first training date through
    # the last test date.
    in_series_span = all_holidays_df["ds"].between(train_sd["ds"].min(), test_sd["ds"].max())
    holidays_df = all_holidays_df[in_series_span].reset_index(drop=True)
    # NOTE(review): with windows [0, +1] only holidays falling on, or one day
    # before, an observation date can coincide with a row - confirm this
    # matches the date convention of the sales data.

    m = Prophet(
        yearly_seasonality=config.yearly_seasonality,
        weekly_seasonality=config.weekly_seasonality,
        daily_seasonality=config.daily_seasonality,
        changepoint_prior_scale=config.changepoint_prior_scale,
        seasonality_prior_scale=config.seasonality_prior_scale,
        holidays_prior_scale=config.holidays_prior_scale,
        seasonality_mode=config.seasonality_mode,
        holidays=holidays_df if not holidays_df.empty else None
    )

    # Fit
    m.fit(train_sd[["ds", "y"]])

    # Predict on exactly the held-out dates of this series
    future = test_sd[["ds"]].copy()
    forecast = m.predict(future)
    y_true = test_sd["y"].values
    y_pred = forecast["yhat"].values

    # Metrics; WMAE weights holiday rows 5x and all other rows 1x
    mae = mean_absolute_error(y_true, y_pred)
    rmse = math.sqrt(mean_squared_error(y_true, y_pred))
    weights = np.where(test_sd["IsHoliday"].values==1, 5, 1)
    wmae = np.sum(weights * np.abs(y_true - y_pred)) / weights.sum()

    # Log to WandB
    wandb.log({
        "store": store,
        "dept": dept,
        "mae": mae,
        "rmse": rmse,
        "wmae": wmae
    })

    results.append({
        "Store": store,
        "Dept": dept,
        "mae": mae,
        "rmse": rmse,
        "wmae": wmae
    })

    # Persist the fitted model, one file per series
    model_path = f"models/prophet_store{store}_dept{dept}.pkl"
    joblib.dump(m, model_path)


# 7. Aggregate results

# Collect the per-series metric rows into one table and print its summary
# statistics.
results_df = pd.DataFrame(results)
print("Prophet Results (Store-Dept):")
print(results_df.describe())

# Unweighted mean of each metric across all evaluated (Store, Dept) series,
# logged to WandB as a single record.
summary_metrics = {
    "Prophet_mae_mean": results_df["mae"].mean(),
    "Prophet_rmse_mean": results_df["rmse"].mean(),
    "Prophet_wmae_mean": results_df["wmae"].mean(),
}
wandb.log(summary_metrics)


# 8. Save artefacts

# Write the per-series metrics table to disk and attach the file to the WandB
# run as an artifact of type "results".
results_csv_path = "models/prophet_results.csv"
results_df.to_csv(results_csv_path, index=False)

art = wandb.Artifact("prophet_results", type="results")
art.add_file(results_csv_path)
wandb.log_artifact(art)

# Close the run, then report completion.
wandb.finish()
print("✅ Prophet run complete - metrics & curves logged to WandB.")



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/ML_FInal_Project




0,1
dept,▂▄▄▅▆▁▂▂▂▂▂▃▃▄▄▇▇▂▃▆▆█▁▂▂▂▂▅▇▁▃▅██▁▁▂▃▄▄
mae,▂▁▂█▂▁▄▁▁▂▁▂▁█▂▂▂▁▁▂▁▃▄▂▂▂▄▂▂▂▂▂▁▁▂▁▁▂▄▂
rmse,▃▂▂▁▂▄▆▂▂▁▃▄▂▁▃▁▁▁▁▁▂▄▃▂▁▂▂█▅▂▁▁▁▁▁▁▃▂▁▂
store,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▇▇▇▇██
wmae,▃▄▅▂▂▂▂▃▁▄▅▁▃▁▂▄▄▃▁▂▁▁▅▃▁▂▃▁▂▂▁▁▁▆▂▁▄▃▂█

0,1
dept,44.0
mae,780.30982
rmse,946.84305
store,6.0
wmae,687.7779


Training Prophet for Store 1, Dept 1
Training Prophet for Store 1, Dept 2
Training Prophet for Store 1, Dept 3
Training Prophet for Store 1, Dept 4
Training Prophet for Store 1, Dept 5
Training Prophet for Store 1, Dept 6
Training Prophet for Store 1, Dept 7
Training Prophet for Store 1, Dept 8
Training Prophet for Store 1, Dept 9
Training Prophet for Store 1, Dept 10
Training Prophet for Store 1, Dept 11
Training Prophet for Store 1, Dept 12
Training Prophet for Store 1, Dept 13
Training Prophet for Store 1, Dept 14
Training Prophet for Store 1, Dept 16
Training Prophet for Store 1, Dept 17
Training Prophet for Store 1, Dept 18
Training Prophet for Store 1, Dept 19
Training Prophet for Store 1, Dept 20
Training Prophet for Store 1, Dept 21
Training Prophet for Store 1, Dept 22
Training Prophet for Store 1, Dept 23
Training Prophet for Store 1, Dept 24
Training Prophet for Store 1, Dept 25
Training Prophet for Store 1, Dept 26
Training Prophet for Store 1, Dept 27
Training Prophet for 

0,1
Prophet_mae_mean,▁
Prophet_rmse_mean,▁
Prophet_wmae_mean,▁
dept,▂▆▁█▂▄█▂▃▃▂▂▇▇▃█▅▇▇▆▃▄█▃▆▅▇▁█▅▂▂▁▅▄▇▄▂▃▂
mae,▁▄▃▂▃▆▂▂█▅▁▂▂▇▃▂▂▂▂▂▃▂▁▁▂▁▁▁▁▂▂▂▂▁▃▁▁▁▂▃
rmse,▆▄▂▅▃▄▁▃▁▂▆▂▂▆▂▄▃▆▅▂▁▂▆▅▃█▄▂▂▂▁▂▂▁▂▂▃▄▁█
store,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇████
wmae,▂▁▂▂▅▂▂█▄▁▂▃▁▁▂▃▃▃█▂▃▄▂▁▄▂▃▂▂▄▁▃▁▁▁▆▂▂▂▁

0,1
Prophet_mae_mean,1755.1972
Prophet_rmse_mean,2231.40133
Prophet_wmae_mean,1754.33578
dept,98.0
mae,599.25536
rmse,654.59843
store,45.0
wmae,575.8478


✅ Prophet run complete - metrics & curves logged to WandB.
