# 03 - Model Training

This notebook retrains Ridge using forecast-safe features.

In [1]:
# 📦 Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib

In [2]:
# 📂 Load dataset
df = pd.read_csv("../data/processed/retail_sales_features.csv")
df['date'] = pd.to_datetime(df['date'])
df.head()

Unnamed: 0,date,item,category,price,promotion,units_sold,sales_7d_avg,day_of_week,is_weekend,is_holiday,brand,discount,stock_available,stockout_flag,sales_lag_1,sales_lag_3,sales_lag_7,sales_30d_avg,sales_30d_trend,month
0,2023-10-01,0,0,220,0,45,0.0,6,1,0,2,0.12,51,0,0.0,0.0,0.0,0.0,0.0,10
1,2023-10-02,0,0,200,0,51,45.0,0,0,0,2,0.2,52,0,45.0,0.0,0.0,45.0,0.0,10
2,2023-10-03,0,0,200,0,45,48.0,1,0,0,2,0.2,51,0,51.0,0.0,0.0,48.0,0.0,10
3,2023-10-04,0,0,200,0,55,47.0,2,0,1,2,0.2,63,0,45.0,45.0,0.0,47.0,0.0,10
4,2023-10-05,0,0,250,0,53,49.0,3,0,0,2,0.0,57,0,55.0,51.0,0.0,49.0,0.0,10


## 🔄 Retraining Ridge Model with Forecast-Safe Features

In [3]:
# 🎯 Define forecast-safe features only
forecast_safe_features = ['item', 'category', 'price', 'promotion', 'discount',
                          'brand', 'day_of_week', 'month', 'is_weekend', 'is_holiday']
X_fs = df[forecast_safe_features]
y_fs = df['units_sold']

X_train_fs, X_test_fs, y_train_fs, y_test_fs = train_test_split(X_fs, y_fs, test_size=0.2, shuffle=False)

In [4]:
# 🔁 Retrain Ridge on forecast-safe features
ridge_forecast_model = Ridge(alpha=1.0)
ridge_forecast_model.fit(X_train_fs, y_train_fs)

# Save the retrained model
joblib.dump(ridge_forecast_model, "../models/ridge_forecast_model.pkl")
print("✅ Forecast-safe Ridge model saved to ../models/ridge_forecast_model.pkl")

✅ Forecast-safe Ridge model saved to ../models/ridge_forecast_model.pkl
