In [1]:
# 📦 Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")

In [2]:
# 📂 Load Data
df = pd.read_csv("./demand-forecasting-kernels-only/train.csv")
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values(['store', 'item', 'date'])
df.set_index('date', inplace=True)

In [3]:
# 📊 Feature Engineering
def add_features(data):
    data['day'] = data.index.day
    data['month'] = data.index.month
    data['dayofweek'] = data.index.dayofweek
    for lag in [1, 7, 14]:
        data[f'sales_lag_{lag}'] = data['sales'].shift(lag)
    return data


In [4]:
results = []

store_list = df['store'].unique()
item_list = df['item'].unique()


In [5]:
for store in store_list:
    for item in item_list:
        df_filtered = df[(df['store'] == store) & (df['item'] == item)].copy()
        df_filtered = add_features(df_filtered)
        df_filtered.dropna(inplace=True)

        if len(df_filtered) < 60:
            continue  # skip short series

        # Split
        train = df_filtered.iloc[:-30]
        test = df_filtered.iloc[-30:]

        X_train = train.drop(['sales', 'store', 'item'], axis=1)
        y_train = train['sales']
        X_test = test.drop(['sales', 'store', 'item'], axis=1)
        y_test = test['sales']

        # Model
        model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, max_depth=4, random_state=42)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)

        test = test.copy()
        test['predicted_sales'] = preds
        test['store'] = store
        test['item'] = item
        results.append(test[['store', 'item', 'sales', 'predicted_sales']])

In [6]:
# 🔁 Loop Through Store-Item Combos



# 📈 Combine & Save
forecast_df = pd.concat(results)
forecast_df.to_csv("forecast_gb_all.csv")
forecast_df.head()


Unnamed: 0_level_0,store,item,sales,predicted_sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-12-02,1,1,16,17.797357
2017-12-03,1,1,31,16.437855
2017-12-04,1,1,7,10.524672
2017-12-05,1,1,20,11.522503
2017-12-06,1,1,17,14.824978


In [7]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [8]:
rmse = np.sqrt(mean_squared_error(y_test, preds))
mae = mean_absolute_error(y_test, preds)
r2 = r2_score(y_test, preds)


In [9]:
rmse

np.float64(10.509850658674235)

In [10]:
mae

8.8235891544386

In [11]:
r2

-0.01679739775257727

In [14]:
print(f"📈 RMSE: {rmse:.2f}")
print(f"📊 MAE: {mae:.2f}")
print(f"🧠 R² Score: {r2:.4f}")

📈 RMSE: 10.51
📊 MAE: 8.82
🧠 R² Score: -0.0168
