# Sanity check - Rolling method efficiency
Will test how well the agent preforms using different rolling methods to understand if exists any superior method.
Will also test on a classification problem from kaggle

## Final Report 

* The highest rewards are consistently when testing on rolling_median (column 2), regardless of training regime (all around -1036).

* When testing on rolling_weighted_mean or rolling_winsorized_mean, rewards are much lower (more negative), suggesting those regimes are harder or the agent doesn’t generalize as well to them.

* Training regime has less effect than testing regime—all training regimes seem to generalize similarly to testing on rolling_median.


* rolling_median is the easiest regime for the agent to perform on (highest rewards across all rows in that column).

* rolling_weighted_mean and rolling_winsorized_mean are harder regimes or less well captured by your agent (much lower rewards).

* Agents trained on any regime do about equally well when tested on the rolling_median regime.

In [1]:
import pandas as pd

# Load datasets
FOLDER = 'data/experiments/kaggle-sales-prediction/'
train = pd.read_csv(FOLDER+'train.csv', parse_dates=['date'])
test = pd.read_csv(FOLDER+'test.csv', parse_dates=['date'])
stores = pd.read_csv(FOLDER+'stores.csv')
holidays = pd.read_csv(FOLDER+'holidays_events.csv', parse_dates=['date'])
transactions = pd.read_csv(FOLDER+'transactions.csv', parse_dates=['date'])

# Merge store information
train = train.merge(stores, on='store_nbr', how='left')
test = test.merge(stores, on='store_nbr', how='left')

# Merge holiday information
holidays = holidays[holidays['locale'] == 'National']
holidays = holidays[['date', 'description']].drop_duplicates()
holidays['is_holiday'] = 1
train = train.merge(holidays[['date', 'is_holiday']], on='date', how='left')
test = test.merge(holidays[['date', 'is_holiday']], on='date', how='left')
train['is_holiday'].fillna(0, inplace=True)
test['is_holiday'].fillna(0, inplace=True)

# Merge transactions
train = train.merge(transactions, on=['date', 'store_nbr'], how='left')
test = test.merge(transactions, on=['date', 'store_nbr'], how='left')


  from pandas.core import (
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train['is_holiday'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test['is_holiday'].fillna(0, inplace=True)


In [2]:
for df in [train, test]:
    df['dayofweek'] = df['date'].dt.dayofweek
    df['month'] = df['date'].dt.month
    df['day'] = df['date'].dt.day
    df['week'] = df['date'].dt.isocalendar().week.astype(int)
    df['year'] = df['date'].dt.year
    df['is_weekend'] = df['dayofweek'].isin([5,6]).astype(int)
    df['is_month_start'] = df['date'].dt.is_month_start.astype(int)
    df['is_month_end'] = df['date'].dt.is_month_end.astype(int)
    df['store_dow'] = df['store_nbr'].astype(str) + "_" + df['dayofweek'].astype(str)
    df['family_month'] = df['family'].astype(str) + "_" + df['month'].astype(str)

    #df['store_dow'] = df['store_dow'].astype('category').cat.codes
    #df['family_month'] = df['family_month'].astype('category').cat.codes

In [3]:
from sklearn.preprocessing import OrdinalEncoder



oe = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
train['store_dow'] = oe.fit_transform(train[['store_dow']])
test['store_dow'] = oe.transform(test[['store_dow']])
train['family_month'] = oe.fit_transform(train[['family_month']])
test['family_month'] = oe.transform(test[['family_month']])

In [4]:
test

Unnamed: 0,id,date,store_nbr,family,onpromotion,city,state,type,cluster,is_holiday,...,dayofweek,month,day,week,year,is_weekend,is_month_start,is_month_end,store_dow,family_month
0,3000888,2017-08-16,1,AUTOMOTIVE,0,Quito,Pichincha,D,13,0.0,...,2,8,16,33,2017,0,0,0,72.0,10.0
1,3000889,2017-08-16,1,BABY CARE,0,Quito,Pichincha,D,13,0.0,...,2,8,16,33,2017,0,0,0,72.0,22.0
2,3000890,2017-08-16,1,BEAUTY,2,Quito,Pichincha,D,13,0.0,...,2,8,16,33,2017,0,0,0,72.0,34.0
3,3000891,2017-08-16,1,BEVERAGES,20,Quito,Pichincha,D,13,0.0,...,2,8,16,33,2017,0,0,0,72.0,46.0
4,3000892,2017-08-16,1,BOOKS,0,Quito,Pichincha,D,13,0.0,...,2,8,16,33,2017,0,0,0,72.0,58.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28507,3029395,2017-08-31,9,POULTRY,1,Quito,Pichincha,B,6,0.0,...,3,8,31,35,2017,0,0,1,374.0,346.0
28508,3029396,2017-08-31,9,PREPARED FOODS,0,Quito,Pichincha,B,6,0.0,...,3,8,31,35,2017,0,0,1,374.0,358.0
28509,3029397,2017-08-31,9,PRODUCE,1,Quito,Pichincha,B,6,0.0,...,3,8,31,35,2017,0,0,1,374.0,370.0
28510,3029398,2017-08-31,9,SCHOOL AND OFFICE SUPPLIES,9,Quito,Pichincha,B,6,0.0,...,3,8,31,35,2017,0,0,1,374.0,382.0


In [5]:
import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error
import time
import os


import numpy as np
from scipy.stats import mstats

def rolling_mean(x):
    if len(x) == 0 or np.all(np.isnan(x)):
        return np.nan
    return np.nanmean(x)

def rolling_median(x):
    if len(x) == 0 or np.all(np.isnan(x)):
        return np.nan
    return np.nanmedian(x)

def rolling_weighted_mean(x): 
    if len(x) == 0 or np.all(np.isnan(x)):
        return np.nan
    weights = np.arange(1, len(x)+1)
    # Remove NaNs before weighting
    valid = ~np.isnan(x)
    if np.sum(valid) == 0:
        return np.nan
    return np.average(x[valid], weights=weights[valid])

def rolling_winsorized_mean(x):
    if len(x) < 2 or np.all(np.isnan(x)):
        return np.nan
    # Remove NaNs before applying winsorization
    x_clean = x[~np.isnan(x)]
    if len(x_clean) < 2:
        return np.nan
    return mstats.winsorize(x_clean, limits=(0.1, 0.1)).mean()

    
regimes = {
    'rolling_mean': rolling_mean,
    'rolling_median': rolling_median,
    'rolling_weighted_mean': rolling_weighted_mean,
    'rolling_winsorized_mean': rolling_winsorized_mean,
}

results = {}

_train = train.copy()

for train_name, train_func in regimes.items():
    train_model = _train.copy()
    print(train_name)
    
    
    
    def add_lag_rolling(df, lag_days=[7, 14], rolling_windows=[7, 14]):
        file_path = FOLDER+train_name+'.csv'
        if os.path.exists(file_path):
            cached_df = pd.read_csv(file_path)
            if not cached_df.empty:
                print(f"Loaded cached features from {file_path}")
                return cached_df
        df = df.sort_values(['store_nbr', 'family', 'date'])
        for lag in lag_days:
            df[f'lag_{lag}'] = df.groupby(['store_nbr', 'family'])['sales'].shift(lag)
            df[f'lag_{lag}'] = np.log1p(df[f'lag_{lag}'])
            #df[f'lag_{lag}'] = df.groupby(['store_nbr', 'family'])['sales'].shift(lag)

        for window in rolling_windows:
            # Instead of default rolling mean, apply the custom regime function
            df[f'rolling_mean_{window}'] = (
                df.groupby(['store_nbr', 'family'])['sales']
                  .transform(lambda x: x.shift(1).rolling(window, min_periods=1).apply(train_func, raw=True))
            )
            df[f'rolling_std_{window}'] = df.groupby(['store_nbr', 'family'])['sales'].shift(1).rolling(window, min_periods=1).std()
        df.to_csv(file_path,index=False)
        return df
    print('Will add lag')
    start = time.time()
    train_model = add_lag_rolling(train_model)
  
    end = time.time()
    print(f"Execution time: {end - start:.2f} seconds")
    print('Did add lag',train_model.columns)
    # Drop NA (caused by rolling/lag)
    train_model = train_model.dropna()

    # Feature columns
    features = [
        'store_nbr', 'family', 'dayofweek', 'month', 'day', 'week',
        'is_holiday',
        'lag_7', 'lag_14',
        'rolling_mean_7', 'rolling_std_7',
        'rolling_mean_14', 'rolling_std_14',"year",
"is_weekend",
"is_month_start",
"is_month_end","store_dow",
"family_month"
    ]
    # Encode categorical
    train_model['family'] = train_model['family'].astype('category').cat.codes

    X = train_model[features]
    #y = train_model['sales']
    y = np.log1p(train_model['sales'])
    print('Will split')
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=False)
    print('Start')
    model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=6)
    start = time.time()
   
  
 
    model.fit(X_train, y_train)
    end = time.time()
    print(f"Execution time: {end - start:.2f} seconds")
    #y_pred = model.predict(X_val)
    #y_pred = np.clip(y_pred, 0, None)  # set all values < 0 to 0
    #y_pred = np.nan_to_num(np.where(np.isinf(y_pred), np.nan, y_pred), nan=0.0)
    y_pred = np.expm1(model.predict(X_val))
    print('RMSLE:', np.sqrt(mean_squared_log_error(y_val, y_pred)))
    results[train_name]=np.sqrt(mean_squared_log_error(y_val, y_pred))

rolling_mean
Will add lag
Execution time: 524.38 seconds
Did add lag Index(['id', 'date', 'store_nbr', 'family', 'sales', 'onpromotion', 'city',
       'state', 'type', 'cluster', 'is_holiday', 'transactions', 'dayofweek',
       'month', 'day', 'week', 'year', 'is_weekend', 'is_month_start',
       'is_month_end', 'store_dow', 'family_month', 'lag_7', 'lag_14',
       'rolling_mean_7', 'rolling_std_7', 'rolling_mean_14', 'rolling_std_14'],
      dtype='object')
Will split
Start
Execution time: 14.27 seconds
RMSLE: 3.1478369102935444
rolling_median
Will add lag


KeyboardInterrupt: 

In [None]:
X_train

### What’s a Good Score?
For Store Sales (Kaggle):

🥉 RMSLE > 0.60 → Baseline or simple model

🥈 RMSLE ≈ 0.45 → Reasonable with lags + rolling + calendar features

🥇 RMSLE < 0.40 → Competitive (you’re doing very well)

🏆 RMSLE < 0.38 → Likely leaderboard top 10%

