# Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
        
from scipy.stats import skew,norm,zscore
from scipy.signal import periodogram

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.deterministic import DeterministicProcess, CalendarFourier

from sklearn.model_selection import train_test_split, cross_val_score, TimeSeriesSplit, GridSearchCV, cross_validate
from sklearn.metrics import mean_squared_error, make_scorer, mean_squared_log_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.decomposition import PCA
from random import shuffle
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_log_error as msle
from tqdm import tqdm

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import Pool, CatBoostRegressor

import optuna

# Loading the dataset

In [2]:
holidays_events = pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/holidays_events.csv", parse_dates=['date'])
oil = pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/oil.csv", parse_dates=['date'])
stores = pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/stores.csv")
transactions = pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/transactions.csv", parse_dates=['date'])

test = pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/test.csv", parse_dates=['date'])
train = pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/train.csv", parse_dates=['date'])

In [3]:
important_dates = {
    'train_start_date': '2013-01-01',
    'train_end_date': '2017-08-15',
    'test_start_date': '2017-08-16',
    'test_end_date': '2017-08-31',
    'forest_start_date': '2016-06-01'
}

# Prepare Data

In [4]:
def add_store_details(main_df, train, test):
    df = main_df.copy()
    
    df['uniquestore'] = df.city.apply(lambda x: 0 if x in ['Quito', 'Guayaquil', 'Santo Domingo', 'Cuenca', 'Manta', 'Machala', 'Latacunga', 'Ambato'] else 1)
    df['newstore'] = df.store_nbr.apply(lambda x: 1 if x in [19, 20, 21, 28, 35, 41, 51, 52] else 0)
        
    df = pd.concat([train, test], axis=0).merge(df, on=['store_nbr'], how='left')
    df = df.rename(columns={'type' : 'store'})
    
    return df

In [5]:
final_df = add_store_details(stores, train, test)
final_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,store,cluster,uniquestore,newstore
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13,0,0
1,1,2013-01-01,1,BABY CARE,0.0,0,Quito,Pichincha,D,13,0,0
2,2,2013-01-01,1,BEAUTY,0.0,0,Quito,Pichincha,D,13,0,0
3,3,2013-01-01,1,BEVERAGES,0.0,0,Quito,Pichincha,D,13,0,0
4,4,2013-01-01,1,BOOKS,0.0,0,Quito,Pichincha,D,13,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
3029395,3029395,2017-08-31,9,POULTRY,,1,Quito,Pichincha,B,6,0,0
3029396,3029396,2017-08-31,9,PREPARED FOODS,,0,Quito,Pichincha,B,6,0,0
3029397,3029397,2017-08-31,9,PRODUCE,,1,Quito,Pichincha,B,6,0,0
3029398,3029398,2017-08-31,9,SCHOOL AND OFFICE SUPPLIES,,9,Quito,Pichincha,B,6,0,0


In [6]:
def add_holiday_details(main_df):
    
    df = main_df.copy()
    
    df.loc[297, 'transferred'] = df.loc[297, 'transferred'] = False
    df = df.query("transferred!=True")
    
    df = df.drop(index=holidays_events[holidays_events[['date', 'locale_name']].duplicated()].index.values)

    df.loc[df.type=='Event', 'type'] = df.description.apply(lambda x: x[0:7])
     
    nat_df = df.query("locale=='National'")
    loc_df = df.query("locale=='Local'")
    reg_df = df.query("locale=='Regional'")
    
    df = final_df.merge(nat_df, left_on=['date'], right_on=['date'], how='left')
    df = df.merge(loc_df, left_on=['date', 'city'], right_on=['date', 'locale_name'], how='left')
    df = df.merge(reg_df, left_on=['date', 'state'], right_on=['date', 'locale_name'], how='left')
   
    df['firstday'] = df.description_x.apply(lambda x: 1 if x=='Primer dia del ano' else 0)

    df = df.drop(columns=['locale_x', 'locale_name_x', 'description_x', 'transferred_x',
                          'locale_y', 'locale_name_y', 'description_y', 'transferred_y',
                          'locale', 'locale_name', 'description', 'transferred'])
    df.loc[~df.type_x.isnull(), 'event_type'] = df.type_x.apply(lambda x: x)
    df.loc[~df.type_y.isnull(), 'event_type'] = df.type_y.apply(lambda x: x)
    df.loc[~df.type.isnull(), 'event_type'] = df.type.apply(lambda x: x)
    df.loc[df.event_type.isnull(), 'event_type'] = df.event_type.apply(lambda x: 'norm')
    df = df.drop(columns=['type_x', 'type_y', 'type'])

    df['isevent'] = df.event_type.apply(lambda x: 'y' if x!='norm' else 'n')

    df.loc[df.date.isin(['2017-04-16', '2016-03-27', '2015-04-05', '2014-04-20', '2013-03-31']), 'isevent'] = df.isevent.apply(lambda x: 'y')
    df.loc[df.date.isin(['2017-04-16', '2016-03-27', '2015-04-05', '2014-04-20', '2013-03-31']), 'event_type'] = df.event_type.apply(lambda x: 'Holiday')

    df['isclosed'] = df.groupby(by=['date', 'store_nbr'])['sales'].transform(lambda x: 1 if x.sum()==0 else 0)    
    df.loc[(df.date.dt.year==2017) & (df.date.dt.month==8) & (df.date.dt.day>=16) , 'isclosed'] = df.isclosed.apply(lambda x: 0)    
    df.loc[df.date.isin(['2017-01-01']), 'isevent'] = df.isevent.apply(lambda x: 'n')
  
    return df

In [7]:
final_df = add_holiday_details(holidays_events)
final_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,store,cluster,uniquestore,newstore,firstday,event_type,isevent,isclosed
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13,0,0,1,Holiday,y,1
1,1,2013-01-01,1,BABY CARE,0.0,0,Quito,Pichincha,D,13,0,0,1,Holiday,y,1
2,2,2013-01-01,1,BEAUTY,0.0,0,Quito,Pichincha,D,13,0,0,1,Holiday,y,1
3,3,2013-01-01,1,BEVERAGES,0.0,0,Quito,Pichincha,D,13,0,0,1,Holiday,y,1
4,4,2013-01-01,1,BOOKS,0.0,0,Quito,Pichincha,D,13,0,0,1,Holiday,y,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3029395,3029395,2017-08-31,9,POULTRY,,1,Quito,Pichincha,B,6,0,0,0,norm,n,0
3029396,3029396,2017-08-31,9,PREPARED FOODS,,0,Quito,Pichincha,B,6,0,0,0,norm,n,0
3029397,3029397,2017-08-31,9,PRODUCE,,1,Quito,Pichincha,B,6,0,0,0,norm,n,0
3029398,3029398,2017-08-31,9,SCHOOL AND OFFICE SUPPLIES,,9,Quito,Pichincha,B,6,0,0,0,norm,n,0


In [8]:
print(f"Total null entries in oil: \n{oil.set_index('date').resample('D').mean().isnull().sum()}")

Total null entries in oil: 
dcoilwtico    529
dtype: int64


In [9]:
def add_oil_details(main_df):
    
    df = main_df.copy()
    
    df = df.set_index('date').resample("D").mean().interpolate(limit_direction='backward').reset_index()
    
    for i in [1, 2, 3, 4, 5, 6, 7, 10, 14, 21, 30, 60, 90]:
        df['lagoil_' + str(i) + '_dcoilwtico'] = df['dcoilwtico'].shift(i)
    
    df['oil_week_avg'] = df['dcoilwtico'].rolling(7).mean()
    df['oil_2weeks_avg'] = df['dcoilwtico'].rolling(14).mean()
    df['oil_month_avg'] = df['dcoilwtico'].rolling(30).mean()

    df.dropna(inplace = True)
    
    df = final_df.merge(df, on=['date'], how='left')
    
    return df
    

In [10]:
final_df = add_oil_details(oil)
final_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,store,cluster,...,lagoil_7_dcoilwtico,lagoil_10_dcoilwtico,lagoil_14_dcoilwtico,lagoil_21_dcoilwtico,lagoil_30_dcoilwtico,lagoil_60_dcoilwtico,lagoil_90_dcoilwtico,oil_week_avg,oil_2weeks_avg,oil_month_avg
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13,...,,,,,,,,,,
1,1,2013-01-01,1,BABY CARE,0.0,0,Quito,Pichincha,D,13,...,,,,,,,,,,
2,2,2013-01-01,1,BEAUTY,0.0,0,Quito,Pichincha,D,13,...,,,,,,,,,,
3,3,2013-01-01,1,BEVERAGES,0.0,0,Quito,Pichincha,D,13,...,,,,,,,,,,
4,4,2013-01-01,1,BOOKS,0.0,0,Quito,Pichincha,D,13,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3029395,3029395,2017-08-31,9,POULTRY,,1,Quito,Pichincha,B,6,...,47.24,47.39,47.07,48.54,49.19,45.656,47.68,46.825714,47.362857,48.034333
3029396,3029396,2017-08-31,9,PREPARED FOODS,,0,Quito,Pichincha,B,6,...,47.24,47.39,47.07,48.54,49.19,45.656,47.68,46.825714,47.362857,48.034333
3029397,3029397,2017-08-31,9,PRODUCE,,1,Quito,Pichincha,B,6,...,47.24,47.39,47.07,48.54,49.19,45.656,47.68,46.825714,47.362857,48.034333
3029398,3029398,2017-08-31,9,SCHOOL AND OFFICE SUPPLIES,,9,Quito,Pichincha,B,6,...,47.24,47.39,47.07,48.54,49.19,45.656,47.68,46.825714,47.362857,48.034333


In [11]:
def add_transaction_details(main_df):
    
    df = main_df.copy()
    
    df = final_df.merge(df, on=['date', 'store_nbr'], how='left')
    
    df.loc[(df.transactions.isnull()) & (df.isclosed==1), 'transactions'] = df.transactions.apply(lambda x: 0)
    group_df = df.groupby(by=['store_nbr', 'date']).transactions.first().reset_index()
    group_df['avg_tra'] = group_df.transactions.rolling(15, min_periods=10).mean()
    group_df['16_tra'] = group_df.transactions.shift(16)
    group_df['21_tra'] = group_df.transactions.shift(21)
    group_df['30_tra'] = group_df.transactions.shift(30)
    group_df['60_tra'] = group_df.transactions.shift(60)
    group_df.drop(columns='transactions', inplace=True)
    df = df.merge(group_df, on=['date', 'store_nbr'], how='left')
    df.loc[(df.transactions.isnull()) & (df.isclosed==0), 'transactions'] = df.avg_tra
    df.drop(columns='avg_tra', inplace=True)
    df.loc[(df.date.dt.year==2017) & (df.date.dt.month==8) & (df.date.dt.day>=16) , 'transactions'] = df.transactions.apply(lambda x: None)    

    df['tot_store_day_onprom'] = df.groupby(by=['date', 'store_nbr']).onpromotion.transform(lambda x: x.sum())

    return df
    

In [12]:
final_df = add_transaction_details(transactions)
final_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,store,cluster,...,lagoil_90_dcoilwtico,oil_week_avg,oil_2weeks_avg,oil_month_avg,transactions,16_tra,21_tra,30_tra,60_tra,tot_store_day_onprom
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13,...,,,,,0.0,,,,,0
1,1,2013-01-01,1,BABY CARE,0.0,0,Quito,Pichincha,D,13,...,,,,,0.0,,,,,0
2,2,2013-01-01,1,BEAUTY,0.0,0,Quito,Pichincha,D,13,...,,,,,0.0,,,,,0
3,3,2013-01-01,1,BEVERAGES,0.0,0,Quito,Pichincha,D,13,...,,,,,0.0,,,,,0
4,4,2013-01-01,1,BOOKS,0.0,0,Quito,Pichincha,D,13,...,,,,,0.0,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3029395,3029395,2017-08-31,9,POULTRY,,1,Quito,Pichincha,B,6,...,47.68,46.825714,47.362857,48.034333,,2155.0,1923.0,2271.0,2352.0,223
3029396,3029396,2017-08-31,9,PREPARED FOODS,,0,Quito,Pichincha,B,6,...,47.68,46.825714,47.362857,48.034333,,2155.0,1923.0,2271.0,2352.0,223
3029397,3029397,2017-08-31,9,PRODUCE,,1,Quito,Pichincha,B,6,...,47.68,46.825714,47.362857,48.034333,,2155.0,1923.0,2271.0,2352.0,223
3029398,3029398,2017-08-31,9,SCHOOL AND OFFICE SUPPLIES,,9,Quito,Pichincha,B,6,...,47.68,46.825714,47.362857,48.034333,,2155.0,1923.0,2271.0,2352.0,223


In [13]:
def add_time_features (main_df):
    
    df = main_df.copy()
        
    df['year'] = df.index.year.astype('int')
    df['quarter'] = df.index.quarter.astype('int')
    df['month'] = df.index.month.astype('int')
    df['day'] = df.index.day.astype('int')
    df['dayofweek'] = df.index.day_of_week.astype('int')
    df['weekofyear'] = df.index.week.astype('int')
    df['isweekend'] = df.dayofweek.apply(lambda x: 1 if x in (5,6) else 0)
    df['startschool'] = df.month.apply(lambda x: 1 if x in (4,5,8,9) else 0)
    
    df['daysinmonth'] = df.index.days_in_month.astype('int')

    df = pd.get_dummies(df, columns=['year'], drop_first=True)
    df = pd.get_dummies(df, columns=['quarter'], drop_first=True)
    df = pd.get_dummies(df, columns=['dayofweek'], drop_first=True)
    df = pd.get_dummies(df, columns=['store'], drop_first=True)
    df = pd.get_dummies(df, columns=['event_type'], drop_first=True)
    df = pd.get_dummies(df, columns=['isevent'], drop_first=True)
    df = pd.get_dummies(df, columns=['state'], drop_first=True)

    # DeterministicProcess
    fourierA = CalendarFourier(freq='A', order=5)
    fourierM = CalendarFourier(freq='M', order=2)
    fourierW = CalendarFourier(freq='W', order=4)

    dp = DeterministicProcess(index=df.index,
                          order=1,
                          seasonal=False,
                          constant=False,
                          additional_terms=[fourierA, fourierM, fourierW],
                          drop=True)
    dp_df = dp.in_sample()
    df = pd.concat([df, dp_df], axis=1)
    
    df['outliers'] = df.sales.apply(lambda x: 1 if x>30000 else 0)
    
    df.drop(columns=['daysinmonth', 'month', 'city'], inplace=True)
    
    return df

In [14]:
final_df = final_df.set_index('date').loc[important_dates['forest_start_date']:,:]
final_df

Unnamed: 0_level_0,id,store_nbr,family,sales,onpromotion,city,state,store,cluster,uniquestore,...,lagoil_90_dcoilwtico,oil_week_avg,oil_2weeks_avg,oil_month_avg,transactions,16_tra,21_tra,30_tra,60_tra,tot_store_day_onprom
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-06-01,2216808,1,AUTOMOTIVE,3.0,0,Quito,Pichincha,D,13,0,...,34.56,49.174286,48.650000,47.024333,1898.0,1794.0,1828.0,1722.0,1295.0,363
2016-06-01,2216809,1,BABY CARE,0.0,0,Quito,Pichincha,D,13,0,...,34.56,49.174286,48.650000,47.024333,1898.0,1794.0,1828.0,1722.0,1295.0,363
2016-06-01,2216810,1,BEAUTY,4.0,0,Quito,Pichincha,D,13,0,...,34.56,49.174286,48.650000,47.024333,1898.0,1794.0,1828.0,1722.0,1295.0,363
2016-06-01,2216811,1,BEVERAGES,2199.0,37,Quito,Pichincha,D,13,0,...,34.56,49.174286,48.650000,47.024333,1898.0,1794.0,1828.0,1722.0,1295.0,363
2016-06-01,2216812,1,BOOKS,0.0,0,Quito,Pichincha,D,13,0,...,34.56,49.174286,48.650000,47.024333,1898.0,1794.0,1828.0,1722.0,1295.0,363
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-08-31,3029395,9,POULTRY,,1,Quito,Pichincha,B,6,0,...,47.68,46.825714,47.362857,48.034333,,2155.0,1923.0,2271.0,2352.0,223
2017-08-31,3029396,9,PREPARED FOODS,,0,Quito,Pichincha,B,6,0,...,47.68,46.825714,47.362857,48.034333,,2155.0,1923.0,2271.0,2352.0,223
2017-08-31,3029397,9,PRODUCE,,1,Quito,Pichincha,B,6,0,...,47.68,46.825714,47.362857,48.034333,,2155.0,1923.0,2271.0,2352.0,223
2017-08-31,3029398,9,SCHOOL AND OFFICE SUPPLIES,,9,Quito,Pichincha,B,6,0,...,47.68,46.825714,47.362857,48.034333,,2155.0,1923.0,2271.0,2352.0,223


In [15]:
df = add_time_features(final_df).loc[:important_dates['test_end_date'],:].reset_index().set_index(['store_nbr', 'family', 'date']).sort_index()
df['16_tra'] = df['16_tra'].fillna(0)
df['21_tra'] = df['21_tra'].fillna(0)
df['30_tra'] = df['30_tra'].fillna(0)
df['60_tra'] = df['60_tra'].fillna(0)
df

  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,sales,onpromotion,cluster,uniquestore,newstore,firstday,isclosed,dcoilwtico,lagoil_1_dcoilwtico,...,"cos(1,freq=M)","sin(2,freq=M)","cos(2,freq=M)","sin(1,freq=W-SUN)","cos(1,freq=W-SUN)","sin(2,freq=W-SUN)","cos(2,freq=W-SUN)","sin(3,freq=W-SUN)","cos(3,freq=W-SUN)",outliers
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1,AUTOMOTIVE,2016-06-01,2216808,3.0,0,13,0,0,0,0,49.070000,49.100000,...,1.000000,0.000000,1.000000,0.974928,-0.222521,-0.433884,-0.900969,-0.781831,0.623490,0
1,AUTOMOTIVE,2016-06-02,2218590,1.0,0,13,0,0,0,0,49.140000,49.070000,...,0.978148,0.406737,0.913545,0.433884,-0.900969,-0.781831,0.623490,0.974928,-0.222521,0
1,AUTOMOTIVE,2016-06-03,2220372,4.0,0,13,0,0,0,0,48.690000,49.140000,...,0.913545,0.743145,0.669131,-0.433884,-0.900969,0.781831,0.623490,-0.974928,-0.222521,0
1,AUTOMOTIVE,2016-06-04,2222154,9.0,0,13,0,0,0,0,49.030000,48.690000,...,0.809017,0.951057,0.309017,-0.974928,-0.222521,0.433884,-0.900969,0.781831,0.623490,0
1,AUTOMOTIVE,2016-06-05,2223936,2.0,0,13,0,0,0,0,49.370000,49.030000,...,0.669131,0.994522,-0.104528,-0.781831,0.623490,-0.974928,-0.222521,-0.433884,-0.900969,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54,SEAFOOD,2017-08-27,3022139,,0,3,1,0,0,0,46.816667,47.233333,...,0.528964,-0.897805,-0.440394,-0.781831,0.623490,-0.974928,-0.222521,-0.433884,-0.900969,0
54,SEAFOOD,2017-08-28,3023921,,0,3,1,0,0,0,46.400000,46.816667,...,0.688967,-0.998717,-0.050649,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0
54,SEAFOOD,2017-08-29,3025703,,0,3,1,0,0,0,46.460000,46.400000,...,0.820763,-0.937752,0.347305,0.781831,0.623490,0.974928,-0.222521,0.433884,-0.900969,0
54,SEAFOOD,2017-08-30,3027485,,0,3,1,0,0,0,45.960000,46.460000,...,0.918958,-0.724793,0.688967,0.974928,-0.222521,-0.433884,-0.900969,-0.781831,0.623490,0


# Modeling

In [16]:
def train_val_split_function(df):
    train, val = train_test_split(df, test_size=0.1, shuffle=False)
    return [train.drop(columns=['sales']), val.drop(columns=['sales']), train['sales'], val['sales']]

def get_model(name):
    RF_param = {
        'criterion': 'squared_error',
        'bootstrap': 'False',
        'max_depth': 9733,
        'max_features': 'auto',
        'max_leaf_nodes': 4730,
        'n_estimators': 159,
        'min_samples_split': 3,
        'min_samples_leaf': 8
    }
    switcher = {
        "GB": GradientBoostingRegressor(n_estimators=20,min_samples_split=30, subsample=0.3),
        "RF": RandomForestRegressor(**RF_param, random_state=0),
        "Ridge": Ridge(alpha=0.5),
        "LR": LinearRegression(),
        "CB": CatBoostRegressor(),
        "XGB": XGBRegressor()
    }
    
    return switcher.get(name, switcher.get("RF"))

def get_predicitions(final_predictions):
    sample_submission = pd.read_csv('../input/store-sales-time-series-forecasting/sample_submission.csv')
    sample_submission['sales'] = sample_submission['id'].map(final_predictions)
    sample_submission['sales'] = np.exp(sample_submission['sales']) - 1
    
    return sample_submission
    

In [17]:
def train(pca=None):
    train_error = val_error = count = 0
    final_predictions = {}
    
    for i in tqdm(df.index.get_level_values(0).unique()):
        for j in df.index.get_level_values(1).unique():
            current_df = df.loc[(i, j)]
            test_id = current_df[current_df['sales'].isna()]['id']
            current_df = current_df.drop(columns=['id', 'transactions'])
            
            train = current_df[~current_df['sales'].isna()]
            X_test = current_df[current_df['sales'].isna()].drop(columns=['sales'])
            
            X_train, X_val, y_train, y_val = train_val_split_function(train)
            
            if pca:
                pca.fit(X_train)
                X_train = pca.transform(X_train)
                X_val = pca.transform(X_val)
                X_test = pca.transform(X_test)
            
            y_train = np.log1p(y_train)
            y_val = np.log1p(y_val)
            
            model = get_model("RF")
            model.fit(X_train, y_train)
            
            train_pred =  model.predict(X_train).clip(0.0)
            val_pred = model.predict(X_val).clip(0.0)
            
            train_error += msle(np.exp(y_train) - 1, (np.exp(train_pred) - 1).clip(0))
            val_error += msle(np.exp(y_val) - 1, (np.exp(val_pred) - 1).clip(0))
            count += 1
            
            test_preds = model.predict(X_test).clip(0.0)

            for q in range(test_preds.shape[0]):
                final_predictions[test_id[q]] = test_preds[q]
            
        print(f"Train Performance: {(train_error / count)**0.5}; Val Performance: {(val_error / count)**0.5}")
    
    return final_predictions
    

### Train without PCA

In [20]:
predictions = train()
predictions = get_predicitions(predictions)
predictions.to_csv('/kaggle/working/without_pca.csv',index=False)

### With PCA

In [21]:
predictions = train(pca=PCA(n_components=10))
predictions = get_predicitions(predictions)
predictions.to_csv('/kaggle/working/with_pca.csv',index=False)

  2%|▏         | 1/54 [00:12<11:23, 12.90s/it]

Train Performance: 0.376240411156621; Val Performance: 0.38727815021678813


  4%|▎         | 2/54 [00:25<11:09, 12.88s/it]

Train Performance: 0.3727576897575212; Val Performance: 0.37559824498430233


  6%|▌         | 3/54 [00:38<10:55, 12.85s/it]

Train Performance: 0.37124555352408073; Val Performance: 0.3660436352747978


  7%|▋         | 4/54 [00:51<10:44, 12.89s/it]

Train Performance: 0.3698769529214687; Val Performance: 0.3629608728637732


  9%|▉         | 5/54 [01:04<10:32, 12.92s/it]

Train Performance: 0.36777165090428204; Val Performance: 0.3767625829677819


 11%|█         | 6/54 [01:16<10:11, 12.74s/it]

Train Performance: 0.36529734417121607; Val Performance: 0.37112714499843846


 13%|█▎        | 7/54 [01:29<10:01, 12.80s/it]

Train Performance: 0.36535081218306176; Val Performance: 0.369401426455424


 15%|█▍        | 8/54 [01:42<09:47, 12.76s/it]

Train Performance: 0.36469875120520184; Val Performance: 0.36695131893667704


 17%|█▋        | 9/54 [01:55<09:33, 12.74s/it]

Train Performance: 0.36743129511964817; Val Performance: 0.3967151378461308


 19%|█▊        | 10/54 [02:07<09:17, 12.66s/it]

Train Performance: 0.36804774573627225; Val Performance: 0.4076434939288728


 20%|██        | 11/54 [02:21<09:14, 12.91s/it]

Train Performance: 0.37010031742507904; Val Performance: 0.41703225223800205


 22%|██▏       | 12/54 [02:33<08:58, 12.83s/it]

Train Performance: 0.3699927113545402; Val Performance: 0.4214766569714382


 24%|██▍       | 13/54 [02:46<08:39, 12.67s/it]

Train Performance: 0.37083677281951344; Val Performance: 0.42529306754368346


 26%|██▌       | 14/54 [02:58<08:22, 12.57s/it]

Train Performance: 0.37069689160880503; Val Performance: 0.42795809737796286


 28%|██▊       | 15/54 [03:11<08:10, 12.59s/it]

Train Performance: 0.37055637458004886; Val Performance: 0.43061179601275446


 30%|██▉       | 16/54 [03:23<07:59, 12.61s/it]

Train Performance: 0.3703234044337466; Val Performance: 0.4317384915607647


 31%|███▏      | 17/54 [03:36<07:47, 12.63s/it]

Train Performance: 0.3705785648192109; Val Performance: 0.4342987409426285


 33%|███▎      | 18/54 [03:48<07:26, 12.41s/it]

Train Performance: 0.37688916182471893; Val Performance: 0.45950296939431945


 35%|███▌      | 19/54 [04:01<07:18, 12.53s/it]

Train Performance: 0.3774416138697167; Val Performance: 0.463813959750844


 37%|███▋      | 20/54 [04:13<07:08, 12.60s/it]

Train Performance: 0.3791648098453993; Val Performance: 0.46903283287687864


 39%|███▉      | 21/54 [04:26<06:57, 12.65s/it]

Train Performance: 0.3798960739150335; Val Performance: 0.4692085016017785


 41%|████      | 22/54 [04:39<06:44, 12.64s/it]

Train Performance: 0.379351706041441; Val Performance: 0.47178263045079594


 43%|████▎     | 23/54 [04:52<06:34, 12.73s/it]

Train Performance: 0.37905551500278944; Val Performance: 0.4699844230404837


 44%|████▍     | 24/54 [05:05<06:24, 12.82s/it]

Train Performance: 0.37843995843036665; Val Performance: 0.4656946125679131


 46%|████▋     | 25/54 [05:17<06:07, 12.69s/it]

Train Performance: 0.3837363724553174; Val Performance: 0.4661117881740613


 48%|████▊     | 26/54 [05:30<05:58, 12.81s/it]

Train Performance: 0.3846199463271644; Val Performance: 0.46858705916237275


 50%|█████     | 27/54 [05:43<05:44, 12.75s/it]

Train Performance: 0.3842990533797562; Val Performance: 0.46664989481784086


 52%|█████▏    | 28/54 [05:55<05:29, 12.68s/it]

Train Performance: 0.38464865737988596; Val Performance: 0.4664865541989098


 54%|█████▎    | 29/54 [06:08<05:17, 12.69s/it]

Train Performance: 0.3846060218614478; Val Performance: 0.465395554706772


 56%|█████▌    | 30/54 [06:21<05:07, 12.82s/it]

Train Performance: 0.38447509120050466; Val Performance: 0.4654270577246808


 57%|█████▋    | 31/54 [06:34<04:54, 12.78s/it]

Train Performance: 0.3848453403165242; Val Performance: 0.4648393338723624


 59%|█████▉    | 32/54 [06:46<04:38, 12.66s/it]

Train Performance: 0.38490668156663244; Val Performance: 0.46535320463897917


 61%|██████    | 33/54 [06:59<04:24, 12.59s/it]

Train Performance: 0.38490100413359624; Val Performance: 0.4647495275102739


 63%|██████▎   | 34/54 [07:12<04:13, 12.69s/it]

Train Performance: 0.38495622330846996; Val Performance: 0.46538430155125243


 65%|██████▍   | 35/54 [07:24<03:58, 12.57s/it]

Train Performance: 0.38483056094453016; Val Performance: 0.4658030810494039


 67%|██████▋   | 36/54 [07:37<03:50, 12.79s/it]

Train Performance: 0.38490143162666385; Val Performance: 0.4666324861358871


 69%|██████▊   | 37/54 [07:50<03:38, 12.85s/it]

Train Performance: 0.384199383754952; Val Performance: 0.46421092068943554


 70%|███████   | 38/54 [08:03<03:25, 12.84s/it]

Train Performance: 0.3837152213471344; Val Performance: 0.4628254416003339


 72%|███████▏  | 39/54 [08:15<03:10, 12.73s/it]

Train Performance: 0.38403661656626514; Val Performance: 0.46240971846140905


 74%|███████▍  | 40/54 [08:28<02:57, 12.68s/it]

Train Performance: 0.38389321632305684; Val Performance: 0.4618335768566975


 76%|███████▌  | 41/54 [08:41<02:47, 12.85s/it]

Train Performance: 0.38368684171999656; Val Performance: 0.4607603519646194


 78%|███████▊  | 42/54 [08:54<02:32, 12.75s/it]

Train Performance: 0.383008346584074; Val Performance: 0.45872439766214773


 80%|███████▉  | 43/54 [09:06<02:19, 12.69s/it]

Train Performance: 0.38552424161838633; Val Performance: 0.4683666233988037


 81%|████████▏ | 44/54 [09:19<02:06, 12.70s/it]

Train Performance: 0.38534819542683124; Val Performance: 0.4687601789398267


 83%|████████▎ | 45/54 [09:32<01:54, 12.77s/it]

Train Performance: 0.3853987193044162; Val Performance: 0.4724054597925194


 85%|████████▌ | 46/54 [09:44<01:40, 12.60s/it]

Train Performance: 0.38525360116156765; Val Performance: 0.47331352526239284


 87%|████████▋ | 47/54 [09:56<01:27, 12.50s/it]

Train Performance: 0.3850471134778096; Val Performance: 0.4759061015781123


 89%|████████▉ | 48/54 [10:09<01:15, 12.54s/it]

Train Performance: 0.3850643624132934; Val Performance: 0.4775817856821148


 91%|█████████ | 49/54 [10:22<01:02, 12.53s/it]

Train Performance: 0.384849727448174; Val Performance: 0.47917739151813443


 93%|█████████▎| 50/54 [10:34<00:50, 12.54s/it]

Train Performance: 0.38473806112186026; Val Performance: 0.48143718755328463


 94%|█████████▍| 51/54 [10:47<00:37, 12.55s/it]

Train Performance: 0.3848750263761401; Val Performance: 0.48001309882423726


 96%|█████████▋| 52/54 [10:56<00:22, 11.45s/it]

Train Performance: 0.3818426204917517; Val Performance: 0.47873621864713195


 98%|█████████▊| 53/54 [11:08<00:11, 11.87s/it]

Train Performance: 0.3819006555183971; Val Performance: 0.47761591122253544


100%|██████████| 54/54 [11:21<00:00, 12.61s/it]

Train Performance: 0.3813910524803166; Val Performance: 0.47726729276150515





### With Extra features from: [artemchistyakov](https://www.kaggle.com/code/artemchistyakov/store-sales-eda-rf)

In [22]:
def tags_to_dict():
    tags = {
     'AUTOMOTIVE': [4, 7, 30, 10, 'family'],
     'BABY CARE':  [-8, 2, 25, 5, 'family'],
     'BEAUTY': [-8, 7, 25, 5, 'other'],
     'BEVERAGES': [0, 0, 40, 40, 'food'],
     'BOOKS': [0, 0, 55, 15, 'other'],
     'BREAD/BAKERY': [-3, 0, 30, 30, 'food'],
     'CELEBRATION': [-5, 5, 50, 20, 'family'],
     'CLEANING': [-8, 3, 40, 20, 'food'],
     'DAIRY': [-4, 0, 40, 40, 'food'],
     'DELI': [3, 6, 40, 20, 'food'],
     'EGGS': [-4, -5, 40, 20, 'food'],
     'FROZEN FOODS': [-4, -3, 40, 20, 'food'],
     'GROCERY I': [-4, 3, 40, 20, 'food'],
     'GROCERY II': [-4, 3, 40, 20, 'food'],
     'HARDWARE': [10, 10, 30, 20, 'other'],
     'HOME AND KITCHEN I': [-10, 4, 40, 20, 'family'],
     'HOME AND KITCHEN II': [-10, 4, 40, 20, 'family'],
     'HOME APPLIANCES': [0, 4, 40, 20, 'family'],
     'HOME CARE': [-10, 4, 40, 20, 'family'],
     'LADIESWEAR': [-10, 4, 40, 20, 'other'],
     'LAWN AND GARDEN': [-10, 4, 40, 20, 'family'],
     'LINGERIE': [-10, 4, 40, 2, 'other'],
     'LIQUOR,WINE,BEER': [4, 8, 40, 20, 'food'],
     'MAGAZINES': [-6, -7, 50, 20, 'other'],
     'MEATS': [-4, 5, 40, 20, 'food'],
     'PERSONAL CARE': [-5, 5, 40, 20, 'family'],
     'PET SUPPLIES': [-5, 0, 40, 20, 'family'],
     'PLAYERS AND ELECTRONICS': [5, 5, 25, 10, 'other'],
     'POULTRY': [-7, -4, 40, 20, 'food'],
     'PREPARED FOODS': [0, 6, 30, 10, 'food'],
     'PRODUCE': [0, 0, 40, 40, 'other'],
     'SCHOOL AND OFFICE SUPPLIES': [3, 3, 25, 15, 'family'],
     'SEAFOOD': [-5, 8, 40, 20, 'food']
    }
    
    sex_dict = {}
    luxury_dict = {}
    age_mean_dict = {}
    age_var_dict = {}
    type_dict = {}
    for i in tags.keys():
        sex_dict[i] = tags[i][0]
        luxury_dict[i] = tags[i][1]
        age_mean_dict[i] = tags[i][2]
        age_var_dict[i] = tags[i][3]
        type_dict[i] = tags[i][4]
    return [sex_dict, luxury_dict, age_mean_dict, age_var_dict, type_dict]

def get_oil_dict(oil):
    # estimate price of gaps (market don't work on weekends and holidays)
    price_estim = [-1] * (oil['days_from_2013'][oil.shape[0] - 1] + 1)
    price_estim[0] = 93.14
    for i in range(1, oil.shape[0]):
        price_estim[oil['days_from_2013'][i]] = oil['dcoilwtico'][i]

    for i in range (len(price_estim)):
        if price_estim[i] == -1 or math.isnan(price_estim[i]):
            tj = -1
            for j in range(i + 1, len(price_estim)):
                if price_estim[j] != -1 and (not math.isnan(price_estim[j])):
                    tj = j
                    break

            for j in range(i, tj):
                price_estim[j] = ((tj - j) * price_estim[i - 1] + (j - i) * price_estim[tj]) / (tj - i)

            i = tj

    oil_dict = dict(zip(np.arange(len(price_estim)), price_estim))
    return oil_dict


import math

def add_custom_features(df):
    # read
    train = pd.read_csv('../input/store-sales-time-series-forecasting/train.csv')
    oil = pd.read_csv('../input/store-sales-time-series-forecasting/oil.csv')
    trans = pd.read_csv('../input/store-sales-time-series-forecasting/transactions.csv')
    
    # add 'days_from_2013' for easy shifting
    df['days_from_2013'] = (pd.to_datetime(df.index.get_level_values(2)) - pd.to_datetime('2013-01-01')).days
    train['days_from_2013'] = (pd.to_datetime(train['date']) - pd.to_datetime('2013-01-01')).dt.days
    oil['days_from_2013'] = (pd.to_datetime(oil['date']) - pd.to_datetime('2013-01-01')).dt.days
    trans['days_from_2013'] = (pd.to_datetime(trans['date']) - pd.to_datetime('2013-01-01')).dt.days
    
    # groupby features
    gr_day = train.groupby('days_from_2013')['sales'].mean()
    gr_store = train.groupby('store_nbr')['sales'].mean()
    gr_family = train.groupby('family')['sales'].mean()

    days = [16, 18, 20, 21, 25, 28, 30, 35, 42, 60, 90, 120, 180, 365]
    for i in days:
        df['days_' + str(i)] = df['days_from_2013'] - i
        df['days_lagged' + str(i)] = df['days_' + str(i)].map(gr_day).fillna(0)
        df = df.drop(columns=['days_' + str(i)])

    df['store_gb'] = df.index.get_level_values(0).map(gr_store)
    df['family_gb'] = df.index.get_level_values(1).map(gr_family)
    
    oil_dict = get_oil_dict(oil)

    # lagged oil
    days = [0, 1, 2, 3, 4, 5, 6, 7, 10, 14, 21, 30, 60, 90, 120, 180, 360]
    for i in days:
        df['days_' + str(i)] = df['days_from_2013'] - i
        df['oil_lagged' + str(i)] = df['days_' + str(i)].map(oil_dict)
        df = df.drop(columns=['days_' + str(i)])
        
    # lagged transactions
    # # fill trans dict
    trans_dict = {}
    for ii in range(trans.shape[0]):
        i = trans.loc[ii]
        trans_dict[tuple([i['store_nbr'], i['days_from_2013']])] = i['transactions']
    
    def transaction_get_value(a, b):
        try:
            return trans_dict[tuple([a, (pd.to_datetime(b) - pd.to_datetime('2013-01-01').dt.days)])]
        except:
            return 0

    days = [16, 18, 20, 21, 25, 28, 30, 35, 42, 60, 90, 120, 180, 365]
    for i in days:
        df['days_' + str(i)] = df['days_from_2013'] - i
        df['oil_lagged' + str(i)] = df['days_' + str(i)].map(oil_dict)
        df['trans_lagged' + str(i)] = [transaction_get_value(*a) for a in tuple(zip(df.index.get_level_values(0),
                                                                        df.index.get_level_values(2)))]
        df = df.drop(columns=['days_' + str(i)])

    sex_dict, luxury_dict, age_mean_dict, age_var_dict, type_dict = tags_to_dict()
    df['tag_sex'] = df.index.get_level_values(1).map(sex_dict)
    df['tag_luxury'] = df.index.get_level_values(1).map(luxury_dict)
    df['tag_age_mean'] = df.index.get_level_values(1).map(age_mean_dict)
    df['tag_age_var'] = df.index.get_level_values(1).map(age_var_dict)
    df['tag_type'] = df.index.get_level_values(1).map(type_dict)
    df = pd.get_dummies(df, columns=['tag_type'])
    
    df['tag_age_min'] = df['tag_age_mean'] - df['tag_age_var']
    df['tag_age_max'] = df['tag_age_mean'] + df['tag_age_var']
    return df

def custom_split_function(main_df, train_start_date='2013-01-01', train_end_date='2017-08-30', val_start_date='2017-09-01', val_end_date='2020-01-01'):
    train_start_date = (pd.to_datetime(train_start_date) - pd.to_datetime('2013-01-01')).days
    train_end_date = (pd.to_datetime(train_end_date) - pd.to_datetime('2013-01-01')).days
    val_start_date = (pd.to_datetime(val_start_date) - pd.to_datetime('2013-01-01')).days
    val_end_date = (pd.to_datetime(val_end_date) - pd.to_datetime('2013-01-01')).days
    
    train = main_df[(main_df['days_from_2013'] >= train_start_date) & (main_df['days_from_2013'] <= train_end_date)]
    val = main_df[(main_df['days_from_2013'] >= val_start_date) & (main_df['days_from_2013'] <= val_end_date)]
    return [train.drop(columns=['sales']), val.drop(columns=['sales']), train['sales'], val['sales']]

def get_weights_distribution(tp, dates):
    if tp == 1:
        return np.ones(dates.shape)
    if tp == 2:
        return np.exp((400 - (pd.to_datetime('2017-08-16') - pd.to_datetime(dates)).days) / 100)
    if tp == 3:
        return np.exp((400 - (pd.to_datetime('2017-08-16') - pd.to_datetime(dates)).days) / 200)
    if tp == 4:
        return np.exp((400 - (pd.to_datetime('2017-08-16') - pd.to_datetime(dates)).days) / 300)
    if tp == 5:
        return np.exp((400 - (pd.to_datetime('2017-08-16') - pd.to_datetime(dates)).days) / 400)

In [23]:
df = add_custom_features(df)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,sales,onpromotion,cluster,uniquestore,newstore,firstday,isclosed,dcoilwtico,lagoil_1_dcoilwtico,...,trans_lagged365,tag_sex,tag_luxury,tag_age_mean,tag_age_var,tag_type_family,tag_type_food,tag_type_other,tag_age_min,tag_age_max
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1,AUTOMOTIVE,2016-06-01,2216808,3.0,0,13,0,0,0,0,49.070000,49.100000,...,0,4,7,30,10,1,0,0,20,40
1,AUTOMOTIVE,2016-06-02,2218590,1.0,0,13,0,0,0,0,49.140000,49.070000,...,0,4,7,30,10,1,0,0,20,40
1,AUTOMOTIVE,2016-06-03,2220372,4.0,0,13,0,0,0,0,48.690000,49.140000,...,0,4,7,30,10,1,0,0,20,40
1,AUTOMOTIVE,2016-06-04,2222154,9.0,0,13,0,0,0,0,49.030000,48.690000,...,0,4,7,30,10,1,0,0,20,40
1,AUTOMOTIVE,2016-06-05,2223936,2.0,0,13,0,0,0,0,49.370000,49.030000,...,0,4,7,30,10,1,0,0,20,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54,SEAFOOD,2017-08-27,3022139,,0,3,1,0,0,0,46.816667,47.233333,...,0,-5,8,40,20,0,1,0,20,60
54,SEAFOOD,2017-08-28,3023921,,0,3,1,0,0,0,46.400000,46.816667,...,0,-5,8,40,20,0,1,0,20,60
54,SEAFOOD,2017-08-29,3025703,,0,3,1,0,0,0,46.460000,46.400000,...,0,-5,8,40,20,0,1,0,20,60
54,SEAFOOD,2017-08-30,3027485,,0,3,1,0,0,0,45.960000,46.460000,...,0,-5,8,40,20,0,1,0,20,60


In [24]:
def train_best():
    train_error = val_error = count = 0
    final_predictions = {}
    
    for i in tqdm(df.index.get_level_values(0).unique()):
        for j in df.index.get_level_values(1).unique():
            current_df = df.loc[(i, j)]
            test_id = current_df[current_df['sales'].isna()]['id']
            current_df = current_df.drop(columns=['id', 'transactions'])
            
            train = current_df[~current_df['sales'].isna()]
            X_test = current_df[current_df['sales'].isna()].drop(columns=['sales'])
            
            X_train, X_val, y_train, y_val = custom_split_function(train)
            
            y_train = np.log1p(y_train)
#             y_val = np.log1p(y_val)
            
            model = get_model("RF")
            weights = get_weights_distribution(5, X_train.index)
            model.fit(X_train, y_train, sample_weight=weights)
            
            train_pred =  model.predict(X_train).clip(0.0)
#             val_pred = model.predict(X_val).clip(0.0)
            
            train_error += msle(np.exp(y_train) - 1, (np.exp(train_pred) - 1).clip(0))
#             val_error += msle(np.exp(y_val) - 1, (np.exp(val_pred) - 1).clip(0))
            count += 1
            
            test_preds = model.predict(X_test).clip(0.0)

            for q in range(test_preds.shape[0]):
                final_predictions[test_id[q]] = test_preds[q]
            
        print(f"Train Performance: {(train_error / count)**0.5}; Val Performance: {(val_error / count)**0.5}")
    
    return final_predictions
    

# Submission

In [25]:
final_preds = train_best()
best_submission = get_predicitions(final_preds)
best_submission.to_csv('/kaggle/working/best.csv', index=False)
best_submission

  2%|▏         | 1/54 [00:52<46:15, 52.36s/it]

Train Performance: 0.3189048065448388; Val Performance: 0.0


  4%|▎         | 2/54 [01:42<44:06, 50.89s/it]

Train Performance: 0.32270911747488845; Val Performance: 0.0


  6%|▌         | 3/54 [02:33<43:18, 50.96s/it]

Train Performance: 0.32587432415877615; Val Performance: 0.0


  7%|▋         | 4/54 [03:22<41:57, 50.35s/it]

Train Performance: 0.32541703016140344; Val Performance: 0.0


  9%|▉         | 5/54 [04:14<41:30, 50.84s/it]

Train Performance: 0.32413841653047387; Val Performance: 0.0


 11%|█         | 6/54 [05:02<39:49, 49.78s/it]

Train Performance: 0.32228257287324424; Val Performance: 0.0


 13%|█▎        | 7/54 [05:54<39:40, 50.64s/it]

Train Performance: 0.322721906623123; Val Performance: 0.0


 15%|█▍        | 8/54 [06:45<38:59, 50.85s/it]

Train Performance: 0.3221419270720787; Val Performance: 0.0


 17%|█▋        | 9/54 [07:35<37:49, 50.43s/it]

Train Performance: 0.3247957352457052; Val Performance: 0.0


 19%|█▊        | 10/54 [08:23<36:21, 49.58s/it]

Train Performance: 0.3256968499103267; Val Performance: 0.0


 20%|██        | 11/54 [09:13<35:41, 49.81s/it]

Train Performance: 0.3277577849553817; Val Performance: 0.0


 22%|██▏       | 12/54 [10:01<34:35, 49.41s/it]

Train Performance: 0.3277195396307285; Val Performance: 0.0


 24%|██▍       | 13/54 [10:50<33:40, 49.28s/it]

Train Performance: 0.3285640788878606; Val Performance: 0.0


 26%|██▌       | 14/54 [11:40<33:00, 49.50s/it]

Train Performance: 0.32838693545369296; Val Performance: 0.0


 28%|██▊       | 15/54 [12:31<32:24, 49.86s/it]

Train Performance: 0.3285735191677737; Val Performance: 0.0


 30%|██▉       | 16/54 [13:20<31:23, 49.57s/it]

Train Performance: 0.32844356774144534; Val Performance: 0.0


 31%|███▏      | 17/54 [14:08<30:19, 49.18s/it]

Train Performance: 0.3287325891609395; Val Performance: 0.0


 33%|███▎      | 18/54 [14:51<28:20, 47.24s/it]

Train Performance: 0.32508939937031267; Val Performance: 0.0


 35%|███▌      | 19/54 [15:39<27:45, 47.58s/it]

Train Performance: 0.32596654647759055; Val Performance: 0.0


 37%|███▋      | 20/54 [16:28<27:12, 48.03s/it]

Train Performance: 0.3277518979810962; Val Performance: 0.0


 39%|███▉      | 21/54 [17:18<26:43, 48.60s/it]

Train Performance: 0.32908450409576434; Val Performance: 0.0


 41%|████      | 22/54 [18:07<25:58, 48.69s/it]

Train Performance: 0.32904695362432945; Val Performance: 0.0


 43%|████▎     | 23/54 [18:58<25:28, 49.30s/it]

Train Performance: 0.3289571669863882; Val Performance: 0.0


 44%|████▍     | 24/54 [19:51<25:09, 50.31s/it]

Train Performance: 0.3287731164982859; Val Performance: 0.0


 46%|████▋     | 25/54 [20:39<24:00, 49.66s/it]

Train Performance: 0.32770551397643255; Val Performance: 0.0


 48%|████▊     | 26/54 [21:29<23:13, 49.75s/it]

Train Performance: 0.3288127417121962; Val Performance: 0.0


 50%|█████     | 27/54 [22:19<22:28, 49.95s/it]

Train Performance: 0.3290098510719294; Val Performance: 0.0


 52%|█████▏    | 28/54 [23:06<21:16, 49.10s/it]

Train Performance: 0.3298027904319436; Val Performance: 0.0


 54%|█████▎    | 29/54 [23:54<20:21, 48.84s/it]

Train Performance: 0.3302335468452914; Val Performance: 0.0


 56%|█████▌    | 30/54 [24:47<20:00, 50.04s/it]

Train Performance: 0.3304486665101373; Val Performance: 0.0


 57%|█████▋    | 31/54 [25:38<19:17, 50.34s/it]

Train Performance: 0.3312559267213033; Val Performance: 0.0


 59%|█████▉    | 32/54 [26:26<18:12, 49.66s/it]

Train Performance: 0.33140035849226585; Val Performance: 0.0


 61%|██████    | 33/54 [27:13<17:04, 48.77s/it]

Train Performance: 0.3316519662883977; Val Performance: 0.0


 63%|██████▎   | 34/54 [28:04<16:30, 49.50s/it]

Train Performance: 0.3320569488984651; Val Performance: 0.0


 65%|██████▍   | 35/54 [28:52<15:28, 48.89s/it]

Train Performance: 0.3319020102809194; Val Performance: 0.0


 67%|██████▋   | 36/54 [29:41<14:43, 49.09s/it]

Train Performance: 0.3323325520083817; Val Performance: 0.0


 69%|██████▊   | 37/54 [30:33<14:09, 49.98s/it]

Train Performance: 0.33202264658100783; Val Performance: 0.0


 70%|███████   | 38/54 [31:22<13:14, 49.63s/it]

Train Performance: 0.3319267416354189; Val Performance: 0.0


 72%|███████▏  | 39/54 [32:12<12:25, 49.73s/it]

Train Performance: 0.3324666075181893; Val Performance: 0.0


 74%|███████▍  | 40/54 [33:02<11:35, 49.68s/it]

Train Performance: 0.3326447500900016; Val Performance: 0.0


 76%|███████▌  | 41/54 [33:51<10:44, 49.58s/it]

Train Performance: 0.3326717227378043; Val Performance: 0.0


 78%|███████▊  | 42/54 [34:40<09:51, 49.32s/it]

Train Performance: 0.3323790258352197; Val Performance: 0.0


 80%|███████▉  | 43/54 [35:27<08:56, 48.74s/it]

Train Performance: 0.3349212700748831; Val Performance: 0.0


 81%|████████▏ | 44/54 [36:16<08:06, 48.70s/it]

Train Performance: 0.3350133161101088; Val Performance: 0.0


 83%|████████▎ | 45/54 [37:04<07:18, 48.71s/it]

Train Performance: 0.3352754415200568; Val Performance: 0.0


 85%|████████▌ | 46/54 [37:54<06:31, 48.90s/it]

Train Performance: 0.33531411587050325; Val Performance: 0.0


 87%|████████▋ | 47/54 [38:42<05:41, 48.82s/it]

Train Performance: 0.33528527187668083; Val Performance: 0.0


 89%|████████▉ | 48/54 [39:31<04:52, 48.67s/it]

Train Performance: 0.3353447003654709; Val Performance: 0.0


 91%|█████████ | 49/54 [40:19<04:02, 48.59s/it]

Train Performance: 0.3353677087004583; Val Performance: 0.0


 93%|█████████▎| 50/54 [41:08<03:14, 48.58s/it]

Train Performance: 0.33537618617192366; Val Performance: 0.0


 94%|█████████▍| 51/54 [41:57<02:26, 48.79s/it]

Train Performance: 0.33563648478750524; Val Performance: 0.0


 96%|█████████▋| 52/54 [42:18<01:21, 40.52s/it]

Train Performance: 0.33304336992191624; Val Performance: 0.0


 98%|█████████▊| 53/54 [43:08<00:43, 43.27s/it]

Train Performance: 0.33321852505913435; Val Performance: 0.0


100%|██████████| 54/54 [43:55<00:00, 48.80s/it]

Train Performance: 0.3329999611641711; Val Performance: 0.0





Unnamed: 0,id,sales
0,3000888,3.218919
1,3000889,0.000000
2,3000890,4.031745
3,3000891,2420.409059
4,3000892,0.115015
...,...,...
28507,3029395,340.372360
28508,3029396,113.848349
28509,3029397,1272.751190
28510,3029398,116.628746
