# Libraries

In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

from sklearn.linear_model import LinearRegression,HuberRegressor,Ridge,TweedieRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler

import dateutil.easter as easter

import datetime
import optuna
import math

In [None]:
# --- Feature switches -------------------------------------------------------
# Holiday features: merge the public-holiday table / add a days-to-next-holiday column.
HOLIDAYS = False
NEXT_HOLIDAY = False

# Calendar season and weather columns.
SEASONS = True
WEATHER = True

# Lagged copies of the target (see day_roll / create_lag).
LAG_FEATURES = True

# --- Modelling switches -----------------------------------------------------
# Targeted rounding of the recursive submission before saving.
POST_PROCESSING = False
# Descriptive label only; it is not read by the code visible in this notebook.
MODEL_TYPE = "Ridge Regression"

# Rows dated on or before this day are used for fitting, later rows for validation.
VAL_SPLIT = "2017-12-31"  # alternative tried: "2018-05-31"

In [None]:
# Passed to the model as its iteration cap (max_iter).
EPOCHS = 10000
# NOTE(review): EARLY_STOPPING and DEVICE are not read by the code visible in this notebook.
EARLY_STOPPING = 30
DEVICE = "cpu"

# Scaler shared by every fit; SCALER_NAME is only used when printing the run summary.
SCALER_NAME = "MinMaxScaler"  # None MinMax Standard
SCALER = MinMaxScaler()  # MinMaxScaler StandardScaler

`obj` is the objective function of the algorithm, i.e. the quantity it tries to maximize or minimize; e.g. "regression" means it minimizes the squared residuals.

`metric` and `eval` are essentially the same; both are used for early stopping.

# Load Data

In [None]:
# Competition files; the first CSV column becomes the index of each frame.
train_df = pd.read_csv("../input/tabular-playground-series-jan-2022/train.csv", index_col=0)
test_df = pd.read_csv("../input/tabular-playground-series-jan-2022/test.csv", index_col=0)
sub = pd.read_csv("../input/tabular-playground-series-jan-2022/sample_submission.csv", index_col=0)

# GDP table indexed by year, so a (year, 'GDP_<country>') lookup works with .loc.
gdp_df = pd.read_csv(
    '../input/gdp-20152019-finland-norway-and-sweden/GDP_data_2015_to_2019_Finland_Norway_Sweden.csv'
).set_index('year')

if HOLIDAYS:
    # The holiday name is replaced by a constant int32 flag of 1; dates are parsed to datetime.
    holidays = (
        pd.read_csv(
            "../input/holidays-finland-norway-sweden-20152019/Holidays_Finland_Norway_Sweden_2015-2019.csv",
            usecols=["Date", "Country", "Name"],
        )
        .rename(columns={"Date": "date", "Country": "country", "Name": "holiday"})
        .assign(holiday=1)
        .astype({"holiday": "int32"})
        .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    )

In [None]:
# Weather table; the 'date' column is parsed to datetime on load so it can be merged on later.
df_weather = pd.read_csv('../input/finland-norway-and-sweden-weather-data-20152019/nordics_weather.csv', parse_dates=['date'])

In [None]:
# Convert the raw 'date' strings of both frames to datetime.
for raw_frame in (train_df, test_df):
    raw_frame["date"] = pd.to_datetime(raw_frame["date"])

In [None]:
train_df.head()

# Functions 

Thanks to [ambrosm](https://www.kaggle.com/ambrosm) for this amazing feature engineering: \
https://www.kaggle.com/ambrosm/tpsjan22-03-linear-model/notebook#More-feature-engineering-(advanced-model)

In [None]:
# Feature engineering
def engineer(df):
    """Build the base feature frame for the linear model.

    Reads the `date`, `country`, `store` and `product` columns of `df` and
    returns a new DataFrame on the same index containing:

    * ``gdp``: natural log of the value found in the module-level ``gdp_df``
      at (year of the row, ``'GDP_' + country``);
    * ``wd4`` / ``wd56``: Friday and weekend flags;
    * one-hot flags for country, store and product (the last category of
      each is left out);
    * two Fourier harmonics of the day of year (``sin1``, ``cos1``, ``sin2``,
      ``cos2``) plus their products with the mug and hat flags, so each
      product gets its own seasonal curve.
    """
    # Row-wise GDP lookup keyed by the row's year and country.
    gdp_per_row = df.apply(
        lambda row: gdp_df.loc[row.date.year, 'GDP_' + row.country], axis=1
    )
    weekday = df.date.dt.weekday

    new_df = pd.DataFrame({
        'gdp': np.log(gdp_per_row),
        'wd4': weekday == 4,     # Friday
        'wd56': weekday >= 5,    # Saturday and Sunday
    })

    # One-hot flags as (output column, source column, matched value).
    for column, source, value in (
        ('Finland', 'country', 'Finland'),
        ('Norway', 'country', 'Norway'),
        ('KaggleRama', 'store', 'KaggleRama'),
        ('Kaggle Mug', 'product', 'Kaggle Mug'),
        ('Kaggle Hat', 'product', 'Kaggle Hat'),
    ):
        new_df[column] = df[source] == value

    # Seasonal variation: harmonics k = 1, 2 of the yearly cycle, also
    # multiplied by the product flags (the year length is fixed at 365).
    base_angle = df.date.dt.dayofyear / 365 * 2 * math.pi
    for k in (1, 2):
        angle = base_angle * k
        new_df[f'sin{k}'] = np.sin(angle)
        new_df[f'cos{k}'] = np.cos(angle)
        for prefix, flag in (('mug', 'Kaggle Mug'), ('hat', 'Kaggle Hat')):
            for wave in ('sin', 'cos'):
                new_df[f'{prefix}_{wave}{k}'] = new_df[f'{wave}{k}'] * new_df[flag]

    return new_df
#train = engineer(train_df)

In [None]:
# Feature engineering for holidays
def engineer_more(df):
    """Return the base features from engineer() plus per-day holiday indicator columns.

    Every added column is a boolean flag that is true on one specific
    calendar day (optionally restricted by country) or on one specific day
    offset from a moving date (last Wednesday of June, first Sunday of
    November, Easter). The whole frame is cast to float64 before returning.

    Expects `df` to carry a datetime `date` column and a `country` column,
    in addition to whatever engineer() reads.
    """
    new_df = engineer(df)

    # End of year
    # dec24..dec31 for all countries, the same days again for Norway only (n-dec*),
    # then early-January days per country: Finland 1-13, Norway 1-9 (plain jan*), Sweden 1-14.
    new_df = pd.concat([new_df,
                        pd.DataFrame({f"dec{d}":
                                      (df.date.dt.month == 12) & (df.date.dt.day == d)
                                      for d in range(24, 32)}),
                        pd.DataFrame({f"n-dec{d}":
                                      (df.date.dt.month == 12) & (df.date.dt.day == d) & (df.country == 'Norway')
                                      for d in range(24, 32)}),
                        pd.DataFrame({f"f-jan{d}":
                                      (df.date.dt.month == 1) & (df.date.dt.day == d) & (df.country == 'Finland')
                                      for d in range(1, 14)}),
                        pd.DataFrame({f"jan{d}":
                                      (df.date.dt.month == 1) & (df.date.dt.day == d) & (df.country == 'Norway')
                                      for d in range(1, 10)}),
                        pd.DataFrame({f"s-jan{d}":
                                      (df.date.dt.month == 1) & (df.date.dt.day == d) & (df.country == 'Sweden')
                                      for d in range(1, 15)})],
                       axis=1)
    
    # May
    # may1..may9 for all countries; may19..may25 for Norway only.
    # The two day ranges do not overlap, so the shared "may{d}" prefix yields no duplicate column names.
    new_df = pd.concat([new_df,
                        pd.DataFrame({f"may{d}":
                                      (df.date.dt.month == 5) & (df.date.dt.day == d) 
                                      for d in list(range(1, 10))}), #  + list(range(17, 25))
                        pd.DataFrame({f"may{d}":
                                      (df.date.dt.month == 5) & (df.date.dt.day == d) & (df.country == 'Norway')
                                      for d in list(range(19, 26))})],
                       axis=1)
    
    # June and July
    # Only june8..june13 for Sweden are active; the Norway June/July blocks below are disabled.
    new_df = pd.concat([new_df,
                        pd.DataFrame({f"june{d}":
                                      (df.date.dt.month == 6) & (df.date.dt.day == d) & (df.country == 'Sweden')
                                      for d in list(range(8, 14))}),
                        #pd.DataFrame({f"june{d}":
                        #              (df.date.dt.month == 6) & (df.date.dt.day == d) & (df.country == 'Norway')
                        #              for d in list(range(22, 31))}),
                        #pd.DataFrame({f"july{d}":
                        #              (df.date.dt.month == 7) & (df.date.dt.day == d) & (df.country == 'Norway')
                        #              for d in list(range(1, 3))})],
                       ],
                       axis=1)
    
    # Last Wednesday of June
    # Hard-coded per year (2015-2019); a year outside this mapping maps to NaT, so none of its flags fire.
    # Flags cover day offsets -4..+5 around that date, for every country except Norway.
    wed_june_date = df.date.dt.year.map({2015: pd.Timestamp(('2015-06-24')),
                                         2016: pd.Timestamp(('2016-06-29')),
                                         2017: pd.Timestamp(('2017-06-28')),
                                         2018: pd.Timestamp(('2018-06-27')),
                                         2019: pd.Timestamp(('2019-06-26'))})
    new_df = pd.concat([new_df,
                        pd.DataFrame({f"wed_june{d}": 
                                      (df.date - wed_june_date == np.timedelta64(d, "D")) & (df.country != 'Norway')
                                      for d in list(range(-4, 6))})],
                       axis=1)
    
    # First Sunday of November
    # Same hard-coded-per-year approach; offsets 0..+8, every country except Norway.
    sun_nov_date = df.date.dt.year.map({2015: pd.Timestamp(('2015-11-1')),
                                         2016: pd.Timestamp(('2016-11-6')),
                                         2017: pd.Timestamp(('2017-11-5')),
                                         2018: pd.Timestamp(('2018-11-4')),
                                         2019: pd.Timestamp(('2019-11-3'))})
    new_df = pd.concat([new_df,
                        pd.DataFrame({f"sun_nov{d}": 
                                      (df.date - sun_nov_date == np.timedelta64(d, "D")) & (df.country != 'Norway')
                                      for d in list(range(0, 9))})],
                       axis=1)
    
    # First half of December (Independence Day of Finland, 6th of December)
    # dec6..dec13, Finland only; does not overlap with the dec24..dec31 columns created above.
    new_df = pd.concat([new_df,
                        pd.DataFrame({f"dec{d}":
                                      (df.date.dt.month == 12) & (df.date.dt.day == d) & (df.country == 'Finland')
                                      for d in list(range(6, 14))})],
                       axis=1)

    # Easter
    # Easter date of each row's year via dateutil; flags for offsets -2..+10, +40..+47 and +50..+58 days.
    # NOTE(review): the two later offset ranges presumably target Ascension / Pentecost — confirm.
    easter_date = df.date.apply(lambda date: pd.Timestamp(easter.easter(date.year)))
    new_df = pd.concat([new_df,
                        pd.DataFrame({f"easter{d}": 
                                      (df.date - easter_date == np.timedelta64(d, "D"))
                                      for d in list(range(-2, 11)) + list(range(40, 48)) + list(range(50, 59))})],
                       axis=1)
    
    #new_df = pd.get_dummies(new_df)

    # Boolean flags become 0.0 / 1.0 so the frame is purely numeric.
    return new_df.astype(np.float64)

# Engineered feature frames for both periods.
train = engineer_more(train_df)
test = engineer_more(test_df)

# The training frame additionally carries the target (as float32).
train['num_sold'] = train_df['num_sold'].astype(np.float32)

# Keep the raw date next to the features; it drives the time splits and lag ordering later on.
train['date'] = train_df['date']
test['date'] = test_df['date']

In [None]:
# Bring the raw categorical columns along; they are the grouping keys for lags and split models.
group_columns = ["store", "product", "country"]
for engineered, raw in ((train, train_df), (test, test_df)):
    engineered[group_columns] = raw[group_columns]

In [None]:
if SEASONS:
    print("Adding Seasons ")

    # Season code for months 1..12: Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4.
    seasons = [1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1]
    month_to_season = {month: season for month, season in enumerate(seasons, start=1)}

    for frame in (train, test):
        frame["season"] = frame["date"].dt.month.map(month_to_season)

In [None]:
if WEATHER:
    # Merge keys plus the two weather measurements that are used as features.
    w_feats = ['country', 'date', 'tavg', 'precipitation']
    #w_feats = ['country', 'date', 'precipitation', 'snow_depth', 'tavg', 'tmax','tmin']
    print("Adding weather")

    weather_subset = df_weather[w_feats]
    merge_keys = ['date', 'country']

    # merge() returns a freshly numbered index, so the original row index is put back afterwards.
    train = train.merge(weather_subset, on=merge_keys, how='left')
    train.index = train_df.index

    test = test.merge(weather_subset, on=merge_keys, how='left')
    test.index = test_df.index

In [None]:
train.head()

In [None]:
def public_hols(df):
    """Left-join the module-level `holidays` table onto `df` by date and country.

    Every missing value in the merged frame (not only in the holiday column)
    is replaced by 0. The result carries a fresh 0..n-1 index because it
    comes out of a merge.
    """
    merged = df.merge(holidays, how='left', on=['date', 'country'])
    return merged.fillna(0)

In [None]:
if HOLIDAYS:
    # public_hols() merges, and a merge returns a freshly numbered index.
    # Restore the original row index on BOTH frames (previously only test was
    # restored) so later index-based assignments line up with train_df / test_df.
    # A length mismatch here means the holiday table duplicated rows
    # (several holidays on one date and country) and fails loudly.
    train = public_hols(train)
    train.index = train_df.index

    test = public_hols(test)
    test.index = test_df.index

In [None]:
def next_holiday(x):
    """Return the number of days from `x` to the next date in `holidays["date"]`.

    Only offsets 1..200 days are considered (the day `x` itself does not
    count). Returns 0 when no holiday falls inside that window.

    Parameters
    ----------
    x : anything accepted by ``pd.Timestamp`` (string, datetime, Timestamp).

    Notes
    -----
    Reads the module-level `holidays` frame, whose `date` column must
    already be datetime for the comparison to match.
    """
    # Build the lookup once per call instead of rescanning the column for each offset.
    holiday_dates = set(holidays["date"])
    start = pd.Timestamp(x)

    for offset in range(1, 201):
        if start + pd.DateOffset(days=offset) in holiday_dates:
            return offset

    # No holiday within 200 days.
    return 0

if NEXT_HOLIDAY:
    # next_holiday() compares timestamps, so the holiday dates must be datetime.
    holidays["date"] = pd.to_datetime(holidays["date"])
    for frame in (train, test):
        frame["to_holiday"] = frame["date"].apply(next_holiday)

In [None]:
def SMAPE(y_true, y_pred):
    """Symmetric mean absolute percentage error, in percent (0..200).

    Computes ``mean(200 * |y_true - y_pred| / (|y_true| + |y_pred|))``.
    Terms where both values are zero contribute 0.

    Parameters
    ----------
    y_true, y_pred : array-likes of equal length (Series, arrays, lists).
        They are compared positionally, not by index.

    Returns
    -------
    float
    """
    actual = np.atleast_1d(np.asarray(y_true, dtype=float))
    forecast = np.atleast_1d(np.asarray(y_pred, dtype=float))

    # The absolute value is taken on both terms: with a plain `y_true` a
    # negative actual could cancel the forecast and zero the denominator.
    denominator = (np.abs(actual) + np.abs(forecast)) / 200.0

    # Divide only where the denominator is non-zero; the other entries keep
    # their initial 0.0, which avoids a divide-by-zero warning and NaNs.
    diff = np.zeros_like(denominator)
    np.divide(np.abs(actual - forecast), denominator, out=diff, where=denominator != 0)
    return np.mean(diff)

# Lag Features 

In [None]:
def create_lag(DAYS, df):
    """Add a ``shift{DAYS}`` column holding `num_sold` shifted by DAYS rows per group.

    Groups are (store, product, country); the shift is in row order within
    each group, and the first DAYS rows of a group are filled with 0.
    `df` is modified in place and also returned.
    """
    group_keys = ["store", "product", "country"]
    lagged_sales = df.groupby(group_keys)["num_sold"].shift(DAYS, fill_value=0)
    df[f"shift{DAYS}"] = lagged_sales
    return df

In [None]:
def rolling_mean_std(roll_window, df, shift_days=0):
    """Add per-group rolling mean and std of `num_sold` to `df` and return it.

    Two columns are written in place, ``rolling_{shift_days}_{roll_window}_mean``
    and ``..._std``. The statistics are computed separately inside every
    (store, product, country) group, so a window never mixes rows from two
    different series. Positions without a full window are set to 0.

    Parameters
    ----------
    roll_window : int
        Number of rows in the rolling window.
    df : DataFrame with `store`, `product`, `country` and `num_sold` columns.
    shift_days : int, default 0
        Rows to shift the target by before rolling.
        NOTE(review): with the default of 0 the window includes the current
        row's own target — use a positive shift if that is not intended.

    Returns
    -------
    DataFrame
        The same `df` object. (Previously the function returned None because
        it returned the result of ``fillna(..., inplace=True)``.)
    """
    group_keys = ["store", "product", "country"]
    col_name = 'rolling_' + str(shift_days) + '_' + str(roll_window)
    mean_col, std_col = col_name + "_mean", col_name + "_std"

    grouped_sales = df.groupby(group_keys)["num_sold"]
    df[mean_col] = grouped_sales.transform(
        lambda sales: sales.shift(shift_days).rolling(roll_window).mean()
    )
    df[std_col] = grouped_sales.transform(
        lambda sales: sales.shift(shift_days).rolling(roll_window).std()
    )

    # Fill only the new columns: zero-filling the whole frame would also
    # overwrite missing targets and other unrelated columns.
    df[[mean_col, std_col]] = df[[mean_col, std_col]].fillna(0)
    return df

In [None]:
def day_roll(df, day_shift, roll_window):
    """Add one lag column per entry of `day_shift` to `df` and return it.

    Parameters
    ----------
    df : DataFrame passed on to create_lag(), which writes the columns in place.
    day_shift : iterable of int
        Lags to create. The parameter itself is used now; previously the body
        read the global `days_shift` and silently ignored this argument.
    roll_window : iterable of int
        Accepted for interface compatibility only: rolling statistics are
        currently disabled, so this value is not used.
    """
    for day in day_shift:
        create_lag(day, df)

    # Rolling features are switched off; re-enable by calling
    # rolling_mean_std(window, df) for each window in roll_window.

    return df

# Shift Days

In [None]:
# Earlier, sparser configuration kept for reference:
#   days_shift = [1, 7, 14, 30]
#   roll_window = [7, 14, 30]

# Every lag from 1 to 29 rows, and a single 7-row rolling window.
days_shift = list(range(1, 30))
roll_window = [7]

In [None]:
# Disabled wrapper, kept as an inert string literal (it is never executed).
'''def roll_lag_run(df):
        df = day_roll(df,days_shift,roll_window)
    return df '''

# Add the lag columns to the training frame when the LAG_FEATURES flag is set.
# Only `train` is processed here; `test` has no target to lag at this point.
if LAG_FEATURES:
    print("Running Lag features")
    train = day_roll(train,days_shift,roll_window)

## Set Features

In [None]:
# features_base: columns available for the test period (no lag columns).
# features:      columns of the training frame, including any lag columns.
features_base = list(test.columns)
features = list(train.columns)

# Identifier / grouping columns are not model inputs.
for column in ("store", "product", "country", "date"):
    features_base.remove(column)
    features.remove(column)

# The target only exists in the training frame.
features.remove("num_sold")

print(features_base)
print()
print(features)

# Split and Scale

In [None]:
# Full training data with the base features (used for the full fit later on).
X = train[features_base]
y = train["num_sold"]

# Time-based hold-out: rows up to and including VAL_SPLIT train the model, later rows validate it.
fit_rows = train["date"] <= VAL_SPLIT
val_rows = train["date"] > VAL_SPLIT

X_train = train.loc[fit_rows, features]
X_test = train.loc[val_rows, features]

y_train = train.loc[fit_rows, "num_sold"]
y_test = train.loc[val_rows, "num_sold"]

In [None]:
def scale_data(X_train, X_test=None, test=None):
    """Fit the shared scaler on `X_train` and apply it to one held-out set.

    Parameters
    ----------
    X_train : features the scaler is fitted on (a training split or the full data).
    X_test : optional validation features; takes precedence when given.
    test : optional test-period features, used when `X_test` is None.

    Returns
    -------
    tuple
        ``(scaled X_train, scaled held-out set)``.

    Raises
    ------
    ValueError
        If neither `X_test` nor `test` is provided.

    Notes
    -----
    The module-level `SCALER` instance is refitted on every call.
    """
    if X_test is None and test is None:
        raise ValueError("scale_data needs either X_test or test to transform")

    scaler = SCALER
    X_train_s = scaler.fit_transform(X_train)

    # Both the validation and the full-train path are the same operation: transform the held-out set.
    held_out = X_test if X_test is not None else test
    return X_train_s, scaler.transform(held_out)

# Run model

In [None]:
def fit_model(X, y, test=None, X_test=None, y_test=None):
    """Fit a Ridge model on log1p(y) and predict one held-out set.

    Parameters
    ----------
    X, y : training features and target.
    test : features to predict when no validation set is given.
    X_test, y_test : validation features and target. When `X_test` is given
        the function runs in validation mode and `test` is ignored.

    Returns
    -------
    ``(preds, model, smape)`` in validation mode, ``(preds, model)`` otherwise.
    Predictions are mapped back to the original scale with expm1.

    Raises
    ------
    ValueError
        If neither `test` nor `X_test` is provided.
    """
    if X_test is None and test is None:
        raise ValueError("fit_model needs either test or X_test to predict on")

    validating = X_test is not None
    model = Ridge(max_iter=EPOCHS)

    # Pass the held-out set by keyword so it lands in the matching scale_data slot.
    if validating:
        X_s, held_out_s = scale_data(X, X_test=X_test)
    else:
        X_s, held_out_s = scale_data(X, test=test)

    # The target is modelled on a log scale and transformed back afterwards.
    model.fit(X_s, np.log1p(y))
    preds = np.expm1(model.predict(held_out_s))

    if validating:
        return preds, model, SMAPE(y_test, preds)
    return preds, model

In [None]:
# Validation run: fit on the rows up to VAL_SPLIT, score on the later rows.
val_predictions, model, smape = fit_model(
    X=X_train,
    y=y_train,
    X_test=X_test,
    y_test=y_test,
)

In [None]:
# Run summary: validation score plus the configuration values that produced it.
print("SMAPE :",smape )
print(f"\n EPOCHS: {EPOCHS}")
print(f"\n SCALER: {SCALER_NAME}")
print(f"\n POST_PROCESSING: {POST_PROCESSING}")

In [None]:
# Validation residuals (actual minus predicted).
# NOTE(review): `residuals` is computed but not used below — the scatter shows
# actual vs. predicted values, not the residuals the title refers to.
residuals = y_test - val_predictions
plt.figure(figsize = (20,7))
plt.scatter(y_test,val_predictions)
plt.title("Residual Analysis")
plt.show()

## Run on Full training data 

In [None]:
# fit on full dataset
# Uses the base feature set (X = train[features_base]) because the test frame has no lag columns.
onesplit_preds , model = fit_model(X,y,test[features_base])

In [None]:
# Two independent copies of the sample submission to hold the baseline predictions.
sub_base  = sub.copy(deep = True)
sub_base_full  = sub.copy(deep = True)
# NOTE(review): val_predictions are predictions for the validation rows of `train`,
# not for the test period. The assignment is positional and only works if the number
# of validation rows equals the number of submission rows — confirm this is intended.
sub_base["num_sold"] =  val_predictions
# Predictions for the test period from the model fitted on the full training data.
sub_base_full["num_sold"] =  onesplit_preds

In [None]:
sub_base_full

# Multi - Step Recursive 
We loop over the test period one time step (a number of days) at a time: predict that step, append the predictions to the training data, and re-train. \
This continues until the end of the test period.

In [None]:
# Number of days predicted per recursive step.
FREQUENCY = 1

# First and last day of the test period.
start_date = test["date"].min()
end_date = test["date"].max()

In [None]:
def multi_step_recursive(start_date, end_date, freq, sub, train_i, test_i):
    """Predict the test period step by step, feeding each prediction back as training data.

    Parameters
    ----------
    start_date, end_date : first and last date to predict (inclusive).
    freq : number of days predicted per step.
    sub : submission frame; its `num_sold` is overwritten IN PLACE for the predicted rows.
    train_i, test_i : training and test frames (not modified; they are concatenated into a copy).

    Returns
    -------
    (sub, smape) : the updated submission frame and a validation SMAPE.
        The SMAPE comes from a separate fit on `train_i` split at VAL_SPLIT;
        it does not depend on the recursive predictions.

    Relies on the module-level `features`, `days_shift`, `roll_window`,
    `LAG_FEATURES` and `VAL_SPLIT`.
    """
    delta = pd.DateOffset(days = freq)

    # One frame holding both periods; "ds" tags the origin of each row ("a" = train, "b" = test).
    all_df = pd.concat([train_i.assign(ds="a"),test_i.assign(ds="b")],axis =0)
    
    # Compute the lag columns over the combined frame
    if LAG_FEATURES:
        all_df = day_roll(all_df,days_shift,roll_window)

    while start_date <= end_date:

        # Rows to predict in this step: dates in [start_date, start_date + delta)
        test_split = all_df [  (all_df["date"]>= start_date ) & (all_df["date"]< start_date+delta) ][features]
        
        # Training data: every row strictly before this step, including earlier predictions written back below
        X = all_df[ all_df["date"]< start_date][features]
        y = all_df[ all_df["date"]< start_date]["num_sold"]
        
        # Re-fit on all data so far and predict this step
        # NOTE(review): with freq > 1 the later days of a step presumably see missing lag values — confirm.
        one_period_preds , model = fit_model(X,y,test_split[features])

        # Write the predictions back as if they were observed targets
        test_split["num_sold"] = one_period_preds
        all_df.loc[test_split.index, "num_sold"]  = test_split["num_sold"]

        sub.loc[test_split.index , "num_sold"] = one_period_preds
        
        # Recompute the lag columns so the next step sees the values just predicted
        if LAG_FEATURES:
            all_df = day_roll(all_df,days_shift,roll_window)

        #update start date
        start_date += delta
    
    # Validation score on the original training frame only (single fit, split at VAL_SPLIT)
    X_train = train_i[train_i["date"]<=VAL_SPLIT][features]
    X_test = train_i[train_i["date"]>VAL_SPLIT][features]
    y_train = train_i[train_i["date"]<=VAL_SPLIT]["num_sold"]
    y_test = train_i[train_i["date"]>VAL_SPLIT]["num_sold"]

    val_predictions , model ,smape = fit_model(X= X_train,y = y_train,test= None, X_test = X_test,y_test = y_test)
    print("SMAPE:",smape)
    
    return sub ,smape

In [None]:
# Recursive forecast over the whole test period with a single model for all series.
# A copy of `sub` is passed because the function writes into the frame it receives.
sub_recursive , smape = multi_step_recursive(start_date, end_date, FREQUENCY, sub.copy(deep=True), train, test)

In [None]:
sub_recursive

# Split on Store, Product , Country

In [None]:
import pandas as pd
# Turn off pandas' chained-assignment warning for the rest of the notebook.
pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
def split_models(split_on, sub_df, train, test):
    """Run the recursive forecast separately for every value of one column.

    Parameters
    ----------
    split_on : str
        Column to split by ("store", "product" or "country").
    sub_df : DataFrame
        Submission frame; its `num_sold` is filled in place, split by split.
    train, test : DataFrames containing the `split_on` column.

    Returns
    -------
    (split_smape, sub_df, model)
        `split_smape` is the unweighted mean of the per-split validation
        SMAPEs. The third element is kept for backward compatibility only:
        multi_step_recursive() does not hand back its fitted models, so this
        is the notebook-level `model` variable if one exists, otherwise None.

    Relies on the module-level `start_date`, `end_date` and `FREQUENCY`.
    """
    split_values = train[split_on].unique()
    n_splits = train[split_on].nunique()
    split_smape = 0

    for split in split_values:
        print(f"\nPredicting for {split_on} {split}")

        train_split = train[train[split_on] == split]
        test_split = test[test[split_on] == split]

        # The recursion writes into the frame it receives, so hand it a copy
        # of the caller's frame (no dependency on the global `sub`).
        final_predictions, smape = multi_step_recursive(
            start_date, end_date, FREQUENCY, sub_df.copy(deep=True), train_split, test_split
        )
        split_smape += smape / n_splits

        # Only the rows of this split carry new predictions.
        sub_df.loc[test_split.index, "num_sold"] = final_predictions.loc[test_split.index, "num_sold"]

    print(f"\n Final mean smape:", split_smape)

    return split_smape, sub_df, globals().get("model")

In [None]:
# One recursive model per store; predictions are collected in a copy of the sample submission.
store_smape, sub_store, model = split_models("store", sub.copy(deep=True), train ,test)
#sub_store

In [None]:
# One recursive model per product; predictions are collected in a copy of the sample submission.
product_smape, sub_product, model = split_models("product", sub.copy(deep=True), train ,test)
#sub_product

In [None]:
# One recursive model per country; predictions are collected in a copy of the sample submission.
country_smape, sub_country,model = split_models("country", sub.copy(deep=True), train ,test)
#sub_country

**Notes:**

* best----- Kaggle Hat (product)  ,   Norway (Country) , Sweden (country)
* good----- all store splits 
* bad------ Finland (Country)-worst   -- Kaggle Sticker(product) --KaggleMug(product) 

#### summary
* Country split is good  but Finland is bad \
* Try focus on Finland and see where stickers fits in 

# All Split 

In [None]:
import itertools
# Every (store, product, country) combination: 2 x 3 x 3 = 18 individual series.
split_stores = ['KaggleMart', 'KaggleRama']
split_products = ['Kaggle Mug', 'Kaggle Hat', 'Kaggle Sticker']
split_countries = ['Finland', 'Norway', 'Sweden']
all_splits = list(itertools.product(split_stores, split_products, split_countries))

In [None]:
def split_models_ALL(split_on, sub_df):
    """Run the recursive forecast separately for each (store, product, country) series.

    Parameters
    ----------
    split_on : iterable of (store, product, country) tuples
        The series to model; each tuple is also used as a key of the returned dict.
    sub_df : DataFrame
        Submission frame; its `num_sold` is filled in place, series by series.

    Returns
    -------
    (split_smape, sub_df, split_dict)
        `split_smape` is the unweighted mean of the per-series validation
        SMAPEs over the series actually passed in; `split_dict` maps each
        tuple to its SMAPE.

    Notes
    -----
    Reads the module-level `train`, `test`, `start_date`, `end_date` and
    `FREQUENCY`. The validation SMAPE is the one reported by
    multi_step_recursive(), which fits on the same split at VAL_SPLIT as the
    separate validation fit that used to precede it, so that fit is no
    longer repeated.
    """
    split_on = list(split_on)
    # Average over the splits that were requested, not over the global `all_splits`.
    n_splits = len(split_on)

    split_smape = 0
    split_dict = {}

    for split in split_on:
        store, product, country = split[0], split[1], split[2]
        print(f"\nPredicting for store: {store}, product: {product}, country: {country} ")

        train_split = train[(train["store"] == store) & (train["product"] == product) & (train["country"] == country)]
        test_split = test[(test["store"] == store) & (test["product"] == product) & (test["country"] == country)]

        # The recursion writes into the frame it receives, so hand it a copy
        # of the caller's frame (no dependency on the global `sub`).
        final_predictions, smape = multi_step_recursive(
            start_date, end_date, FREQUENCY, sub_df.copy(deep=True), train_split, test_split
        )

        split_smape += smape / n_splits
        split_dict[split] = smape

        # Only the rows of this series carry new predictions.
        sub_df.loc[test_split.index, "num_sold"] = final_predictions.loc[test_split.index, "num_sold"]

    print(f"\n final all_split smape:", split_smape)

    return split_smape, sub_df, split_dict

In [None]:
# One recursive model per individual (store, product, country) series.
smape_all, sub_all, split_dict = split_models_ALL(all_splits, sub.copy(deep=True))

In [None]:
split_dict

In [None]:
sub_all

# Post Processing & Submission 

Targeted rounding 
https://www.kaggle.com/c/petfinder-pawpularity-score/discussion/300992

In [None]:
# Targeted rounding: values whose fractional part is within 0.2 of a whole number are rounded to it.
# Applied to the recursive submission only.
# NOTE(review): the following cell rounds every submission unconditionally, which
# appears to make this selective rounding redundant — confirm which one is intended.
if POST_PROCESSING: 
    dec = sub_recursive % 1
    to_round = (dec<=0.2)|(dec>=0.8)
    sub_recursive[to_round] = np.round(sub_recursive[to_round])

In [None]:
# Output file -> submission frame, in the order the files are written.
submission_files = {
    "submission_base.csv": sub_base,
    "submission_base_full.csv": sub_base_full,
    "submission_recursive.csv": sub_recursive,
    "submission_store.csv": sub_store,
    "submission_product.csv": sub_product,
    "submission_country.csv": sub_country,
    "submission_all.csv": sub_all,
}

# Round every prediction to a whole number of units sold.
for submission in submission_files.values():
    submission["num_sold"] = submission["num_sold"].round()

for file_name, submission in submission_files.items():
    submission.to_csv(file_name)

# Training Visualization

In [None]:
plt.figure(figsize=(25, 10))

# Observed sales from the validation cut-off date onwards.
actual_rows = train[train["date"] >= VAL_SPLIT]
sns.lineplot(data=actual_rows, x="date", y="num_sold", label="actual", ci=None)

# Each submission frame is drawn against the test dates.
for curve, curve_label in (
    (sub_base, "Base prediction"),
    (sub_base_full, "Base full train prediction"),
    (sub_recursive, "Mutistep recursive prediction"),
):
    sns.lineplot(data=curve, x=test["date"], y="num_sold", label=curve_label, ci=None)

plt.title("Recursive vs Baseline")
plt.show()

In [None]:
plt.figure(figsize=(25, 10))

# Baseline and every split variant, all drawn against the test dates.
for curve, curve_label in (
    (sub_base, "Baseline prediction"),
    (sub_store, "Store recursive"),
    (sub_product, "Product recursive"),
    (sub_country, "Country recursive"),
    (sub_all, "ALL recursive"),
):
    sns.lineplot(data=curve, x=test["date"], y="num_sold", label=curve_label, ci=None)

# Mark Easter Sunday 2019 on the time axis.
easter_2019 = pd.to_datetime("2019-04-21")
plt.axvline(easter_2019, label="easter", c="r", linestyle="--")
plt.text(x=easter_2019, y=0, s='EASTER', rotation=90)

plt.title("Split recursive predctions ")
plt.show()

In [None]:
plt.figure(figsize=(25, 10))

# Single-model recursion next to the country and store split variants.
for curve, curve_label in (
    (sub_recursive, "Mutistep recursive"),
    (sub_country, "Country recursive"),
    (sub_store, "Store recursive"),
):
    sns.lineplot(data=curve, x=test["date"], y="num_sold", label=curve_label, ci=None)

plt.title(" Baseline Muti-step vs Store Multi-step")
plt.show()

##  Residuals comparison 

In [None]:
fig, ax = plt.subplots(2, 1, figsize=(25, 15))

# Top panel: the two prediction curves.
for curve, curve_label in (
    (sub_base_full, "Baseline full train prediction"),
    (sub_recursive, "Mutistep recursive prediction"),
):
    sns.lineplot(ax=ax[0], data=curve, x=test["date"], y="num_sold", label=curve_label, ci=None)
ax[0].set_title(f"Baseline vs Recursive Baseline")

# Bottom panel: their difference (baseline minus recursive).
res_base_rec = sub_base_full["num_sold"] - sub_recursive["num_sold"]
sns.lineplot(ax=ax[1], y=res_base_rec, x=test["date"], label="Residuals", ci=None)
ax[1].set_title(f"Residuals baseline - recursive")

plt.show()

In [None]:
fig, ax = plt.subplots(2, 1, figsize=(25, 15))

# Top panel: single-model recursion vs. per-store recursion.
for curve, curve_label in (
    (sub_recursive, "Mutistep recursive"),
    (sub_store, "Store recursive"),
):
    sns.lineplot(ax=ax[0], data=curve, x=test["date"], y="num_sold", label=curve_label, ci=None)
ax[0].set_title(" Baseline recursive vs Store split recursive")

# Bottom panel: their difference (recursive minus store split).
base_store = sub_recursive["num_sold"] - sub_store["num_sold"]
sns.lineplot(ax=ax[1], y=base_store, x=test["date"], label="Residuals", ci=None)
ax[1].set_title(f"Recursive baseline - Store split")

plt.show()

**Note**: 
* The store-split predictions are lower than the full (single-model) recursive predictions 

In [None]:
fig, ax = plt.subplots(2, 1, figsize=(25, 15))

# Top panel: single-model recursion vs. one model per individual series.
for curve, curve_label in (
    (sub_recursive, "Mutistep recursive"),
    (sub_all, "ALL recursive"),
):
    sns.lineplot(ax=ax[0], data=curve, x=test["date"], y="num_sold", label=curve_label, ci=None)
ax[0].set_title(" Baseline Muti-step vs ALL Multi-step")

# Bottom panel: their difference (recursive minus all-split).
base_store = sub_recursive["num_sold"] - sub_all["num_sold"]
sns.lineplot(ax=ax[1], y=base_store, x=test["date"], label="Residuals", ci=None)
ax[1].set_title(f"Recursive baseline - ALL Split")

plt.show()