# Output Notebook

In [146]:
# Core analysis / plotting stack.
# NOTE(review): `mpl`, `px`, `random`, `go`, `tqdm` and `tstests` are not used
# in the cells visible here — confirm before pruning.
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
import random

# Render matplotlib figures inline and set the default figure size and style.
%matplotlib inline
from pylab import rcParams
rcParams['figure.figsize'] = 10, 5
# NOTE(review): newer matplotlib releases renamed the bundled seaborn styles
# (e.g. 'seaborn-v0_8'); confirm 'seaborn' still resolves in the target env.
plt.style.use('seaborn')

import plotly.io as pio
import plotly.graph_objects as go

# Default theme applied to plotly figures.
pio.templates.default = 'plotly'

# Raise pandas' display limits to 100 columns / 400 rows.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 400
import warnings
# Silences every warning category for the rest of the session.
warnings.simplefilter('ignore')

from IPython.display import display, HTML
# Inject CSS that stretches the notebook container to 95% width.
display(HTML("<style>.container { width:95% !important; }</style>"))

# Project-local modules; `utils.run_model` is called by the forecasting cells.
import utils
import tstests

from tqdm import tqdm

In [147]:
# Indicator-column names. One level of each factor is deliberately left out
# (Sunday and December) so that it serves as the reference category.
weekdays = ['Mon', 'Tues', 'Wed', 'Thu', 'Fri', 'Sat'] # weekly seasonality
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov'] # yearly seasonality

temporal_features = weekdays+months

def add_temporal_features(df):
    """Add 0/1 day-of-week and month indicator columns to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a ``DatetimeIndex``. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with one integer column per name in
        ``weekdays`` and ``months`` (1 where the row's date matches, else 0).
    """
    # pandas numbers weekdays from Monday=0, so the label at position i
    # corresponds to weekday i (comparing against i+1 would shift every
    # label by one day: 'Mon' would flag Tuesdays, 'Sat' would flag Sundays).
    for day_number, label in enumerate(weekdays):
        df[label] = (df.index.weekday == day_number).astype(int)
    # Calendar months are numbered from January=1.
    for month_number, label in enumerate(months, start=1):
        df[label] = (df.index.month == month_number).astype(int)
    return df

def _fourier_term_names(num_terms=2):
    """Return the column names ``add_fourier_terms`` creates, in insertion order."""
    names = []
    for k in range(1, num_terms + 1):
        names += [f'sin365_{k}', f'sin7_{k}', f'cos365_{k}', f'cos7_{k}']
    return names

def add_fourier_terms(df, num_terms=2):
    """Add sine/cosine seasonal regressors to ``df``.

    For each harmonic ``k = 1..num_terms`` four columns are written:
    ``sin365_k`` / ``cos365_k`` (day-of-year over 365.25, yearly seasonality)
    and ``sin7_k`` / ``cos7_k`` (day-of-week over 7, weekly seasonality).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a ``DatetimeIndex``. It is modified in place.
    num_terms : int, default 2
        Number of harmonics per seasonal period.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new columns appended.
    """
    day_of_year = df.index.dayofyear
    day_of_week = df.index.dayofweek
    for k in range(1, num_terms + 1):
        df[f'sin365_{k}'] = np.sin(k*2*np.pi*day_of_year/365.25) # yearly seasonality
        df[f'sin7_{k}'] = np.sin(k*2*np.pi*day_of_week/7) # weekly seasonality

        df[f'cos365_{k}'] = np.cos(k*2*np.pi*day_of_year/365.25) # yearly seasonality
        df[f'cos7_{k}'] = np.cos(k*2*np.pi*day_of_week/7) # weekly seasonality

    return df

# Names of the columns produced by add_fourier_terms with its default
# num_terms. The SARIMAX cells pass `fourier_terms` as `exog_vars`, but no
# visible cell assigned it, so a fresh-kernel run failed with a NameError.
fourier_terms = _fourier_term_names()

In [148]:
# Read the training set, parse `date` and use it as the index.
train_data = (
    pd.read_csv("demand-forecasting-kernels-only/train.csv")
    .astype({'date': 'datetime64[ns]'})
    .set_index('date')
)
# Rebuild the index from its raw values (dropping the index name) and attach
# whatever frequency pandas infers for it, which may be None.
train_data.index = pd.DatetimeIndex(
    train_data.index.values,
    freq=train_data.index.inferred_freq,
)
train_data

Unnamed: 0,store,item,sales
2013-01-01,1,1,13
2013-01-02,1,1,11
2013-01-03,1,1,14
2013-01-04,1,1,13
2013-01-05,1,1,10
...,...,...,...
2017-12-27,10,50,63
2017-12-28,10,50,59
2017-12-29,10,50,74
2017-12-30,10,50,62


In [149]:
# Read the test set, parse `date` and use it as the index.
test_data = (
    pd.read_csv("demand-forecasting-kernels-only/test.csv")
    .astype({'date': 'datetime64[ns]'})
    .set_index('date')
)
# Rebuild the index from its raw values (dropping the index name) and attach
# whatever frequency pandas infers for it, which may be None.
test_data.index = pd.DatetimeIndex(
    test_data.index.values,
    freq=test_data.index.inferred_freq,
)
test_data

Unnamed: 0,id,store,item
2018-01-01,0,1,1
2018-01-02,1,1,1
2018-01-03,2,1,1
2018-01-04,3,1,1
2018-01-05,4,1,1
...,...,...,...
2018-03-27,44995,10,50
2018-03-28,44996,10,50
2018-03-29,44997,10,50
2018-03-30,44998,10,50


# MSTL Forecast Output

In [116]:
from statsmodels.tsa.seasonal import MSTL
import scipy as sp

def fourier(x, *a, tau):
    """Evaluate a truncated Fourier series at ``x``.

    With ``m = (len(a) - 1) // 2`` harmonics the value is
    ``a[0] + sum_k a[k]*cos(k*pi/tau*x) + a[m+k]*sin(k*pi/tau*x)`` for
    ``k = 1..m``; the fundamental therefore has period ``2*tau``.
    When ``len(a)`` is even the last coefficient is not used.

    Parameters
    ----------
    x : scalar or array-like
        Points at which to evaluate the series.
    *a : float
        Constant term, then ``m`` cosine and ``m`` sine coefficients.
    tau : float (keyword-only)
        Scale of the angular frequency ``pi / tau``.
    """
    total = a[0]
    n_harmonics = (len(a) - 1) // 2
    cos_coefs = a[1:n_harmonics + 1]
    sin_coefs = a[n_harmonics + 1:2 * n_harmonics + 1]
    for k, (cos_c, sin_c) in enumerate(zip(cos_coefs, sin_coefs), start=1):
        angle = k * np.pi / tau * x
        total += cos_c * np.cos(angle) + sin_c * np.sin(angle)
    return total

def fft_dominant_freqs(x, k):
    """Return the ``k`` strongest frequencies of ``x``, largest amplitude first.

    Only the first ``len(x) // 2`` FFT bins (the non-negative frequencies,
    including the zero-frequency bin) are considered. Frequencies are in
    cycles per sample (sample spacing of 1).
    """
    n_samples = len(x)
    half = n_samples // 2

    spectrum = np.fft.fft(x)
    bin_freqs = np.fft.fftfreq(n_samples, d=1)[:half]

    # Amplitude of each retained bin, scaled by 2/N.
    amplitudes = 2.0 / len(spectrum) * np.abs(spectrum[:half])

    strongest_first = np.argsort(-amplitudes)
    return bin_freqs[strongest_first[:k]]

def curve_fit_wrapper(x_data, y_data, tau_value, initial_guess):
    """Least-squares fit of ``fourier`` (with ``tau`` fixed) to the data.

    ``initial_guess`` is passed to ``scipy.optimize.curve_fit`` as ``p0``.
    Returns the ``(popt, pcov)`` pair produced by ``curve_fit``.
    """
    def model(x, *coefs):
        # Bind tau so curve_fit only sees the series coefficients.
        return fourier(x, *coefs, tau=tau_value)

    return sp.optimize.curve_fit(model, x_data, y_data, p0=initial_guess)

def get_mstl_forecast(train, valid, components=False, **f_kwargs):
    """Forecast ``train.sales`` by extrapolating an MSTL decomposition.

    The date-sorted sales series is decomposed with MSTL. The trend (with the
    residual added to it) is fitted by a polynomial in the integer time step,
    and each seasonal component is fitted by ``fourier`` through
    ``curve_fit_wrapper``. The fitted curves are evaluated on the training
    steps and on the ``valid.shape[0]`` steps that follow them.

    Parameters
    ----------
    train : DataFrame with a ``sales`` column.
    valid : object with ``.shape``; only its row count is used.
    components : bool, default False
        If True, return the individual fitted components instead of their sum.
    **f_kwargs :
        ``p`` (sequence of seasonal periods), ``poly_fit_terms`` (polynomial
        degree passed to ``np.polyfit``) and ``fourier_terms`` (number of FFT
        frequencies requested; they are used as ``p0`` for the curve fit).

    Returns
    -------
    tuple
        ``(train_components, valid_components)`` lists when ``components`` is
        True, otherwise ``(preds_train, preds_valid)`` arrays.

    NOTE(review): seasonal column ``i`` is paired with ``f_kwargs['p'][i]``;
    this assumes MSTL returns one seasonal column per requested period, in the
    order given — confirm for unsorted or single-element period lists.
    """
    sales = train.sales.sort_index()
    n_train = len(sales)
    n_valid = valid.shape[0]
    steps_train = np.arange(n_train)
    steps_valid = np.arange(start=n_train, stop=n_train + n_valid)

    # Decompose into trend, seasonal and residual parts.
    periods = f_kwargs['p']
    decomposition = MSTL(
        sales, periods=periods, iterate=3, stl_kwargs={'seasonal_deg': 0}
    ).fit()
    trend_plus_resid = decomposition.trend + decomposition.resid
    seasonals = decomposition.seasonal

    # Trend: polynomial in the time step, extrapolated over the horizon.
    trend_poly = np.poly1d(
        np.polyfit(steps_train, trend_plus_resid.values, f_kwargs['poly_fit_terms'])
    )
    train_components = [trend_poly(steps_train)]
    valid_components = [trend_poly(steps_valid)]

    # Seasonal parts: one Fourier fit per period.
    for column, tau in enumerate(f_kwargs['p']):
        seasonal_series = seasonals.iloc[:, column]
        start_values = fft_dominant_freqs(seasonal_series, f_kwargs['fourier_terms'])
        popt, _ = curve_fit_wrapper(steps_train, seasonal_series, tau, start_values)
        train_components.append(fourier(steps_train, *popt, tau=tau))
        valid_components.append(fourier(steps_valid, *popt, tau=tau))

    if components:
        return train_components, valid_components

    return np.sum(train_components, axis=0), np.sum(valid_components, axis=0)

In [117]:
# One (initially empty) score list per store/item pair, keyed 's<store>_i<item>'
# for 10 stores and 50 items, ordered item-major.
si_scores_dict = {
    f's{s+1}_i{i+1}': []
    for i in range(50)
    for s in range(10)
}
si_scores_df = pd.DataFrame.from_dict(si_scores_dict, orient='index')

# Empty frame with a 'train' and a 'valid' column.
scores_dict = {'train' : [], 'valid': []}
scores_df = pd.DataFrame.from_dict(scores_dict)

In [118]:
# Settings for the MSTL run: `f_kwargs` holds the keys get_mstl_forecast reads
# (seasonal periods of 7 and 7*52 steps, a degree-2 trend polynomial and
# 20 requested FFT frequencies).
kwargs_mstl = dict(
    name='mstl_forecast',
    f_kwargs=dict(
        p=[7, 7*52],
        poly_fit_terms=2,
        fourier_terms=20,
    ),
)


# NOTE(review): this rebinds `train_data` / `test_data` to run_model's outputs,
# and later cells copy those names — confirm that is intended.
train_data, test_data, si_scores_df, scores_df = utils.run_model(
    train_data,
    test_data,
    si_scores_df,
    scores_df,
    get_mstl_forecast,
    future=True,
    **kwargs_mstl,
)

(1,1)  train : 19.1059
(1,2)  train : 16.3027
(1,3)  train : 17.0057
(1,4)  train : 17.9434
(1,5)  train : 20.9278
(1,6)  train : 21.0318
(1,7)  train : 21.8134
(1,8)  train : 17.088
(1,9)  train : 17.9964
(1,10)  train : 17.6892
-----------------------------------------------------
Item 1 Train sMAPE : 18.6904
-----------------------------------------------------

(2,1)  train : 12.5221
(2,2)  train : 10.8241
(2,3)  train : 11.4876
(2,4)  train : 11.6556
(2,5)  train : 13.3441
(2,6)  train : 13.1903
(2,7)  train : 13.8903
(2,8)  train : 10.9854
(2,9)  train : 11.7904
(2,10)  train : 11.2777
-----------------------------------------------------
Item 2 Train sMAPE : 12.0968
-----------------------------------------------------

(3,1)  train : 15.0538
(3,2)  train : 13.0044
(3,3)  train : 13.4215
(3,4)  train : 14.4012
(3,5)  train : 16.4822
(3,6)  train : 16.6716
(3,7)  train : 17.4099
(3,8)  train : 13.0863
(3,9)  train : 14.46
(3,10)  train : 14.0755
----------------------------------

(23,1)  train : 17.6203
(23,2)  train : 14.4092
(23,3)  train : 14.6661
(23,4)  train : 15.4305
(23,5)  train : 18.4118
(23,6)  train : 18.5966
(23,7)  train : 19.5917
(23,8)  train : 14.6874
(23,9)  train : 15.5959
(23,10)  train : 15.1503
-----------------------------------------------------
Item 23 Train sMAPE : 16.416
-----------------------------------------------------

(24,1)  train : 11.8797
(24,2)  train : 10.2677
(24,3)  train : 10.6147
(24,4)  train : 10.8988
(24,5)  train : 12.8004
(24,6)  train : 12.5964
(24,7)  train : 12.8292
(24,8)  train : 10.4486
(24,9)  train : 10.8422
(24,10)  train : 10.8846
-----------------------------------------------------
Item 24 Train sMAPE : 11.4062
-----------------------------------------------------

(25,1)  train : 10.7865
(25,2)  train : 9.5866
(25,3)  train : 9.9358
(25,4)  train : 10.4184
(25,5)  train : 11.904
(25,6)  train : 11.519
(25,7)  train : 12.1407
(25,8)  train : 9.4319
(25,9)  train : 10.5603
(25,10)  train : 9.9925
------

(45,1)  train : 10.78
(45,2)  train : 9.565
(45,3)  train : 9.9699
(45,4)  train : 10.3512
(45,5)  train : 11.6361
(45,6)  train : 11.7355
(45,7)  train : 11.9987
(45,8)  train : 9.7083
(45,9)  train : 10.394
(45,10)  train : 9.9002
-----------------------------------------------------
Item 45 Train sMAPE : 10.6039
-----------------------------------------------------

(46,1)  train : 12.4032
(46,2)  train : 11.0924
(46,3)  train : 11.3248
(46,4)  train : 11.7851
(46,5)  train : 13.5061
(46,6)  train : 13.5555
(46,7)  train : 14.1991
(46,8)  train : 10.6272
(46,9)  train : 11.8573
(46,10)  train : 11.5415
-----------------------------------------------------
Item 46 Train sMAPE : 12.1892
-----------------------------------------------------

(47,1)  train : 18.7004
(47,2)  train : 15.9627
(47,3)  train : 16.7577
(47,4)  train : 18.0778
(47,5)  train : 21.1252
(47,6)  train : 20.2515
(47,7)  train : 21.9468
(47,8)  train : 16.4406
(47,9)  train : 18.2779
(47,10)  train : 17.3282
-------

In [120]:
# Submission frame: keep the row id and expose the forecast as `sales`.
final_df_mstl = test_data[['id', 'mstl_forecast']].rename(
    columns={'mstl_forecast': 'sales'}
)
final_df_mstl

Unnamed: 0,id,sales
2018-01-01,0,11.806305
2018-01-02,1,14.398936
2018-01-03,2,14.930680
2018-01-04,3,15.590702
2018-01-05,4,17.178543
...,...,...
2018-03-27,44995,78.851904
2018-03-28,44996,80.101602
2018-03-29,44997,85.137075
2018-03-30,44998,90.108781


In [121]:
# Write the MSTL submission (id, sales) without the date index.
final_df_mstl.to_csv('submission_mstl_forecast1.csv', index=False)

# SARIMAX Temporal Features

In [150]:
import statsmodels.tsa.api as smt

def get_sarimax_forecast(train, valid, **f_kwargs):
    """Fit a SARIMAX model with exogenous regressors and forecast ``valid``.

    Parameters
    ----------
    train : DataFrame with a ``sales`` column and the ``exog_vars`` columns.
        Its index is replaced in place by a ``DatetimeIndex`` carrying the
        inferred frequency (a side effect visible to the caller).
    valid : DataFrame with the ``exog_vars`` columns.
    **f_kwargs :
        ``exog_vars`` (list of regressor column names) and ``sarimax_kwargs``
        (keyword arguments forwarded to ``smt.SARIMAX``).

    Returns
    -------
    tuple
        ``(preds_train, preds_valid)``: the in-sample predictions and a
        forecast of ``valid.index.nunique()`` steps.
    """
    train.index = pd.DatetimeIndex(
        train.index.values,
        freq=train.index.inferred_freq,
    )

    sales_history = train.sales.sort_index()
    exog_columns = f_kwargs['exog_vars']
    regressors_train = train[exog_columns].sort_index()
    regressors_valid = valid[exog_columns].sort_index()

    fitted = smt.SARIMAX(
        sales_history,
        exog=regressors_train,
        **f_kwargs['sarimax_kwargs'],
    ).fit(maxiter=1000, method='powell', disp=False)

    in_sample = fitted.predict(exog=regressors_train)
    horizon = valid.index.nunique()
    out_of_sample = fitted.forecast(steps=horizon, exog=regressors_valid)

    return in_sample, out_of_sample

In [151]:
# Work on copies so the indicator columns are not added to the base frames.
train_sarimax = add_temporal_features(train_data.copy())
test_sarimax = add_temporal_features(test_data.copy())

In [154]:
# SARIMAX with order (4,1,1) and the weekday/month indicators as regressors.
kwargs_sarimax = dict(
    name='sarimax',
    f_kwargs=dict(
        sarimax_kwargs=dict(order=(4,1,1)),
        exog_vars=temporal_features,
    ),
)

train_sarimax, test_sarimax, si_scores_df, scores_df = utils.run_model(
    train_sarimax,
    test_sarimax,
    si_scores_df,
    scores_df,
    get_sarimax_forecast,
    future=True,
    **kwargs_sarimax,
)

(1,1)  train : 18.8493
(1,2)  train : 15.9752
(1,3)  train : 16.8166
(1,4)  train : 17.6517
(1,5)  train : 20.82
(1,6)  train : 20.7887
(1,7)  train : 21.9199
(1,8)  train : 16.8443
(1,9)  train : 17.4207
(1,10)  train : 17.2142
-----------------------------------------------------
Item 1 Train sMAPE : 18.4301
-----------------------------------------------------

(2,1)  train : 11.8394
(2,2)  train : 10.0557
(2,3)  train : 10.668
(2,4)  train : 11.1018
(2,5)  train : 12.6602
(2,6)  train : 12.7117
(2,7)  train : 13.4646
(2,8)  train : 10.1813
(2,9)  train : 11.107
(2,10)  train : 10.3604
-----------------------------------------------------
Item 2 Train sMAPE : 11.415
-----------------------------------------------------

(3,1)  train : 14.6544
(3,2)  train : 12.5087
(3,3)  train : 13.0554
(3,4)  train : 13.9067
(3,5)  train : 16.1262
(3,6)  train : 16.2904
(3,7)  train : 16.8867
(3,8)  train : 12.4898
(3,9)  train : 14.1564
(3,10)  train : 13.3804
------------------------------------

(23,2)  train : 13.9426
(23,3)  train : 14.2861
(23,4)  train : 15.0867
(23,5)  train : 17.95
(23,6)  train : 18.009
(23,7)  train : 19.3577
(23,8)  train : 14.2942
(23,9)  train : 15.0886
(23,10)  train : 14.9215
-----------------------------------------------------
Item 23 Train sMAPE : 16.0114
-----------------------------------------------------

(24,1)  train : 11.1574
(24,2)  train : 9.3569
(24,3)  train : 9.9282
(24,4)  train : 10.2517
(24,5)  train : 12.3993
(24,6)  train : 11.8413
(24,7)  train : 12.4128
(24,8)  train : 9.6097
(24,9)  train : 10.2968
(24,10)  train : 10.3208
-----------------------------------------------------
Item 24 Train sMAPE : 10.7575
-----------------------------------------------------

(25,1)  train : 10.0518
(25,2)  train : 8.5898
(25,3)  train : 9.0971
(25,4)  train : 9.5007
(25,5)  train : 11.1949
(25,6)  train : 10.6809
(25,7)  train : 11.4162
(25,8)  train : 8.3388
(25,9)  train : 9.7144
(25,10)  train : 9.0775
-----------------------------------

(45,1)  train : 9.9718
(45,2)  train : 8.7402
(45,3)  train : 8.8932
(45,4)  train : 9.5288
(45,5)  train : 10.9096
(45,6)  train : 10.8868
(45,7)  train : 11.4909
(45,8)  train : 8.8824
(45,9)  train : 9.5808
(45,10)  train : 8.9535
-----------------------------------------------------
Item 45 Train sMAPE : 9.7838
-----------------------------------------------------

(46,1)  train : 11.7781
(46,2)  train : 10.3335
(46,3)  train : 10.4921
(46,4)  train : 11.1789
(46,5)  train : 12.8482
(46,6)  train : 12.9058
(46,7)  train : 13.6015
(46,8)  train : 9.9042
(46,9)  train : 11.1262
(46,10)  train : 10.7171
-----------------------------------------------------
Item 46 Train sMAPE : 11.4886
-----------------------------------------------------

(47,1)  train : 18.5071
(47,2)  train : 15.7677
(47,3)  train : 16.6174
(47,4)  train : 17.6434
(47,5)  train : 20.9129
(47,6)  train : 20.3202
(47,7)  train : 21.6954
(47,8)  train : 16.1881
(47,9)  train : 17.791
(47,10)  train : 16.7094
---------

In [155]:
# Submission frame: keep the row id and expose the forecast as `sales`.
final_df_sarimax = test_sarimax[['id', 'sarimax']].rename(
    columns={'sarimax': 'sales'}
)
final_df_sarimax

Unnamed: 0,id,sales
2018-01-01,0,12.177669
2018-01-02,1,14.729065
2018-01-03,2,15.470478
2018-01-04,3,16.287699
2018-01-05,4,17.865893
...,...,...
2018-03-27,44995,73.958040
2018-03-28,44996,74.852684
2018-03-29,44997,79.684028
2018-03-30,44998,84.409104


In [156]:
# Write the SARIMAX (temporal features) submission without the date index.
final_df_sarimax.to_csv('submission_sarimax_final.csv', index=False)

# SARIMAX Fourier Terms

In [157]:
# Work on copies so the Fourier columns are not added to the base frames.
train_fourier = add_fourier_terms(train_data.copy())
test_fourier = add_fourier_terms(test_data.copy())

In [158]:
# SARIMAX with order (2,1,1) and the Fourier columns as regressors.
# NOTE(review): `fourier_terms` is expected to be the list of column names
# produced by add_fourier_terms — confirm it is defined before this cell runs.
kwargs_fourier = dict(
    name='sarimax_fourier',
    f_kwargs=dict(
        sarimax_kwargs=dict(order=(2,1,1)),
        exog_vars=fourier_terms,
    ),
)

train_fourier, test_fourier, si_scores_df, scores_df = utils.run_model(
    train_fourier,
    test_fourier,
    si_scores_df,
    scores_df,
    get_sarimax_forecast,
    future=True,
    **kwargs_fourier,
)

(1,1)  train : 19.9829
(1,2)  train : 17.1274
(1,3)  train : 17.7006
(1,4)  train : 18.6595
(1,5)  train : 21.5144
(1,6)  train : 21.7259
(1,7)  train : 22.6805
(1,8)  train : 18.0867
(1,9)  train : 18.862
(1,10)  train : 18.5387
-----------------------------------------------------
Item 1 Train sMAPE : 19.4879
-----------------------------------------------------

(2,1)  train : 13.2993
(2,2)  train : 11.8141
(2,3)  train : 12.3624
(2,4)  train : 12.7016
(2,5)  train : 14.156
(2,6)  train : 14.3885
(2,7)  train : 14.753
(2,8)  train : 11.916
(2,9)  train : 12.7008
(2,10)  train : 12.0469
-----------------------------------------------------
Item 2 Train sMAPE : 13.0138
-----------------------------------------------------

(3,1)  train : 15.9721
(3,2)  train : 13.9741
(3,3)  train : 14.4114
(3,4)  train : 15.2159
(3,5)  train : 17.3764
(3,6)  train : 17.3766
(3,7)  train : 18.3678
(3,8)  train : 13.9623
(3,9)  train : 15.5763
(3,10)  train : 14.8768
-----------------------------------

(23,1)  train : 18.5832
(23,2)  train : 15.53
(23,3)  train : 15.6646
(23,4)  train : 16.3566
(23,5)  train : 19.0243
(23,6)  train : 19.5478
(23,7)  train : 20.5713
(23,8)  train : 15.5534
(23,9)  train : 16.5249
(23,10)  train : 16.159
-----------------------------------------------------
Item 23 Train sMAPE : 17.3515
-----------------------------------------------------

(24,1)  train : 12.911
(24,2)  train : 11.2674
(24,3)  train : 11.6736
(24,4)  train : 11.706
(24,5)  train : 13.7601
(24,6)  train : 13.6523
(24,7)  train : 13.7302
(24,8)  train : 11.2989
(24,9)  train : 11.8879
(24,10)  train : 12.0434
-----------------------------------------------------
Item 24 Train sMAPE : 12.3931
-----------------------------------------------------

(25,1)  train : 11.6634
(25,2)  train : 10.5349
(25,3)  train : 10.7774
(25,4)  train : 11.2375
(25,5)  train : 13.0409
(25,6)  train : 12.3986
(25,7)  train : 12.9473
(25,8)  train : 10.2251
(25,9)  train : 11.5063
(25,10)  train : 11.0393
----

(45,1)  train : 11.8631
(45,2)  train : 10.6573
(45,3)  train : 10.7419
(45,4)  train : 11.2955
(45,5)  train : 12.8231
(45,6)  train : 12.5228
(45,7)  train : 13.0377
(45,8)  train : 10.6222
(45,9)  train : 11.1909
(45,10)  train : 10.8249
-----------------------------------------------------
Item 45 Train sMAPE : 11.5579
-----------------------------------------------------

(46,1)  train : 13.4791
(46,2)  train : 11.8413
(46,3)  train : 12.2555
(46,4)  train : 12.9093
(46,5)  train : 14.3976
(46,6)  train : 14.7051
(46,7)  train : 15.2158
(46,8)  train : 11.6998
(46,9)  train : 12.6805
(46,10)  train : 12.4397
-----------------------------------------------------
Item 46 Train sMAPE : 13.1624
-----------------------------------------------------

(47,1)  train : 19.5206
(47,2)  train : 17.0954
(47,3)  train : 17.6431
(47,4)  train : 18.8022
(47,5)  train : 22.2365
(47,6)  train : 21.107
(47,7)  train : 22.4215
(47,8)  train : 17.2951
(47,9)  train : 18.8384
(47,10)  train : 18.2688


In [159]:
# Submission frame: keep the row id and expose the forecast as `sales`.
final_df_fourier = test_fourier[['id', 'sarimax_fourier']].rename(
    columns={'sarimax_fourier': 'sales'}
)
final_df_fourier

Unnamed: 0,id,sales
2018-01-01,0,14.174030
2018-01-02,1,13.930729
2018-01-03,2,16.400175
2018-01-04,3,16.100659
2018-01-05,4,17.224980
...,...,...
2018-03-27,44995,65.869133
2018-03-28,44996,75.476379
2018-03-29,44997,76.797709
2018-03-30,44998,80.596601


In [160]:
# Write the SARIMAX (Fourier terms) submission without the date index.
final_df_fourier.to_csv('submission_sarimax_fourier_final.csv', index=False)

# SARIMAX Temporal Features + Fourier Terms

In [161]:
# Copies of the base frames carrying the Fourier columns first, then the
# weekday/month indicator columns.
train_t_fourier = add_temporal_features(add_fourier_terms(train_data.copy()))
test_t_fourier = add_temporal_features(add_fourier_terms(test_data.copy()))

In [162]:
# SARIMAX with order (5,1,1) using both the indicator and Fourier regressors.
# NOTE(review): `fourier_terms` is expected to be the list of column names
# produced by add_fourier_terms — confirm it is defined before this cell runs.
kwargs_t_fourier = dict(
    name='sarimax_t_fourier',
    f_kwargs=dict(
        sarimax_kwargs=dict(order=(5,1,1)),
        exog_vars=temporal_features+fourier_terms,
    ),
)

train_t_fourier, test_t_fourier, si_scores_df, scores_df = utils.run_model(
    train_t_fourier,
    test_t_fourier,
    si_scores_df,
    scores_df,
    get_sarimax_forecast,
    future=True,
    **kwargs_t_fourier,
)

(1,1)  train : 26.3545
(1,2)  train : 22.2251
(1,3)  train : 23.7927
(1,4)  train : 24.5195
(1,5)  train : 29.194
(1,6)  train : 28.719
(1,7)  train : 31.0002
(1,8)  train : 23.6177
(1,9)  train : 24.6224
(1,10)  train : 24.0997
-----------------------------------------------------
Item 1 Train sMAPE : 25.8145
-----------------------------------------------------

(2,1)  train : 16.6425
(2,2)  train : 14.6702
(2,3)  train : 14.9679
(2,4)  train : 15.5993
(2,5)  train : 17.7286
(2,6)  train : 17.4538
(2,7)  train : 18.6601
(2,8)  train : 14.0342
(2,9)  train : 15.5849
(2,10)  train : 14.4713
-----------------------------------------------------
Item 2 Train sMAPE : 15.9813
-----------------------------------------------------

(3,1)  train : 20.3993
(3,2)  train : 17.399
(3,3)  train : 18.6027
(3,4)  train : 19.4854
(3,5)  train : 22.9513
(3,6)  train : 22.8406
(3,7)  train : 23.2062
(3,8)  train : 17.7278
(3,9)  train : 19.959
(3,10)  train : 18.8369
-----------------------------------

(23,1)  train : 24.4386
(23,2)  train : 19.8215
(23,3)  train : 20.1934
(23,4)  train : 21.417
(23,5)  train : 25.5947
(23,6)  train : 25.3674
(23,7)  train : 26.8829
(23,8)  train : 20.2751
(23,9)  train : 20.9299
(23,10)  train : 20.9121
-----------------------------------------------------
Item 23 Train sMAPE : 22.5833
-----------------------------------------------------

(24,1)  train : 15.9593
(24,2)  train : 12.945
(24,3)  train : 14.2025
(24,4)  train : 14.5722
(24,5)  train : 17.291
(24,6)  train : 17.094
(24,7)  train : 16.9984
(24,8)  train : 13.9348
(24,9)  train : 14.7565
(24,10)  train : 14.3957
-----------------------------------------------------
Item 24 Train sMAPE : 15.2149
-----------------------------------------------------

(25,1)  train : 14.1819
(25,2)  train : 12.0288
(25,3)  train : 12.6937
(25,4)  train : 13.3371
(25,5)  train : 15.7453
(25,6)  train : 15.0056
(25,7)  train : 16.3859
(25,8)  train : 11.3978
(25,9)  train : 13.9356
(25,10)  train : 12.8069
---

(45,1)  train : 14.1946
(45,2)  train : 12.0259
(45,3)  train : 12.6312
(45,4)  train : 13.3093
(45,5)  train : 15.5342
(45,6)  train : 15.0748
(45,7)  train : 16.0072
(45,8)  train : 12.5806
(45,9)  train : 13.5481
(45,10)  train : 12.678
-----------------------------------------------------
Item 45 Train sMAPE : 13.7584
-----------------------------------------------------

(46,1)  train : 16.4106
(46,2)  train : 14.2793
(46,3)  train : 14.6813
(46,4)  train : 15.2818
(46,5)  train : 18.3805
(46,6)  train : 18.3587
(46,7)  train : 19.6898
(46,8)  train : 14.0407
(46,9)  train : 15.8928
(46,10)  train : 14.8776
-----------------------------------------------------
Item 46 Train sMAPE : 16.1893
-----------------------------------------------------

(47,1)  train : 26.0038
(47,2)  train : 21.6513
(47,3)  train : 23.4694
(47,4)  train : 25.0727
(47,5)  train : 29.6639
(47,6)  train : 30.0448
(47,7)  train : 30.4938
(47,8)  train : 22.4297
(47,9)  train : 25.1487
(47,10)  train : 23.1344


In [163]:
# Submission frame: keep the row id and expose the forecast as `sales`.
final_df_t_fourier = test_t_fourier[['id', 'sarimax_t_fourier']].rename(
    columns={'sarimax_t_fourier': 'sales'}
)
final_df_t_fourier

Unnamed: 0,id,sales
2018-01-01,0,13.430664
2018-01-02,1,15.979492
2018-01-03,2,16.551758
2018-01-04,3,17.200195
2018-01-05,4,18.762695
...,...,...
2018-03-27,44995,91.142578
2018-03-28,44996,92.378906
2018-03-29,44997,97.437500
2018-03-30,44998,102.425781


In [165]:
# Write the SARIMAX (temporal + Fourier) submission without the date index.
final_df_t_fourier.to_csv('submission_sarimax_t_fourier_final.csv', index=False)

# SARIMAX Deseasonalized Series

In [96]:
from statsmodels.tsa.forecasting.stl import STLForecast

def get_deseasoned_forecast(train, valid, **f_kwargs):
    """Forecast ``train.sales`` with ``STLForecast`` wrapping a SARIMAX model.

    Parameters
    ----------
    train : DataFrame with a ``sales`` column (used in its existing order).
    valid : object with ``.shape``; only its row count is used as the horizon.
    **f_kwargs :
        ``period`` (seasonal period for ``STLForecast``), ``order`` and
        ``seasonal_order`` (forwarded to ``smt.SARIMAX``).

    Returns
    -------
    tuple
        ``(preds_train, preds_valid)``: the in-sample predicted mean and a
        forecast of ``valid.shape[0]`` steps.
    """
    sales_history = train.sales

    sarimax_options = {
        'order': f_kwargs['order'],
        'seasonal_order': f_kwargs['seasonal_order'],
    }
    forecaster = STLForecast(
        sales_history,
        smt.SARIMAX,
        model_kwargs=sarimax_options,
        period=f_kwargs['period'],
    )

    fitted = forecaster.fit(fit_kwargs={'disp': False, 'method': 'powell'})

    in_sample = fitted.get_prediction().predicted_mean
    out_of_sample = fitted.forecast(valid.shape[0])

    return in_sample, out_of_sample

In [97]:
# Settings for the STL + SARIMAX run: `f_kwargs` holds the keys
# get_deseasoned_forecast reads (period, order, seasonal_order).
kwargs_deseasoned = dict(
    name='sarimax_deseasoned',
    f_kwargs=dict(
        period=364,
        order=(0,1,1),
        seasonal_order=(0,0,0,0),
    ),
)

In [98]:
# Working copies for the deseasonalized run.
train_cp = train_data.copy()
test_cp = test_data.copy()

In [99]:
# Run the STL + SARIMAX forecaster through utils.run_model; the returned
# frames and score tables replace the previous ones.
train_cp, test_cp, si_scores_df, scores_df = utils.run_model(train_cp, test_cp, 
                                       si_scores_df, scores_df,
                                       get_deseasoned_forecast, future=True, **kwargs_deseasoned)

(1,1)  train : 13.2703
(1,2)  train : 11.1831
(1,3)  train : 11.7717
(1,4)  train : 12.3315
(1,5)  train : 14.9082
(1,6)  train : 14.5337
(1,7)  train : 14.8011
(1,8)  train : 11.8222
(1,9)  train : 12.2457
(1,10)  train : 12.266
-----------------------------------------------------
Item 1 Train sMAPE : 12.9133
-----------------------------------------------------

(2,1)  train : 8.1227
(2,2)  train : 7.0468
(2,3)  train : 7.3174
(2,4)  train : 7.7406
(2,5)  train : 8.8252
(2,6)  train : 8.8586
(2,7)  train : 9.4499
(2,8)  train : 6.9559
(2,9)  train : 7.7275
(2,10)  train : 7.0645
-----------------------------------------------------
Item 2 Train sMAPE : 7.9109
-----------------------------------------------------

(3,1)  train : 10.508
(3,2)  train : 8.6055
(3,3)  train : 9.2331
(3,4)  train : 9.7274
(3,5)  train : 11.4571
(3,6)  train : 11.4148
(3,7)  train : 11.6586
(3,8)  train : 8.6874
(3,9)  train : 9.8995
(3,10)  train : 9.4455
--------------------------------------------------

(23,7)  train : 13.706
(23,8)  train : 10.0867
(23,9)  train : 10.4092
(23,10)  train : 10.743
-----------------------------------------------------
Item 23 Train sMAPE : 11.267
-----------------------------------------------------

(24,1)  train : 7.4366
(24,2)  train : 6.4684
(24,3)  train : 6.7447
(24,4)  train : 6.9976
(24,5)  train : 8.4912
(24,6)  train : 8.247
(24,7)  train : 8.4996
(24,8)  train : 6.6966
(24,9)  train : 7.2012
(24,10)  train : 7.197
-----------------------------------------------------
Item 24 Train sMAPE : 7.398
-----------------------------------------------------

(25,1)  train : 7.0747
(25,2)  train : 5.9243
(25,3)  train : 6.3736
(25,4)  train : 6.6274
(25,5)  train : 7.7341
(25,6)  train : 7.2786
(25,7)  train : 8.0387
(25,8)  train : 5.5724
(25,9)  train : 6.7615
(25,10)  train : 6.3783
-----------------------------------------------------
Item 25 Train sMAPE : 6.7764
-----------------------------------------------------

(26,1)  train : 9.1649
(26,2)  t

(45,9)  train : 6.7023
(45,10)  train : 6.2235
-----------------------------------------------------
Item 45 Train sMAPE : 6.8046
-----------------------------------------------------

(46,1)  train : 8.2786
(46,2)  train : 7.0515
(46,3)  train : 7.2117
(46,4)  train : 7.8027
(46,5)  train : 9.1644
(46,6)  train : 9.0308
(46,7)  train : 9.7013
(46,8)  train : 6.9048
(46,9)  train : 7.6924
(46,10)  train : 7.5588
-----------------------------------------------------
Item 46 Train sMAPE : 8.0397
-----------------------------------------------------

(47,1)  train : 13.0907
(47,2)  train : 11.5053
(47,3)  train : 11.4571
(47,4)  train : 12.2297
(47,5)  train : 14.9104
(47,6)  train : 14.8701
(47,7)  train : 15.1315
(47,8)  train : 11.3442
(47,9)  train : 12.4014
(47,10)  train : 11.5495
-----------------------------------------------------
Item 47 Train sMAPE : 12.849
-----------------------------------------------------

(48,1)  train : 8.7429
(48,2)  train : 7.1178
(48,3)  train : 8.022

In [100]:
# Display the test frame returned by the deseasonalized run.
test_cp

Unnamed: 0,id,store,item,sarimax_deseasoned
2018-01-01,0,1,1,14.069251
2018-01-02,1,1,1,10.559755
2018-01-03,2,1,1,16.314774
2018-01-04,3,1,1,13.433400
2018-01-05,4,1,1,20.909335
...,...,...,...,...
2018-03-27,44995,10,50,65.192745
2018-03-28,44996,10,50,71.602052
2018-03-29,44997,10,50,70.866271
2018-03-30,44998,10,50,75.257648


In [101]:
# Submission frame (id, sales) for the deseasonalized model, written without
# the date index.
final_df = test_cp[['id', 'sarimax_deseasoned']].rename(
    columns={'sarimax_deseasoned': 'sales'}
)
final_df.to_csv('submission_sarimax_deseasoned.csv', index=False)