In [1]:
import pandas as pd
import numpy as np
import sys
import numpy as np 
import pandas as pd 
import statsmodels as sm 
import warnings 
from scipy.stats import norm 
from statsmodels.tsa.stattools import acf 
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from sklearn.linear_model import LinearRegression

In [2]:
def sThetaF(y, s_period = 1, h = 10, s = None):
    """
    Fit a straight-line trend to a (optionally seasonally adjusted) series
    and extrapolate it ``h`` steps ahead.

    @param y : pandas Series with the observations in time order
               (``y.index`` and ``y.copy()`` are used, so a bare list or
               ndarray is not accepted)
    @param s_period : the no. of observations before the seasonal pattern repeats
    @param h : number of periods to forecast
    @param s : seasonal handling.
               * ``None`` (default): no seasonal adjustment at all.
               * ``'additive'``: additive decomposition.
               * any other non-None value (e.g. ``'multiplicative'``):
                 multiplicative decomposition, falling back to additive when
                 the data or the seasonal factors make division unsafe.
               When ``s_period >= 4`` the adjustment is applied only if the
               autocorrelation at lag ``s_period`` is significant; otherwise
               it is applied whenever ``s`` is truthy.
    @return : dict with key ``'mean'`` -> ndarray of ``h`` trend forecasts.
              The seasonal component is removed before fitting and is NOT
              added back to the forecast.
    """
    fcast = {} # store result
    n = y.index.size
    x = y.copy()
    m = s_period

    # Time axis measured in seasonal cycles: starts at 1, advances 1/m per step.
    time_y = np.arange(n) / m + 1
    time_fc = time_y[n - 1] + np.arange(1, h + 1) / m

    s_type = 'multiplicative'
    if s == 'additive':
        s = True
        s_type = 'additive'

    # Seasonality Test
    # NOTE(review): the test is only reached when the caller supplies `s`;
    # with s=None no seasonal adjustment ever happens. This gate may be
    # inverted relative to the intended "auto-detect" behaviour - confirm
    # before changing it, since flipping it alters the default-call output.
    if s is not None and m >= 4:
        r = acf(x, nlags = m + 1)[1:]
        # Critical values: 1.64 / sqrt(n) * sqrt(1 + 2 * sum_{k<lag} r_k^2).
        # The leading 1 must be concatenated with the squared autocorrelations
        # so that cumsum runs over a flat vector.
        clim = 1.64 / np.sqrt(n) * np.sqrt(
            np.cumsum(np.concatenate(([1.0], 2 * np.square(r))))
        )
        s = bool(abs(r[m - 1]) > clim[m - 1])
    elif not s:
        s = False

    # Decomposition
    if s:
        # Pass the seasonal period explicitly so that series without a
        # date-like index work; with the default s_period=1 leave it unset so
        # statsmodels can still infer the period from a frequency-aware index.
        period = m if m > 1 else None

        if s_type == 'multiplicative':
            if (np.asarray(x) <= 0).any():
                # Ratios are undefined for zero/negative observations.
                s_type = 'additive'
            else:
                seasonal = seasonal_decompose(x, model = 'multiplicative', period = period).seasonal
                if (np.asarray(seasonal) < 0.01).any():
                    # Near-zero factors would blow up the division below.
                    s_type = 'additive'

        if s_type == 'additive':
            seasonal = seasonal_decompose(x, model = 'additive', period = period).seasonal
            x = x - seasonal
        else:
            x = x / seasonal

    ## Find Theta Line
    # Ordinary least-squares line through (time_y, x), evaluated at time_fc.
    model = LinearRegression().fit(time_y.reshape(-1, 1), x)
    fcast['mean'] = model.intercept_ + model.coef_ * time_fc

    return fcast

In [4]:
# Load the training sales and forecast each row's series 28 steps ahead.
stv_df = pd.read_csv('./data/sales_train_validation.csv')

# Day columns used as the fitting window (d_1241 .. d_1913).
index = [f'd_{i}' for i in range(1241, 1914)]
stv_df['time_series'] = stv_df[index].values.tolist()

horizon = 28             # forecast steps; also used as the seasonal period below
n_series = len(stv_df)   # derived from the data instead of a hard-coded row count

# Collect the per-series forecasts in a list and stack once at the end;
# stacking inside the loop re-copies the whole accumulated array each time.
forecasts = []
for values in stv_df['time_series']:
    test_series = pd.Series(values, index)
    y_pred = sThetaF(test_series, s_period = 28, h = horizon)['mean']
    forecasts.append(y_pred)
pred = np.vstack(forecasts)

# Append an all-zero block of the same shape.
# NOTE(review): presumably placeholder rows for the remaining ids in
# sample_submission.csv - confirm.
# NOTE(review): the linear extrapolation can go negative for some series;
# consider clipping at 0 if the target cannot be negative.
pred = np.vstack((pred, np.zeros((n_series, horizon))))
columns = [f'F{i}' for i in range(1, horizon + 1)]
final = pd.DataFrame(pred, columns=columns)

# ids are attached by row position, so the submission template must provide
# an id for every forecast row.
subm = pd.read_csv('./data/sample_submission.csv')
final['id'] = subm['id']
assert final['id'].notna().all(), "sample_submission.csv has fewer ids than forecast rows"
final = final.set_index('id')

In [5]:
# Display the assembled forecast table (indexed by id, columns F1..F28).
final

Unnamed: 0_level_0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HOBBIES_1_001_CA_1_validation,0.818788,0.819230,0.819671,0.820112,0.820553,0.820994,0.821435,0.821876,0.822317,0.822758,...,0.826728,0.827170,0.827611,0.828052,0.828493,0.828934,0.829375,0.829816,0.830257,0.830698
HOBBIES_1_002_CA_1_validation,0.306901,0.306829,0.306756,0.306684,0.306611,0.306539,0.306466,0.306394,0.306321,0.306248,...,0.305595,0.305523,0.305450,0.305378,0.305305,0.305233,0.305160,0.305088,0.305015,0.304942
HOBBIES_1_003_CA_1_validation,0.701629,0.702627,0.703624,0.704621,0.705619,0.706616,0.707613,0.708611,0.709608,0.710605,...,0.719581,0.720579,0.721576,0.722573,0.723571,0.724568,0.725565,0.726563,0.727560,0.728557
HOBBIES_1_004_CA_1_validation,1.784228,1.783059,1.781890,1.780720,1.779551,1.778382,1.777212,1.776043,1.774873,1.773704,...,1.763180,1.762010,1.760841,1.759672,1.758502,1.757333,1.756164,1.754994,1.753825,1.752655
HOBBIES_1_005_CA_1_validation,1.249469,1.249897,1.250324,1.250751,1.251178,1.251605,1.252033,1.252460,1.252887,1.253314,...,1.257159,1.257586,1.258014,1.258441,1.258868,1.259295,1.259722,1.260150,1.260577,1.261004
HOBBIES_1_006_CA_1_validation,0.413805,0.412436,0.411067,0.409698,0.408329,0.406960,0.405591,0.404222,0.402853,0.401484,...,0.389162,0.387793,0.386424,0.385055,0.383686,0.382317,0.380948,0.379578,0.378209,0.376840
HOBBIES_1_007_CA_1_validation,0.306809,0.306846,0.306883,0.306921,0.306958,0.306996,0.307033,0.307070,0.307108,0.307145,...,0.307482,0.307519,0.307557,0.307594,0.307631,0.307669,0.307706,0.307744,0.307781,0.307818
HOBBIES_1_008_CA_1_validation,10.032822,10.034798,10.036774,10.038749,10.040725,10.042701,10.044676,10.046652,10.048628,10.050603,...,10.068385,10.070360,10.072336,10.074312,10.076287,10.078263,10.080239,10.082214,10.084190,10.086166
HOBBIES_1_009_CA_1_validation,0.023668,0.022032,0.020396,0.018760,0.017124,0.015487,0.013851,0.012215,0.010579,0.008943,...,-0.005782,-0.007418,-0.009054,-0.010690,-0.012326,-0.013963,-0.015599,-0.017235,-0.018871,-0.020507
HOBBIES_1_010_CA_1_validation,0.525937,0.525610,0.525284,0.524957,0.524631,0.524304,0.523978,0.523651,0.523325,0.522998,...,0.520060,0.519734,0.519407,0.519081,0.518754,0.518428,0.518101,0.517775,0.517448,0.517122
