In [1]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_squared_error, mean_squared_log_error
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, ExponentialSmoothing
from sklearn.model_selection import ParameterGrid

from tqdm import tqdm
import warnings
warnings.filterwarnings("error")

In [2]:
df = pd.read_csv('inventory_mana_cleaned.csv')
df_prod = pd.read_parquet('df_prod.parquet')[['product_code', 'type']]
df = df.merge(df_prod, on = 'product_code', how = 'left')
df['orderdate'] = pd.to_datetime(df.orderdate)
df.head()

Unnamed: 0,ordernumber,orderdate,year,month,day,customername,customer_type,warehousecode,product_code,orderquantity,...,revenue,costs,profit,client_group,product_type,is_holiday,week_num,year_month,year_week,type
0,SO - 000101,2015-08-26,2015,8,26,New Ltd,Distributor,NXH382,STK-112,5,...,3217.5,1430,1787.5,beta,AX,0,35,2015-08,2015-35,AX
1,SO - 000102,2016-01-13,2016,1,13,Winthrop,Wholesale,GUT930,STK-121,5,...,1732.4,1220,512.4,beta,CY,0,2,2016-01,2016-02,CY
2,SO - 000103,2015-09-19,2015,9,19,Apollo Ltd,Export,GUT930,STK-112,10,...,6435.0,2860,3575.0,gamma,AX,0,38,2015-09,2015-38,AX
3,SO - 000104,2015-12-10,2015,12,10,Eminence Corp,Export,AXW291,STK-150,6,...,1302.84,846,456.84,alpha,CX,0,50,2015-12,2015-50,CX
4,SO - 000105,2015-02-19,2015,2,19,E. Ltd,Wholesale,AXW291,STK-158,6,...,858.6,540,318.6,beta,CX,0,8,2015-02,2015-08,CX


In [3]:
data = df[df['type'] == 'CX'][['orderdate', 'orderquantity']].sort_values('orderdate').set_index('orderdate')
data.index = pd.DatetimeIndex(data.index).to_period('W')
data = data.reset_index().groupby(['orderdate'])[['orderquantity']].agg('sum')

x = pd.DataFrame(data = {'dummy': 1}, index = pd.date_range(start = '2015-01-06', end = '2016-06-30'))
x.index = pd.DatetimeIndex(x.index).to_period('W').rename('orderdate')
x = x.reset_index().groupby(['orderdate'])[['dummy']].agg('sum').reset_index()[['orderdate']]
data = data.merge(x, on = 'orderdate', how = 'right').fillna(0).set_index('orderdate')

In [4]:
train = data[data.index < '2016-04-01']
test = data[data.index > '2016-04-01']
(train.shape, test.shape)

((64, 1), (13, 1))

In [5]:
param_grid = \
{
    'trend': ['add', 'mul', None],
    'damped_trend': [True, False],
    'seasonal': ['add', 'mul', None],
    'seasonal_periods': [x for x in range (2, 13)],
    'use_boxcox': [True, False],
    'remove_bias': [True, False],
    'use_brute': [True, False]
}
param_list = list(ParameterGrid(param_grid))
hyper_para = pd.DataFrame(data = param_list)
print(hyper_para.shape)
hyper_para.head()

(1584, 7)


Unnamed: 0,damped_trend,remove_bias,seasonal,seasonal_periods,trend,use_boxcox,use_brute
0,True,True,add,2,add,True,True
1,True,True,add,2,add,True,False
2,True,True,add,2,add,False,True
3,True,True,add,2,add,False,False
4,True,True,add,2,mul,True,True


In [6]:
rmse_train = []
msle_train = []
rmse_test = []
msle_test = []

smoothing_level = []
smoothing_trend = []
smoothing_seasonal = []
damping_trend = []
initial_level = []
initial_trend = []
initial_seasons = []
lamda = []

for params in tqdm(param_list, total = len(param_list)):
    try:
        # Holt-Winter Exponential Smoothing
        htes = ExponentialSmoothing(train,
                                    trend = params['trend'], 
                                    damped_trend = params['damped_trend'], 
                                    seasonal = params['seasonal'], 
                                    seasonal_periods = params['seasonal_periods'],
                                    use_boxcox = params['use_boxcox']) \
            .fit(optimized = True, use_brute = params['use_brute'])
        
        # Metrics on Train
        rmse_train.append(
            np.sqrt(mean_squared_error(train.values, htes.fittedvalues.values))
        )
        msle_train.append(
            mean_squared_log_error(train.values, htes.fittedvalues.values)
        )
        
        # Metrics on Test
        rmse_test.append(
            np.sqrt(mean_squared_error(test.values, htes.forecast(test.shape[0]).values))
        )
        msle_test.append(
            mean_squared_log_error(test.values, htes.forecast(test.shape[0]).values)
        )
        
        # Hyperpara
        smoothing_level.append(htes.params['smoothing_level'])
        smoothing_trend.append(htes.params['smoothing_trend'])
        smoothing_seasonal.append(htes.params['smoothing_seasonal'])
        damping_trend.append(htes.params['damping_trend'])
        initial_level.append(htes.params['initial_level'])
        initial_trend.append(htes.params['initial_trend'])
        initial_seasons.append(htes.params['initial_seasons'])
        lamda.append(htes.params['lamda'])
    except:
        rmse_train.append(np.nan)
        msle_train.append(np.nan)
        rmse_test.append(np.nan)
        msle_test.append(np.nan)
        smoothing_level.append(np.nan)
        smoothing_trend.append(np.nan)
        smoothing_seasonal.append(np.nan)
        damping_trend.append(np.nan)
        initial_level.append(np.nan)
        initial_trend.append(np.nan)
        initial_seasons.append(np.nan)
        lamda.append(np.nan)
        breakpoint()

hyper_para['smoothing_level'] = smoothing_level
hyper_para['smoothing_trend'] = smoothing_trend
hyper_para['smoothing_seasonal'] = smoothing_seasonal
hyper_para['damping_trend'] = damping_trend
hyper_para['initial_level'] = initial_level
hyper_para['initial_trend'] = initial_trend
hyper_para['initial_seasons'] = initial_seasons
hyper_para['lamda'] = lamda
hyper_para['rmse_train'] = rmse_train
hyper_para['rmse_test'] = rmse_test
hyper_para['msle_train'] = msle_train
hyper_para['msle_test'] = msle_test
# hyper_para.to_csv('CX_param_config.csv')
hyper_para = hyper_para[~(hyper_para.rmse_test == np.nan)].sort_values(by = ['rmse_test', 'msle_test'], ascending = True).head(20)
hyper_para.iloc[0, :]

100%|██████████| 1584/1584 [00:33<00:00, 47.71it/s] 


damped_trend                                                      False
remove_bias                                                        True
seasonal                                                            add
seasonal_periods                                                     10
trend                                                               mul
use_boxcox                                                        False
use_brute                                                          True
smoothing_level                                                0.040357
smoothing_trend                                                0.040357
smoothing_seasonal                                               0.0001
damping_trend                                                       NaN
initial_level                                                182.813333
initial_trend                                                  1.000917
initial_seasons       [17.761250000000004, 14.011249999999997, -

In [8]:
htes = \
       ExponentialSmoothing(train,
                            trend = hyper_para.iloc[0, :]['trend'], 
                            damped_trend = hyper_para.iloc[0, :]['damped_trend'], 
                            seasonal = hyper_para.iloc[0, :]['seasonal'], 
                            seasonal_periods = hyper_para.iloc[0, :]['seasonal_periods'], 
                            initialization_method = 'known',
                            initial_level = hyper_para.iloc[0, :]['initial_level'], 
                            initial_trend = hyper_para.iloc[0, :]['initial_trend'], 
                            initial_seasonal = hyper_para.iloc[0, :]['initial_seasons'],
                            use_boxcox = False) \
    .fit(use_brute = hyper_para.iloc[0, :]['use_brute'])

In [20]:
fig = go.Figure(layout = go.Layout(xaxis_type = 'category', 
                                   xaxis = {'title': 'timeline'}, 
                                   yaxis = {'title': 'orderquantity'},
                                   title = 'Weekly forecast on train and test dataset of CX product group'))

fig.add_trace(
    go.Scatter(
        x = data.index.strftime('%Y-%m-%d'),
        y = data.values.reshape(data.shape[0], ),
        mode = 'lines',
        name = 'y'
    )
)

fig.add_trace(
    go.Scatter(
        x = htes.fittedvalues.index.strftime('%Y-%m-%d'),
        y = htes.fittedvalues.values,
        mode = 'lines',
        name = 'fitted_train'
    )
)

fig.add_trace(
    go.Scatter(
        x = htes.forecast(test.shape[0]).index.strftime('%Y-%m-%d'),
        y = htes.forecast(test.shape[0]).values,
        mode = 'lines',
        name = 'forecasted_test'
    )
)

fig.show()