# Forecasting beer sales 

github: https://github.com/kohlicekjan/forecasting-beer-sales-ml

### Source
- https://scikit-learn.org/stable/index.html
- https://www.mariofilho.com/how-to-predict-multiple-time-series-with-scikit-learn-with-sales-forecasting-example/
- https://alkaline-ml.com/pmdarima/quickstart.html
- https://facebook.github.io/prophet/

In [139]:
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split
import lightgbm as lgb

In [140]:
DATASET_NAME = 'off-trade' # all_cz, off-trade_cz, on-trade_cz

DATA_PATH = f'./data/{DATASET_NAME}_data.csv'

MODEL_PATH = f'./models/{DATASET_NAME}_model.joblib'

RESULT_CSV_PATH = f'./results/{DATASET_NAME}_result.csv'
RESULT_EXCEL_PATH = f'./results/{DATASET_NAME}_result.xlsx'

In [141]:
data = pd.read_csv(DATA_PATH, header=0, decimal=",")

data.head()

Unnamed: 0,SkuShort,ProductGroup,PrimaryPack,Country,Year,Week,NumberWorkdays,AvgTemp,AvgRain,AvgSun,IsLockdown,PdtHl,PrevWeekPdtHl1,BgtHl,PrevWeekBgtHl1,SalesHl,PrevWeekSalesHl1,PrevWeekSalesHl2,OldPredSalesHl
0,1015,CLEAR BEER,RB,CZ,2016,1,5,-1.164286,0.935714,1.014286,0,,,1637.344974,,1805.0,,,705.4995
1,1016,CLEAR BEER,RB,CZ,2016,1,5,-1.164286,0.935714,1.014286,0,,,293.344724,,159.1128,,,160.0957
2,1022,CLEAR BEER,RB,CZ,2016,1,5,-1.164286,0.935714,1.014286,0,,,59.144115,,72.49,,,74.442
3,1026,CLEAR BEER,RB,CZ,2016,1,5,-1.164286,0.935714,1.014286,0,,,10.669629,,23.8392,,,8.8567
4,1027,CLEAR BEER,RB,CZ,2016,1,5,-1.164286,0.935714,1.014286,0,,,2.109411,,3.96,,,4.9003


In [142]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44195 entries, 0 to 44194
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   SkuShort          44195 non-null  int64  
 1   ProductGroup      44195 non-null  object 
 2   PrimaryPack       44195 non-null  object 
 3   Country           44195 non-null  object 
 4   Year              44195 non-null  int64  
 5   Week              44195 non-null  int64  
 6   NumberWorkdays    44195 non-null  int64  
 7   AvgTemp           44195 non-null  float64
 8   AvgRain           44195 non-null  float64
 9   AvgSun            44195 non-null  float64
 10  IsLockdown        44195 non-null  int64  
 11  PdtHl             29061 non-null  float64
 12  PrevWeekPdtHl1    29071 non-null  float64
 13  BgtHl             35449 non-null  float64
 14  PrevWeekBgtHl1    34515 non-null  float64
 15  SalesHl           44195 non-null  float64
 16  PrevWeekSalesHl1  42943 non-null  float6

In [143]:
cols_drop = ['Year', 'SalesHl', 'OldPredSalesHl']
#'SkuShort', 'ProductGroup', 'PrimaryPack', 'Country',

y = pd.DataFrame(data.SalesHl).round(0).astype(int)
X = data.drop(cols_drop, axis=1).fillna(0)

X.IsLockdown = X.IsLockdown.astype('bool')
X.SkuShort = X.SkuShort.astype('category')
X.ProductGroup = X.SkuShort.astype('category')
X.PrimaryPack = X.SkuShort.astype('category')
X.Country = X.Country.astype('category').cat.codes
X['PrevWeekSalesDiff'] = X.PrevWeekSalesHl1-X.PrevWeekSalesHl2

#X = X.drop(['PrevWeekSalesHl1', 'PrevWeekSalesHl2'], axis=1)

# X.PrevWeekSalesHl1 = X.PrevWeekSalesHl1.round(0).astype(int)
# X.PrevWeekSalesHl2 = X.PrevWeekSalesHl2.round(0).astype(int)
# X.BgtHl = X.BgtHl.round(0).astype(int)
# X.PdtHl = X.PdtHl.round(0).astype(int)


y_oldPred = pd.DataFrame(data.OldPredSalesHl).fillna(0).round(0).astype(int)

#y.head()
X.head()

Unnamed: 0,SkuShort,ProductGroup,PrimaryPack,Country,Week,NumberWorkdays,AvgTemp,AvgRain,AvgSun,IsLockdown,PdtHl,PrevWeekPdtHl1,BgtHl,PrevWeekBgtHl1,PrevWeekSalesHl1,PrevWeekSalesHl2,PrevWeekSalesDiff
0,1015,1015,1015,0,1,5,-1.164286,0.935714,1.014286,False,0.0,0.0,1637.344974,0.0,0.0,0.0,0.0
1,1016,1016,1016,0,1,5,-1.164286,0.935714,1.014286,False,0.0,0.0,293.344724,0.0,0.0,0.0,0.0
2,1022,1022,1022,0,1,5,-1.164286,0.935714,1.014286,False,0.0,0.0,59.144115,0.0,0.0,0.0,0.0
3,1026,1026,1026,0,1,5,-1.164286,0.935714,1.014286,False,0.0,0.0,10.669629,0.0,0.0,0.0,0.0
4,1027,1027,1027,0,1,5,-1.164286,0.935714,1.014286,False,0.0,0.0,2.109411,0.0,0.0,0.0,0.0


In [144]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train.head()

Unnamed: 0,SkuShort,ProductGroup,PrimaryPack,Country,Week,NumberWorkdays,AvgTemp,AvgRain,AvgSun,IsLockdown,PdtHl,PrevWeekPdtHl1,BgtHl,PrevWeekBgtHl1,PrevWeekSalesHl1,PrevWeekSalesHl2,PrevWeekSalesDiff
10783,6515,6515,6515,0,11,5,2.792857,2.05,1.414286,False,2347.918956,3.577778,3067.973863,25.030124,19.2,13.7,5.5
17863,1096,1096,1096,0,30,5,23.871429,1.485714,9.15,False,0.0,0.0,0.0,0.0,3.24,4.32,-1.08
43989,13638,13638,13638,1,50,5,2.767033,0.476923,0.665934,True,0.0,125.566506,5.524795,183.38848,158.6376,140.8968,17.7408
10311,6515,6515,6515,0,6,5,-1.457143,0.65,1.228571,False,1184.258242,4.111111,3606.435097,25.091435,19.7,17.2,2.5
6025,13438,13438,13438,0,15,4,9.614286,1.035714,3.835714,False,0.428571,183.844921,0.0,440.2233,320.5224,556.38,-235.8576


In [145]:
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score, max_error, mean_absolute_percentage_error

from sklearn import ensemble, gaussian_process, isotonic, kernel_ridge, linear_model, neighbors, neural_network, svm, tree
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.experimental import enable_hist_gradient_boosting

# Create linear regression object

#NOT model = ensemble.AdaBoostRegressor()
#model = ensemble.BaggingRegressor() # ON-TRADE: ,OFF-TRADE: 0.79 {'base_estimator': DecisionTreeRegressor(random_state=0), 'bootstrap': True, 'bootstrap_features': False, 'n_estimators': 200, 'oob_score': False, 'verbose': 1, 'warm_start': True}

model = ensemble.ExtraTreesRegressor() #0.7962

# model = ensemble.GradientBoostingRegressor()

#model = ensemble.RandomForestRegressor(n_estimators=1000, n_jobs=6) # ON-TRADE: ,OFF-TRADE: 0.7933 {'bootstrap': True, 'criterion': 'mae', 'oob_score': True, 'warm_start': False}

#NOT model = ensemble.StackingRegressor(estimators=[('lgbm', lgb.LGBMRegressor()),('hgb', ensemble.HistGradientBoostingRegressor())],final_estimator=ensemble.BaggingRegressor(), n_jobs=4, passthrough=False) 
#model = ensemble.VotingRegressor([('lgbm', lgb.LGBMRegressor(n_estimators=5000, learning_rate=0.005)), ('rf', ensemble.RandomForestRegressor(n_estimators=1000))]) # ON-TRADE: ,OFF-TRADE: 0.8143

#model = ensemble.HistGradientBoostingRegressor(early_stopping=False) # ON-TRADE: ,OFF-TRADE: 0.7937 {'learning_rate': 0.01, 'loss': 'least_squares', 'max_iter': 1000, 'max_leaf_nodes': 70}

#NOT model = gaussian_process.GaussianProcessRegressor()
#NOT model = isotonic.IsotonicRegression()

#NOT model = kernel_ridge.KernelRidge(alpha=1.0)# ON-TRADE: ,OFF-TRADE: 0.7827

# #option CV
#SLOW model = linear_model.LogisticRegression(max_iter=100)
#model = linear_model.LinearRegression()# ON-TRADE: ,OFF-TRADE: 0.7875 {'copy_X': False, 'fit_intercept': True, 'normalize': False, 'positive': False}
#model = linear_model.Ridge(max_iter=1000)# ON-TRADE: ,OFF-TRADE: 0.7876 {'alpha': 0.005, 'copy_X': False, 'fit_intercept': True, 'normalize': True, 'solver': 'sparse_cg'}
#model = linear_model.SGDRegressor(max_iter=1500, early_stopping=False) # ON-TRADE: ,OFF-TRADE:  0.7819 {'alpha': 0.0001, 'average': True, 'epsilon': 0.1, 'eta0': 0.01, 'fit_intercept': True, 'l1_ratio': 0.15, 'learning_rate': 'invscaling', 'loss': 'huber', 'penalty': 'l2', 'power_t': 0.1, 'shuffle': True, 'verbose': False, 'warm_start': True}

#model = linear_model.ElasticNet(max_iter=2000)# ON-TRADE: ,OFF-TRADE: 0.7875 {'alpha': 0.001, 'copy_X': False, 'fit_intercept': True, 'l1_ratio': 1, 'normalize': True, 'positive': False, 'precompute': False, 'selection': 'random', 'warm_start': True}
#model = linear_model.Lars(n_nonzero_coefs=1000) # ON-TRADE: , OFF-TRADE: 0.7875 {'copy_X': True, 'fit_intercept': True, 'fit_path': True, 'jitter': False, 'normalize': False, 'precompute': False, 'verbose': True}
#model = linear_model.LassoLars(max_iter=500) # ON-TRADE: , OFF-TRADE: 0.7875 {'alpha': 0.001, 'copy_X': True, 'fit_intercept': True, 'fit_path': True, 'jitter': False, 'normalize': True, 'positive': False, 'precompute': True, 'verbose': True}
#model = linear_model.OrthogonalMatchingPursuit() # ON-TRADE: , OFF-TRADE: 0.7035 {'fit_intercept': True, 'normalize': True}
#model = linear_model.ARDRegression(n_iter=500) # ON-TRADE: , OFF-TRADE: 0.7875 {'compute_score': True, 'copy_X': True, 'fit_intercept': True, 'normalize': False, 'verbose': True}
#model = linear_model.BayesianRidge(n_iter=500) # ON-TRADE: , OFF-TRADE: 0.7875 {'compute_score': True, 'copy_X': True, 'fit_intercept': True, 'normalize': True, 'verbose': True}



#model = linear_model.HuberRegressor(max_iter=500) # ON-TRADE: , OFF-TRADE: 0.7805 {'epsilon': 1.6, 'fit_intercept': True, 'warm_start': True}
#model = linear_model.RANSACRegressor(max_trials=500) #O N-TRADE: , OFF-TRADE: 0.7225
#model = linear_model.TheilSenRegressor(max_iter=500, n_jobs=-1) # ON-TRADE: , OFF-TRADE: 0.7582 {'copy_X': False, 'fit_intercept': True, 'verbose': True}

#NOT model = linear_model.PoissonRegressor(max_iter=500)
# model = linear_model.TweedieRegressor(max_iter=500) # ON-TRADE: , OFF-TRADE: 0.7871 {'alpha': 0.05, 'fit_intercept': False, 'link': 'auto', 'power': 0, 'warm_start': True}
#NOT model = linear_model.GammaRegressor(max_iter=500) # ON-TRADE: ,OFF-TRADE:
#NOT model = linear_model.PassiveAggressiveRegressor(random_state=0, fit_intercept=True) # ON-TRADE: , OFF-TRADE: 0.6938

# model = neighbors.KNeighborsRegressor(n_neighbors=7, weights='uniform', leaf_size=30, n_jobs=-1) # ON-TRADE: , OFF-TRADE: 0.7941
#NOT model = neighbors.RadiusNeighborsRegressor(radius=5.0, weights='distance')

#NOT model = svm.LinearSVR() # ON-TRADE: , OFF-TRADE: 0.7049



#model = neural_network.MLPRegressor(random_state=1, hidden_layer_sizes=60, max_iter=500, early_stopping=False) # ON-TRADE: , OFF-TRADE: 0.8059
#{'activation': 'relu', 'learning_rate': 'constant', 'nesterovs_momentum': True, 'shuffle': False, 'solver': 'adam', 'verbose': True, 'warm_start': True}

#model = lgb.LGBMRegressor(n_estimators=5000, learning_rate=0.005) # ON-TRADE: , OFF-TRADE: 0.8140


param_grid = dict(criterion=['mse','mae'])
#alpha=[1, 0.5, 0.1, 0.05, 0.001, 0.0005, 0.0001],fit_intercept=[True, False], verbose=[True, False], normalize=[True, False], precompute=[True, False], copy_X=[True, False], fit_path=[True, False], positive=[True, False], jitter=[True, False]
clf = GridSearchCV(model, param_grid, n_jobs=1) #, random_state=0
search = clf.fit(X_train, y_train.values.ravel())
#print(search.cv_results_)
print(search.best_score_)
print(search.best_params_)



# #BEST 
# #ON-TRADE: 0.9745, 5636.8731
# #OFF-TRADE: 0.8045, 14567.9493, without sku: (0.7934, 14149.3662)
# #model = ensemble.HistGradientBoostingRegressor(random_state=1, loss='least_squares', learning_rate=0.05, max_iter=350, max_leaf_nodes=70, early_stopping=False)

# # Train the model using the training sets
# model.fit(X_train, y_train.values.ravel())

# # Make predictions using the testing set
# y_pred = model.predict(X_test)
# y_pred[y_pred < 0] = 0

# # # The mean squared error
# # print('Mean squared error: %.4f'% mean_squared_error(y_test, y_pred))

# print(DATASET_NAME)
# # The coefficient of determination: 1 is perfect prediction
# print('Coefficient of determination: %.4f'% r2_score(y_test, y_pred))

# # #Best possible score is 1.0, lower values are worse.
# # print('Explained variance regression: %.4f'% explained_variance_score(y_test, y_pred))

# #max_error metric calculates the maximum residual error.
# print('Maximum residual error: %.4f'% max_error(y_test, y_pred))

# # #Mean absolute percentage error regression loss.
# # print('Mean absolute percentage error regression loss: %.4f'% mean_absolute_percentage_error(y_test, y_pred))

# print('Old Coefficient of determination: %.4f'% r2_score(y, y_oldPred))
# print('Old Maximum residual error: %.4f'% max_error(y, y_oldPred))

# # print(f'The number of iterations as selected by early stopping: {model.n_iter_}')
# # print(f'The scores at each iteration on the training data: {model.train_score_}')
# # print(f'The scores at each iteration on the held-out validation data: {model.validation_score_}')
# # print(f'Boolean mask for the categorical features: {model.is_categorical_}')



0.7962158624414701
{}


## Model analysis

In [146]:
# from skater.core.explanations import Interpretation
# from skater.model import InMemoryModel

# feature_names = X_train.columns.tolist()
# X_train_numpy = X_train[:500].to_numpy()

# # wrap our base model with InMemoryModel instance
# annotated_model = InMemoryModel(
#     model.predict, 
#     examples = X_train_numpy, 
#     model_type = 'regressor'
# )

# interpreter = Interpretation(X_train_numpy, feature_names=feature_names)


In [147]:
#interpreter.feature_importance.plot_feature_importance(annotated_model, progressbar=False)

In [148]:
# interpreter.partial_dependence.plot_partial_dependence(
#     feature_names, annotated_model, grid_resolution=20, progressbar=False
# )

In [149]:
# from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer

# # create an explainer
# explainer = LimeTabularExplainer(X_train_numpy, feature_names=feature_names, mode="regression")

# # explain something
# explanation = explainer.explain_instance(X_train_numpy[5], annotated_model)

# # show the explanation
# explanation.show_in_notebook()

In [150]:
# import shap
# #shap.initjs()

# X100 = shap.utils.sample(X, 100)

# explainer = shap.Explainer(model)
# shap_values = explainer(X100)

# shap.plots.waterfall(shap_values[0])

In [151]:
# shap.plots.force(shap_values[0])

In [152]:
# shap.plots.force(shap_values)

In [153]:
# shap.plots.scatter(shap_values, color=shap_values)

In [154]:
# shap.plots.bar(shap_values)

In [155]:
# shap.plots.beeswarm(shap_values)

## Save Result

In [156]:
result = X_test.copy(deep=False)
result["RealSalesHl"] = y_test.copy(deep=False)
result["PredictSalesHl"] = y_pred

dir_path = os.path.dirname(RESULT_CSV_PATH)
if (not os.path.isdir(dir_path)):
    os.mkdir(dir_path)
 
result.to_csv(RESULT_CSV_PATH, index=False)
result.to_excel(RESULT_EXCEL_PATH, index=False)

NameError: name 'y_pred' is not defined

## Save model

In [None]:
import joblib

dir_path = os.path.dirname(MODEL_PATH)
if (not os.path.isdir(dir_path)):
    os.mkdir(dir_path)

joblib.dump(model, MODEL_PATH, compress=True)

## Use model

In [None]:
# import joblib
# import os
# import numpy as np

# file = open(MODEL_PATH, 'rb')

# model = joblib.load(file)

# SkuShort = 2605
# Week = 23
# NumberWorkdays = 5
# AvgTemp = 15.892857142857142
# AvgRain = 3.5000000000000004
# AvgSun = 6.735714285714286
# IsLockdown = 0
# PdtHl = -1.0
# PrevWeekPdtHl1 = -1.0
# BgtHl = 6665
# PrevWeekBgtHl1 = 6665.949490847161
# PrevWeekSalesHl1 = 5020
# PrevWeekSalesHl2 = 5038
# SalesHl = 5386.5

# x = np.array([[SkuShort, Week,NumberWorkdays, AvgTemp, AvgRain, AvgSun, IsLockdown, PdtHl, PrevWeekPdtHl1, BgtHl, PrevWeekBgtHl1, PrevWeekSalesHl1, PrevWeekSalesHl2]])

# y_pred = model.predict(x)
# result = y_pred[0]

# def get_percentage_diff(previous, current):
#     return 1 - (abs(previous - current)/max(previous, current))

# print('Forecast sales: %.4f hl'% result)
# print('Coefficient of determination: %.4f'% get_percentage_diff(SalesHl, result))

#full with sku = 0.7678
#full = 0.7363