In [6]:
import os, random, time
import xgboost
import datetime
import pygam
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from bayes_opt import BayesianOptimization
from fbprophet import Prophet
from sklearn.linear_model import LinearRegression as LR
from sklearn.metrics import r2_score

In [7]:
# Read the three input tables; the files are read in the listed order and
# bound, in that same order, to y_inven, x_sales and x_product.
y_inven, x_sales, x_product = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/Y_Inven_210429.csv',
        './data/X_Sales_210429.csv',
        './data/X_Product_210429.csv',
    )
)

In [8]:
def dataset(x_dta, case=1, is_train=True):
    """Slice one year of `x_dta` into a target column and three feature groups.

    Parameters
    ----------
    x_dta : pandas.DataFrame
        Must contain a 'YEAR' column, the target column ('Sales' when
        ``case == 1``, 'Products' for any other value) and feature columns
        whose names contain 'Prev', 'Post' or 'Var'.
    case : int, default 1
        1 selects 'Sales' as the target; anything else selects 'Products'.
    is_train : bool, default True
        Truthy keeps the rows with YEAR == 2020, falsy keeps YEAR == 2021.

    Returns
    -------
    tuple
        ``(target, prev_features, post_features, var_features)`` restricted to
        the selected year. The original row index is preserved.
    """
    def _columns_containing(tag):
        # Sub-frame made of every column whose name contains `tag`.
        return x_dta[[name for name in x_dta.columns.values if tag in name]]

    target_name = 'Sales' if case == 1 else 'Products'
    wanted_year = 2020 if is_train else 2021
    in_year = x_dta['YEAR'] == wanted_year

    target = x_dta[target_name][in_year]
    prev_part, post_part, var_part = (
        _columns_containing(tag)[in_year] for tag in ('Prev', 'Post', 'Var')
    )
    return target, prev_part, post_part, var_part

def run_model_(model_, trX_, trY_, teX_, teY_):
    """Fit `model_` on the training data and score it on both splits.

    The score of a split is ``mean(1 - |y - y_hat| / |y|) * 100``.

    Parameters
    ----------
    model_ : object exposing ``fit(X, y)`` and ``predict(X)``
    trX_, trY_ : training features and targets
    teX_, teY_ : test features and targets

    Returns
    -------
    tuple
        ``(train_score, test_score, all_predictions, all_actuals,
        train_predictions, test_predictions)``. Note the order: the
        concatenated predictions come BEFORE the concatenated actual values.
    """
    def _mean_accuracy(actual, predicted):
        # Average of (1 - absolute percentage error), expressed in percent.
        return np.mean(1 - np.abs(actual - predicted) / np.abs(actual)) * 100

    model_.fit(trX_, trY_)
    train_pred = model_.predict(trX_)
    test_pred = model_.predict(teX_)

    all_pred = np.concatenate((train_pred, test_pred))
    all_actual = np.concatenate((trY_, teY_))

    train_score = _mean_accuracy(trY_, train_pred)
    test_score = _mean_accuracy(teY_, test_pred)
    return train_score, test_score, all_pred, all_actual, train_pred, test_pred

In [9]:
# Split the sales table (target 'Sales') into its YEAR == 2020 and
# YEAR == 2021 parts via dataset().
(Y_sales_train,
 sales_prev_X_train,
 sales_post_X_train,
 sales_var_X_train) = dataset(x_sales, case=1, is_train=True)
(Y_sales_test,
 sales_prev_X_test,
 sales_post_X_test,
 sales_var_X_test) = dataset(x_sales, case=1, is_train=False)

# Same split for the product table (target 'Products').
(Y_products_train,
 products_prev_X_train,
 products_post_X_train,
 products_var_X_train) = dataset(x_product, case=2, is_train=True)
(Y_products_test,
 products_prev_X_test,
 products_post_X_test,
 products_var_X_test) = dataset(x_product, case=2, is_train=False)

# Names of the 'Var' feature columns, in frame order; later cells take a
# leading slice of these lists to choose how many features to use.
sales_var_col = [name for name in sales_var_X_train.columns.values if 'Var' in name]
product_var_col = [name for name in products_var_X_train.columns.values if 'Var' in name]

In [10]:
# Search ranges handed to BayesianOptimization. The keys are passed to
# `sales_opt` as keyword arguments, so they must match its parameter names
# exactly (including the existing `m_n_esitmator` spelling).
param_bound = {
    'alpha': (0.9, 0.99),
    'm_n_esitmator': (10, 100),
    'm_lr': (0.01, 0.5),
    'm_subsample': (0.3, 0.9),
    'm_max_depth': (2, 10),
    'col_k': (1, 8),
}


def sales_opt(alpha, m_n_esitmator, m_lr, m_subsample, m_max_depth, col_k):
    """Objective for the Bayesian search over the sales XGBoost model.

    Column k of the 'Var' features is multiplied by ``alpha ** k``, an
    XGBRegressor is fitted on the first ``round(col_k)`` weighted columns of
    the training frame, and the test-split score returned by `run_model_`
    is used as the objective value.

    Parameters
    ----------
    alpha : float
        Base of the per-column weight ``alpha ** k``.
    m_n_esitmator, m_max_depth : float
        Rounded to the nearest integer and used as ``n_estimators`` /
        ``max_depth``.
    m_lr, m_subsample : float
        Passed through as ``learning_rate`` / ``subsample``.
    col_k : float
        Rounded to the nearest integer; number of leading 'Var' columns used.

    Returns
    -------
    float
        Score on the test split (the quantity the optimiser maximises).

    Notes
    -----
    The objective is computed on ``sales_var_X_test`` / ``Y_sales_test``, so
    the hyperparameters are selected on the same data that is later reported
    as the test score.
    NOTE(review): a positive per-column rescaling does not change the ordering
    of feature values, so `alpha` may have little or no effect on a tree
    model -- confirm.
    """
    # One weight per feature column (the original hard-coded 12 columns).
    weights = [alpha ** lag for lag in range(sales_var_X_train.shape[1])]
    weighted_train = sales_var_X_train.mul(weights, axis=1)
    weighted_test = sales_var_X_test.mul(weights, axis=1)

    # int(...) guarantees a plain Python int even when round() is applied to
    # a NumPy float, which XGBoost parameters and list slicing both require.
    feature_cols = sales_var_col[:int(round(col_k))]

    # GPU training was left disabled: tree_method='gpu_hist', gpu_id=0
    model_sales = xgboost.XGBRegressor(
        n_estimators=int(round(m_n_esitmator)),
        learning_rate=m_lr,
        gamma=0,
        subsample=m_subsample,
        colsample_bytree=1,
        max_depth=int(round(m_max_depth)),
    )

    # run_model_ returns (train_score, test_score, ...); only the test score
    # is needed here.
    fcst_sale = run_model_(
        model_sales,
        weighted_train[feature_cols], Y_sales_train,
        weighted_test[feature_cols], Y_sales_test,
    )[1]
    return fcst_sale


sales_optimizer = BayesianOptimization(f=sales_opt, pbounds=param_bound, verbose=2, random_state=1)
sales_optimizer.maximize(init_points=10, n_iter=300)

|   iter    |  target   |   alpha   |   col_k   |   m_lr    | m_max_... | m_n_es... | m_subs... |
-------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 20.4    [0m | [0m 0.9375  [0m | [0m 6.042   [0m | [0m 0.01006 [0m | [0m 4.419   [0m | [0m 23.21   [0m | [0m 0.3554  [0m |
| [95m 2       [0m | [95m 85.5    [0m | [95m 0.9168  [0m | [95m 3.419   [0m | [95m 0.2044  [0m | [95m 6.311   [0m | [95m 47.73   [0m | [95m 0.7111  [0m |
| [0m 3       [0m | [0m 71.46   [0m | [0m 0.9184  [0m | [0m 7.147   [0m | [0m 0.02342 [0m | [0m 7.364   [0m | [0m 47.56   [0m | [0m 0.6352  [0m |
| [95m 4       [0m | [95m 88.05   [0m | [95m 0.9126  [0m | [95m 2.387   [0m | [95m 0.4024  [0m | [95m 9.746   [0m | [95m 38.21   [0m | [95m 0.7154  [0m |
| [0m 5       [0m | [0m 76.2    [0m | [0m 0.9789  [0m | [0m 7.262   [0m | [0m 0.05167 [0m | [0m 2.312   [0m | [0m 25.28   [0m 

In [11]:
# Search ranges handed to BayesianOptimization. The keys are passed to
# `product_opt` as keyword arguments, so they must match its parameter names
# exactly (including the existing `m_n_esitmator` spelling).
param_bound = {
    'alpha': (0.9, 0.99),
    'm_n_esitmator': (10, 100),
    'm_lr': (0.01, 0.5),
    'm_subsample': (0.3, 0.9),
    'm_max_depth': (2, 10),
    'col_k': (1, 8),
}


def product_opt(alpha, m_n_esitmator, m_lr, m_subsample, m_max_depth, col_k):
    """Objective for the Bayesian search over the product XGBoost model.

    Column k of the 'Var' features is multiplied by ``alpha ** k``, an
    XGBRegressor is fitted on the first ``round(col_k)`` weighted columns of
    the training frame, and the test-split score returned by `run_model_`
    is used as the objective value.

    Parameters
    ----------
    alpha : float
        Base of the per-column weight ``alpha ** k``.
    m_n_esitmator, m_max_depth : float
        Rounded to the nearest integer and used as ``n_estimators`` /
        ``max_depth``.
    m_lr, m_subsample : float
        Passed through as ``learning_rate`` / ``subsample``.
    col_k : float
        Rounded to the nearest integer; number of leading 'Var' columns used.

    Returns
    -------
    float
        Score on the test split (the quantity the optimiser maximises).

    Notes
    -----
    The objective is computed on ``products_var_X_test`` /
    ``Y_products_test``, so the hyperparameters are selected on the same data
    that is later reported as the test score.
    """
    # One weight per feature column (the original hard-coded 12 columns).
    weights = [alpha ** lag for lag in range(products_var_X_train.shape[1])]
    weighted_train = products_var_X_train.mul(weights, axis=1)
    weighted_test = products_var_X_test.mul(weights, axis=1)

    # int(...) guarantees a plain Python int even when round() is applied to
    # a NumPy float, which XGBoost parameters and list slicing both require.
    feature_cols = product_var_col[:int(round(col_k))]

    # GPU training was left disabled: tree_method='gpu_hist', gpu_id=0
    model_product = xgboost.XGBRegressor(
        n_estimators=int(round(m_n_esitmator)),
        learning_rate=m_lr,
        gamma=0,
        subsample=m_subsample,
        colsample_bytree=1,
        max_depth=int(round(m_max_depth)),
    )

    # run_model_ returns (train_score, test_score, ...); only the test score
    # is needed here.
    fcst_product = run_model_(
        model_product,
        weighted_train[feature_cols], Y_products_train,
        weighted_test[feature_cols], Y_products_test,
    )[1]
    return fcst_product


product_optimizer = BayesianOptimization(f=product_opt, pbounds=param_bound, verbose=2, random_state=1)
product_optimizer.maximize(init_points=10, n_iter=300)

|   iter    |  target   |   alpha   |   col_k   |   m_lr    | m_max_... | m_n_es... | m_subs... |
-------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 18.32   [0m | [0m 0.9375  [0m | [0m 6.042   [0m | [0m 0.01006 [0m | [0m 4.419   [0m | [0m 23.21   [0m | [0m 0.3554  [0m |
| [95m 2       [0m | [95m 85.27   [0m | [95m 0.9168  [0m | [95m 3.419   [0m | [95m 0.2044  [0m | [95m 6.311   [0m | [95m 47.73   [0m | [95m 0.7111  [0m |
| [0m 3       [0m | [0m 65.12   [0m | [0m 0.9184  [0m | [0m 7.147   [0m | [0m 0.02342 [0m | [0m 7.364   [0m | [0m 47.56   [0m | [0m 0.6352  [0m |
| [0m 4       [0m | [0m 84.9    [0m | [0m 0.9126  [0m | [0m 2.387   [0m | [0m 0.4024  [0m | [0m 9.746   [0m | [0m 38.21   [0m | [0m 0.7154  [0m |
| [0m 5       [0m | [0m 71.25   [0m | [0m 0.9789  [0m | [0m 7.262   [0m | [0m 0.05167 [0m | [0m 2.312   [0m | [0m 25.28   [0m | [0m 0

In [12]:
##        iter      |  target   |   alpha   |   col_k   |   m_lr    | m_max_... | m_n_es... | m_subs... |
## Sales   55       |  90.38    |  0.9526   |  4.938    |  0.06102  |  9.038    |  68.36    |  0.3951   |
## Product 24       |  89.73    |  0.9778   |  2.284    |  0.1653   |  2.388    |  90.34    |  0.4241   |
# The hyperparameters below are copied by hand from the two optimiser rows
# above (integers rounded to the nearest value).

# Per-column weights alpha ** k, one per 'Var' column (the original
# hard-coded 12 columns).
sales_weight = [0.9526 ** lag for lag in range(sales_var_X_train.shape[1])]
product_weight = [0.9778 ** lag for lag in range(products_var_X_train.shape[1])]

# Column-wise weighting of the feature frames; .mul returns new frames, the
# unweighted inputs are left untouched.
sales_var_X_train1 = sales_var_X_train.mul(sales_weight, axis=1)
sales_var_X_test1 = sales_var_X_test.mul(sales_weight, axis=1)
products_var_X_train1 = products_var_X_train.mul(product_weight, axis=1)
products_var_X_test1 = products_var_X_test.mul(product_weight, axis=1)

model_sales = xgboost.XGBRegressor(
    n_estimators=68, learning_rate=0.06102, gamma=0, subsample=0.3951,
    colsample_bytree=1, max_depth=9,
)  # GPU training was left disabled: tree_method='gpu_hist', gpu_id=0
# NOTE(review): n_estimators=50 does not match the Product row above
# (m_n_es... = 90.34, i.e. 90) -- confirm whether 50 is intentional.
model_products = xgboost.XGBRegressor(
    n_estimators=50, learning_rate=0.1653, gamma=0, subsample=0.4241,
    colsample_bytree=1, max_depth=2,
)  # GPU training was left disabled: tree_method='gpu_hist', gpu_id=0

sales_var_col = [s for s in sales_var_X_train.columns.values if 'Var' in s]
product_var_col = [s for s in products_var_X_train.columns.values if 'Var' in s]

# run_model_ returns (train_score, test_score, all_predictions, all_actuals,
# train_predictions, test_predictions). The *_Y_hat_ names therefore receive
# the third element and the *_Y_ names the fourth; the previous unpacking had
# these two swapped, so each name held the other's data.
real_sale, fcst_sale, sales_Y_hat_, sales_Y_, sales_prev, sales_hat = run_model_(
    model_sales,
    sales_var_X_train1[sales_var_col[:5]], Y_sales_train,
    sales_var_X_test1[sales_var_col[:5]], Y_sales_test,
)
real_product, fcst_product, products_Y_hat_, products_Y_, products_prev, products_hat = run_model_(
    model_products,
    products_var_X_train1[product_var_col[:2]], Y_products_train,
    products_var_X_test1[product_var_col[:2]], Y_products_test,
)

# '%.5f' prints five decimals; the previous '%f5' printed six decimals
# followed by a literal '5'.
print(" Sales Mean Average => Train :  %.5f / Test : %.5f"%(real_sale, fcst_sale))
print(" Product Mean Average => Train :  %.5f / Test : %.5f"%(real_product, fcst_product))

 Sales Mean Average => Train :  84.5762395 / Test : 89.4099925
 Product Mean Average => Train :  -214.5295785 / Test : 89.8037215


In [13]:
# Inventory rows split by year, mirroring the 2020 / 2021 split used for the
# sales and product tables.
y_inven_train = y_inven[y_inven['YEAR']==2020]
y_inven_test = y_inven[y_inven['YEAR']==2021]

# First 12 test rows of the product 'Prev' features with the predicted
# product values appended as the last column. `axis` is passed by keyword:
# the positional form is deprecated in pandas 1.x and rejected by pandas 2.x.
products_prev_x_test_hat = pd.concat(
    [products_prev_X_test.iloc[:12].reset_index(drop=True), pd.DataFrame(products_hat[:12])],
    axis=1,
).copy()
# Propagate each prediction forward: the prediction stored in the last column
# of row i is written into column j of row i+j+1, for j = 0..7.
# NOTE(review): presumably column j is the (j+1)-step lag feature and there
# are at least 8 'Prev' columns ahead of the prediction column -- confirm.
for i in range(products_prev_x_test_hat.shape[0]):
    tmp_prod_hat = products_prev_x_test_hat.iloc[i, -1]
    for j in range(0, 8):
        if i+j+1 < products_prev_x_test_hat.shape[0]:
            products_prev_x_test_hat.iloc[i+j+1, j ] = tmp_prod_hat


# Training frame: the 2020 inventory rows without the first one, followed by
# the first 2021 row, re-indexed from 0.
dta_train = pd.concat([y_inven_train.iloc[1:, :], y_inven_test.iloc[:1, :]]).copy().reset_index(drop=True)
# NOTE(review): the Series assignments and the concat below align on the row
# index, so they rely on the sales/product training rows being indexed
# 0..n-1 like dta_train -- confirm against the input files.
dta_train['Sales'] = Y_sales_train
dta_train = pd.concat([dta_train, products_prev_X_train], axis=1).copy()
dta_train['Product'] = Y_products_train

# Test frame: the 2021 inventory rows without the first one, with 'Sales' and
# 'Product' filled from the model predictions and the 'Prev' product columns
# taken from the forward-filled frame (its last, prediction, column dropped).
dta_test = y_inven_test.iloc[1:,:].copy()
dta_test['Sales'] = sales_hat[:12]
dta_test = pd.concat([dta_test.reset_index(drop=True), products_prev_x_test_hat.iloc[:,:-1]], axis=1).copy()
dta_test['Product'] = products_hat[:12]

In [14]:
# Search ranges handed to BayesianOptimization. The keys are passed to
# `inven_opt` as keyword arguments, so they must match its parameter names.
param_bound = {
    'm_n_estimator': (10, 100),
    'm_lr': (0.01, 0.5),
    'm_subsample': (0.3, 0.9),
    'm_max_depth': (2, 10),
    'col_k1': (0, 8),
    'col_k2': (0, 8),
}


def inven_opt(m_n_estimator, m_lr, m_subsample, m_max_depth, col_k1, col_k2):
    """Objective for the Bayesian search over the inventory XGBoost model.

    Fits an XGBRegressor on `dta_train` using 'Sales', 'Product', the first
    ``round(col_k1)`` 'prev_INVENTORY' columns and the first
    ``round(col_k2)`` 'Prev_Product' columns, then predicts `dta_test` one
    row at a time. From the second row on, the leading 'prev_INVENTORY'
    columns of the row are overwritten with the predictions already made
    (most recent first) before predicting.

    Parameters
    ----------
    m_n_estimator, m_max_depth : float
        Rounded to the nearest integer (``n_estimators`` / ``max_depth``).
    m_lr, m_subsample : float
        Rounded to two decimals (``learning_rate`` / ``subsample``).
    col_k1, col_k2 : float
        Rounded to the nearest integer; number of inventory / product lag
        columns used.

    Returns
    -------
    float
        ``mean(1 - |y - y_hat| / y) * 100`` over the rows of `dta_test`.

    Notes
    -----
    The objective is computed on `dta_test`, so the hyperparameters are
    selected on the same rows that are used as the test period.
    """
    # int(...) guarantees plain Python ints even when round() is applied to
    # NumPy floats (needed for slicing and for XGBoost integer parameters).
    i_col = int(round(col_k1))
    p_col = int(round(col_k2))
    p_m_n_estimator = int(round(m_n_estimator))
    p_m_lr = round(m_lr, 2)
    p_m_sub = round(m_subsample, 2)
    p_m_max_dep = int(round(m_max_depth))

    prev_inven_col = [s for s in dta_test.columns.values if 'prev_INVENTORY' in s][:i_col]
    prev_product_col = [s for s in dta_test.columns.values if 'Prev_Product' in s][:p_col]
    train_col_name = ['Sales', 'Product'] + prev_inven_col + prev_product_col

    model_inven = xgboost.XGBRegressor(
        n_estimators=p_m_n_estimator,
        learning_rate=p_m_lr,
        gamma=0,
        subsample=p_m_sub,
        colsample_bytree=1,
        max_depth=p_m_max_dep,
    )
    model_inven.fit(dta_train[train_col_name], dta_train['INVENTORY'])

    inven_hat = []
    for i in range(dta_test.shape[0]):
        test_default_x = dta_test[['Sales', 'Product']].iloc[i:(i+1), :]
        # Explicit copy: the lag values are overwritten below and dta_test
        # itself must stay unchanged between optimiser calls.
        test_prev_inven_x = dta_test[prev_inven_col].iloc[i:(i+1), :].copy()
        test_prev_product_x = dta_test[prev_product_col].iloc[i:(i+1), :]

        # Column j receives the (j+1)-th most recent prediction, for as many
        # columns as there are predictions so far (none on the first row).
        for j in range(min(i_col, len(inven_hat))):
            test_prev_inven_x.iloc[0, j] = inven_hat[-(j + 1)]

        # `axis` by keyword: the positional form is deprecated in pandas 1.x
        # and rejected by pandas 2.x.
        test_x = pd.concat([test_default_x, test_prev_inven_x, test_prev_product_x], axis=1)
        inven_hat.append(model_inven.predict(test_x)[0])

    fcst_avg = np.mean(1- np.abs(dta_test['INVENTORY'] - inven_hat) / dta_test['INVENTORY']) * 100

    return fcst_avg


inven_optimizer = BayesianOptimization(f=inven_opt, pbounds=param_bound, verbose=2, random_state=1)
inven_optimizer.maximize(init_points=10, n_iter=300)

|   iter    |  target   |  col_k1   |  col_k2   |   m_lr    | m_max_... | m_n_es... | m_subs... |
-------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 15.95   [0m | [0m 3.336   [0m | [0m 5.763   [0m | [0m 0.01006 [0m | [0m 4.419   [0m | [0m 23.21   [0m | [0m 0.3554  [0m |
| [95m 2       [0m | [95m 77.4    [0m | [95m 1.49    [0m | [95m 2.764   [0m | [95m 0.2044  [0m | [95m 6.311   [0m | [95m 47.73   [0m | [95m 0.7111  [0m |
| [0m 3       [0m | [0m 47.98   [0m | [0m 1.636   [0m | [0m 7.025   [0m | [0m 0.02342 [0m | [0m 7.364   [0m | [0m 47.56   [0m | [0m 0.6352  [0m |
| [0m 4       [0m | [0m 71.79   [0m | [0m 1.123   [0m | [0m 1.585   [0m | [0m 0.4024  [0m | [0m 9.746   [0m | [0m 38.21   [0m | [0m 0.7154  [0m |
| [0m 5       [0m | [0m 54.89   [0m | [0m 7.011   [0m | [0m 7.157   [0m | [0m 0.05167 [0m | [0m 2.312   [0m | [0m 25.28   [0m | [0m 0