In [10]:
import os, random, time
import xgboost
import datetime
import pygam
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from bayes_opt import BayesianOptimization
#from fbprophet import Prophet

from sklearn.linear_model import LinearRegression as LR
from sklearn.kernel_ridge import KernelRidge as KR
from sklearn.linear_model import Ridge, ElasticNet, BayesianRidge
from sklearn.metrics import r2_score
from sklearn.gaussian_process import GaussianProcessRegressor as gpr
from sklearn.gaussian_process.kernels import WhiteKernel, ConstantKernel as C, RBF
import warnings

In [2]:
y_inven = pd.read_csv('./E61D_Inven.csv')
x_sales = pd.read_csv('./E61D_Sale.csv')
x_product = pd.read_csv('./E61D_Product.csv')
d_set = pd.merge(pd.merge(y_inven, x_sales, how='left', on='Week'), x_product, how='left', on='Week')
d_set = d_set.drop(columns=['5xxx', '3xxC', '3xxT'])
d_set[d_set < -1000] = 0
#d_set_log = d_set.copy()
#d_set_log[d_set < 0] = 0
#d_set.to_csv('data_0506.csv', index=False)

rm_week = [201952, 202001, 202052, 202053, 202101]
rm_index = [i for i,s in enumerate(d_set['Week']) if s not in rm_week]
d_set1 = d_set.loc[rm_index]
d_set1 = d_set1.reset_index(drop=True)
#print(d_set.shape)
#d_set1.to_csv('data_0506.csv', index=False)

In [6]:
inven_col = [s for s in d_set1.columns if 'Prev_Inven' in s]
sale_col = [s for s in d_set1.columns if 'Sale' in s]
product_col = [s for s in d_set1.columns if 'Product' in s]
y_col = [s for s in d_set1.columns if 'Inven' == s]

In [8]:
d_set1[product_col].corr(method='pearson').iloc[0,:]

Product            1.000000
Var_Product_1      0.444154
Var_Product_2      0.429571
Var_Product_3      0.405463
Var_Product_4      0.367546
Var_Product_5      0.302000
Var_Product_6      0.227115
Var_Product_7      0.102748
Var_Product_8      0.152717
Var_Product_9      0.111518
Var_Product_10     0.073880
Var_Product_11     0.054266
Var_Product_12     0.104223
Post_Product_1     0.379531
Post_Product_2     0.290136
Post_Product_3     0.378590
Post_Product_4     0.306711
Post_Product_5     0.114843
Post_Product_6     0.046624
Post_Product_7    -0.050972
Post_Product_8    -0.072394
Post_Product_9    -0.120416
Post_Product_10   -0.121170
Post_Product_11   -0.172575
Post_Product_12   -0.124365
Prev_Product_1     0.751463
Prev_Product_2     0.543695
Prev_Product_3     0.470680
Prev_Product_4     0.426746
Prev_Product_5     0.391741
Prev_Product_6     0.383066
Prev_Product_7     0.360388
Prev_Product_8     0.341115
Prev_Product_9     0.306379
Prev_Product_10    0.228994
Prev_Product_11    0

In [3]:
def run_model_1(m_, x_dta, wt_mat, t_col_k, s_col_k, p_col_k, tr_s_day, tr_e_day, te_s_day=None, te_e_day=None):    
    tr_time = np.array([tr_s_day, tr_e_day])
    te_time = np.array([te_s_day, te_e_day])
    s_ind, e_ind = np.where(tr_time[0] == x_dta['Week'])[0][0], np.where(tr_time[1] == x_dta['Week'])[0][0]
    te_s_ind, te_e_ind = np.where(te_time[0] == x_dta['Week'])[0][0], np.where(te_time[1] == x_dta['Week'])[0][0]

    target_col = [s for s in x_dta.columns.values if 'Inven' == s]
    week_col = [s for s in x_dta.columns.values if 'Week' in s]

    prev_target_col = [s for s in x_dta.columns.values if 'Prev_Inven' in s]
    prev_sale_col = [s for s in x_dta.columns.values if 'Prev_Sale' in s]
    prev_product_col = [s for s in x_dta.columns.values if 'Prev_Product' in s]
    sale_col = [s for s in x_dta.columns.values if 'Sale' == s]
    product_col = [s for s in x_dta.columns.values if 'Product' == s]

    tmp_x_set1, tmp_x_set2, tmp_x_set3 = x_dta[prev_target_col], x_dta[prev_sale_col], x_dta[prev_product_col]    
    tmp_x_set1 = np.multiply(tmp_x_set1, np.tile([wt_mat[:tmp_x_set1.shape[1]]], tmp_x_set1.shape[0]).reshape(tmp_x_set1.shape[0], -1)).copy()
    tmp_x_set2 = np.multiply(tmp_x_set2, np.tile([wt_mat], tmp_x_set2.shape[0]).reshape(tmp_x_set2.shape[0], -1)).copy()
    tmp_x_set3 = np.multiply(tmp_x_set3, np.tile([wt_mat], tmp_x_set3.shape[0]).reshape(tmp_x_set3.shape[0], -1)).copy()

    tmp_x_set1_re = tmp_x_set1.iloc[:,:t_col_k]
    tmp_x_set2_re = tmp_x_set2.iloc[:,:s_col_k]
    tmp_x_set3_re = tmp_x_set3.iloc[:,:p_col_k]
    x_dta_re = pd.concat([x_dta[list(week_col+target_col+sale_col+product_col)], tmp_x_set1_re, tmp_x_set2_re, tmp_x_set3_re], axis=1)

    train_set = x_dta_re.iloc[s_ind:e_ind, :]
    test_set = x_dta_re.iloc[te_s_ind:te_e_ind, :]#.copy()

    tr_x_set = train_set[list(prev_target_col[:t_col_k]+sale_col+prev_sale_col[:s_col_k]+product_col+prev_product_col[:p_col_k])]
    tr_y_set = np.array(train_set[target_col])
    te_x_set = test_set[list(prev_target_col[:t_col_k]+sale_col+prev_sale_col[:s_col_k]+product_col+prev_product_col[:p_col_k])]
    te_y_set = np.array(test_set[target_col])

    m_.fit(tr_x_set, tr_y_set)
    hat_prev_ = m_.predict(tr_x_set)
    
    if len(tr_y_set) > 0 :
        tmp_y_id_ = np.where(tr_y_set!=0)[0]
        real_ = np.mean(1- np.abs(tr_y_set[tmp_y_id_] - hat_prev_[tmp_y_id_]) / np.abs(tr_y_set[tmp_y_id_])) * 100
    else:
        real_ = np.mean(1- np.abs(tr_y_set - hat_prev_) / np.abs(tr_y_set)) * 100     

    if te_s_day is None :
        return real_, tr_y_set, hat_prev_
    else:
        hat_  = m_.predict(te_x_set)
        Y_hat_ = np.concatenate((hat_prev_, hat_))
        Y_     = np.concatenate((tr_y_set, te_y_set))
        if len(te_y_set) > 0:
            tmp_y_id_ = np.where(te_y_set!=0)[0]
            fcst_ = np.mean(1- np.abs(te_y_set[tmp_y_id_] - hat_[tmp_y_id_]) / np.abs(te_y_set[tmp_y_id_])) * 100
        else:
            fcst_ = np.mean(1- np.abs(te_y_set - hat_     ) / np.abs(te_y_set)) * 100    
        return real_, fcst_, Y_, Y_hat_, hat_prev_, hat_

In [24]:
def run_model_2(m_, x_dta, target_key, wt_mat, tr_s_day, tr_e_day, te_s_day=None, te_e_day=None, col_k=None, col_k1=None):        
    tr_time = np.array([tr_s_day, tr_e_day])
    te_time = np.array([te_s_day, te_e_day])
    s_ind, e_ind = np.where(tr_time[0] == x_dta['Week'])[0][0], np.where(tr_time[1] == x_dta['Week'])[0][0]
    te_s_ind, te_e_ind = np.where(te_time[0] == x_dta['Week'])[0][0], np.where(te_time[1] == x_dta['Week'])[0][0]
    print(te_s_int, te_e_ind, s_ind, e_ind)
    target_col = [s for s in x_dta.columns.values if target_key == s]    
    tmp_x_col = [s for s in x_dta.columns.values if 'Prev_'+target_key in s]
    tmp_x_col1 = [s for s in x_dta.columns.values if 'Var_'+target_key in s]
    week_col = [s for s in x_dta.columns.values if 'Week' in s]

    tmp_x_set1, tmp_x_set2 = x_dta[tmp_x_col], x_dta[tmp_x_col1]
    tmp_x_set1 = np.multiply(tmp_x_set1, np.tile([wt_mat], tmp_x_set1.shape[0]).reshape(tmp_x_set1.shape[0], -1)).copy()
    tmp_x_set2 = np.multiply(tmp_x_set2, np.tile([wt_mat], tmp_x_set2.shape[0]).reshape(tmp_x_set2.shape[0], -1)).copy()
    if target_key == 'Product':
        tmp_x_set1_re = tmp_x_set1.iloc[:,:col_k]
        tmp_x_set2_re = tmp_x_set2.iloc[:,:col_k1]
        x_dta_re = pd.concat([x_dta[list(week_col+target_col)], tmp_x_set1_re, tmp_x_set2_re], axis=1)
        train_col = list(tmp_x_col[:col_k]+tmp_x_col1[:col_k1])    
    elif target_key == 'Sale':
        tmp_x_set2_re = tmp_x_set2.iloc[:,:col_k1]
        x_dta_re = pd.concat([x_dta[list(week_col+target_col)], tmp_x_set2_re], axis=1)
        train_col = list(tmp_x_col1[:col_k1])


    train_set = x_dta_re.iloc[s_ind:e_ind, :]
    test_set = x_dta_re.iloc[te_s_ind:te_e_ind, :]#.copy()

    tr_x_set = train_set[train_col]
    tr_y_set = np.array(train_set[target_col]).reshape(-1,1)
    te_x_set = test_set[train_col]
    te_y_set = np.array(test_set[target_col]).reshape(-1,1)

    m_.fit(tr_x_set, tr_y_set)
    hat_prev_ = m_.predict(tr_x_set)
    if len(np.shape(hat_prev_)) == 1:
        hat_prev_ = hat_prev_.reshape(-1,1)
    
    if len(tr_y_set) > 0 :
        tmp_y_id_ = np.where(tr_y_set!=0)[0]
        real_ = np.mean(1- np.abs(tr_y_set[tmp_y_id_] - hat_prev_[tmp_y_id_]) / np.abs(tr_y_set[tmp_y_id_])) * 100
    else:
        real_ = np.mean(1- np.abs(tr_y_set - hat_prev_) / np.abs(tr_y_set)) * 100     

    if te_s_day is None :
        return real_, tr_y_set, hat_prev_
    else:
        hat_ = []
        if target_key == 'Product':
            prev_x = list(map(lambda j : x_dta_re[target_key].iloc[te_s_ind-j], range(1, col_k+1)))
            for i in range(te_s_ind, te_e_ind+1):    
                var_x = x_dta_re[tmp_x_col1[:col_k1]].iloc[i]        
                te_x_ = np.array(prev_x+list(var_x)).reshape(1,-1)
                tmp_y_hat_ = m_.predict(te_x_)[0][0] #
                hat_.append(tmp_y_hat_)
                prev_x = (list([tmp_y_hat_]) + prev_x)[:col_k]
        elif target_key == 'Sale':
            tmp_y_hat_ = m_.predict(te_x_set)
            hat_.append(tmp_y_hat_) 
            
        Y_hat_ = np.concatenate((hat_prev_, np.array(hat_).reshape(-1,1))) #hat_))#
        Y_     = np.concatenate((tr_y_set, te_y_set))
        fcst_ = np.mean(np.ones_like(te_y_set) - np.clip(np.abs(te_y_set - hat_[0]) / te_y_set , 0, 1)) * 100
        return real_, fcst_, Y_, Y_hat_, hat_prev_, hat_[0]

In [25]:
warnings.filterwarnings(action='ignore', category=UserWarning)
param_bound = {'alpha' : (0.85,0.99) , 'm_n_estimator' : (10, 100), 'm_lr' : (0.01, 0.4), 'm_subsample' : (0.5, 0.9), 'm_max_depth' : (3,9), 'col_k' : (1,13), 'col_k1' : (1,13)}
def h_opt(alpha, m_n_estimator, m_lr, m_subsample, m_max_depth, col_k, col_k1):
    weight_mat = list(map(lambda x : round(alpha,3)**x if x > 0 else 1, range(0,12)))    
    model_=xgboost.XGBRegressor(n_estimators=round(m_n_estimator), learning_rate=round(m_lr,2), subsample=round(m_subsample,2), max_depth=round(m_max_depth), tree_method='gpu_hist', gpu_id=0)  
                                               
    acc_, acc_hat_, _, _, _, _  = run_model_2(model_, d_set1, 'Sale', weight_mat,201903, 202043, 202044, 202053, None, round(col_k1))
    return acc_hat_    

sale_optimizer = BayesianOptimization(f=h_opt, pbounds=param_bound, verbose=2, random_state=1)
sale_optimizer.maximize(init_points=50, n_iter=500)
#####################################################################################################################
#####    |  target   |   alpha   |   col_k   |  col_k1   |   m_lr    | m_max_... | m_n_es... | m_subs... |
#####    |  71.49    |  0.9099   |  10.69    |  2.458    |  0.1093   |  7.29     |  58.46    |  0.6755   |
#####    |  71.56    |  0.8941   |  12.07    |  7.294    |  0.1848   |  4.874    |  55.11    |  0.7917   |
#####    |  99.38    |  0.99     |  13.0     |  5.387    |  0.4      |  9.0      |  28.58    |  0.5      |  ---> np.log(sale)

|   iter    |  target   |   alpha   |   col_k   |  col_k1   |   m_lr    | m_max_... | m_n_es... | m_subs... |
-------------------------------------------------------------------------------------------------------------


IndexError: index 0 is out of bounds for axis 0 with size 0

In [23]:
warnings.filterwarnings(action='ignore', category=UserWarning)
param_bound = {'alpha' : (0.9,0.99) , 'm_n_estimator' : (10, 100), 'm_lr' : (0.01, 0.4), 'm_subsample' : (0.4, 0.9), 'm_max_depth' : (3,9), 'col_k' : (1,13), 'col_k1' : (1,13)}
def h_opt(alpha, m_n_estimator, m_lr, m_subsample, m_max_depth, col_k, col_k1):
    weight_mat = list(map(lambda x : round(alpha,3)**x if x > 0 else 1, range(0,12)))    #
    model_=xgboost.XGBRegressor(n_estimators=round(m_n_estimator), learning_rate=round(m_lr,3),
                                subsample=round(m_subsample,3), max_depth=round(m_max_depth), tree_method='gpu_hist', gpu_id=0)
    #model_ = KR(m_lr)
    acc_, acc_hat_, _, _, _, _ = run_model_2(model_, d_set1, 'Product', weight_mat, 201903, 202043, 202044, 202051, round(col_k), round(col_k1))  # prev / var : 
    return acc_hat_    

product_optimizer = BayesianOptimization(f=h_opt, pbounds=param_bound, verbose=2, random_state=1)
product_optimizer.maximize(init_points=20, n_iter=50)
#####################################################################################################################
#####    |  target   |   alpha   |   col_k   |  col_k1   |   m_lr    | m_max_... | m_n_es... | m_subs... |
#####    |  89.24    |  0.01     |  1.0      |  2.788    |  0.01     |  5.659    |  80.31    |  0.4      | => weight 1, Prev & Var sale
#####   |  88.67    |  0.99     |  1.0      |  1.0      |  0.01     |  3.0      |  82.61    |  0.9      | => KR
#####   |  89.5     |  0.02763  |  2.278    |  1.572    |  0.9655   |  8.61     |  10.1     |  0.8746   |  => alpah & m_lr

|   iter    |  target   |   alpha   |   col_k   |  col_k1   |   m_lr    | m_max_... | m_n_es... | m_subs... |
-------------------------------------------------------------------------------------------------------------


IndexError: invalid index to scalar variable.