<a href="https://colab.research.google.com/github/lennart194/thesis-code/blob/main/seperatemodeling_gwn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install ddop

In [2]:
import pandas as pd
import numpy as np

from ddop.newsvendor import GaussianWeightedNewsvendor
from ddop.newsvendor import SampleAverageApproximationNewsvendor

from sklearn.model_selection import RandomizedSearchCV

from ddop.metrics import make_scorer
from ddop.metrics import average_costs
from ddop.metrics import prescriptiveness_score

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Loading data-set

*   load the whole ultimative set
*   sort the rows first by item, then by date and set a multi index
**now every items time series can be separated by the index**

In [4]:
data = pd.read_csv('/content/drive/MyDrive/M5/ultimative_set.csv')
data = data.drop(columns=['Unnamed: 0'])
data = data.sort_values(by=['item_id', 'date'])
data = data.set_index(['item_id', 'date'])

## Split in feature and target matrix

In [5]:
X = data.drop(columns=['demand'])
Y = data['demand']

## train_test_split

In [6]:
X_train = X.loc[(slice(None), slice(None, "2015")), :]
X_test = X.loc[(slice(None), slice("2015", '2017')), :]

Y_train = Y.loc[(slice(None), slice(None, "2015"))]
Y_test = Y.loc[(slice(None), slice("2015", "2017"))]

## Definition of the item_ids 


*   will be needed within the for loops later
*   will contain all 25 products

In [7]:
prods = list(range(0,25))

cu = round(X_train['sell_price'].mean(),2)

co_90 = round(((cu-(0.90*cu))/0.90),2)

co_75 = round(((cu-(0.75*cu))/0.75),2)

co_50 = round(((cu-(0.50*cu))/0.50),2)

## Determining some parameters
*   the potential params and their values for randomized search are defined
*   the estimator, the scorer and finally the whole grid is defined

In [9]:
kernel_bandwidth = np.arange(25, 81)
param_random = dict(kernel_bandwidth = kernel_bandwidth)

scorer_avc = make_scorer(average_costs, greater_is_better=False)

# gwn for 90% service level

## randomized search
*   an empty list is created for every type of hyperparameter
*   in a for-loop the randomized search is executed for every item with cu and co of alpha = 0,9
*   the best hyperparams are calculated and added to the corresponding list

In [None]:
best_param_kbwth_90 = []

for prod in prods:
  
  gwn = GaussianWeightedNewsvendor(cu = cu, co = co_90)

  random = RandomizedSearchCV(gwn, param_random, cv=5, scoring = scorer_avc)
  random.fit(X_train.loc[prod], Y_train.loc[prod])
    
  kbwth = random.best_params_.get('kernel_bandwidth')

  best_param_kbwth_90.append(kbwth)

In [12]:
best_param_kbwth_90 = [26, 32, 34, 50, 26, 26, 33, 27, 27, 35, 73, 25, 30, 27, 31, 30, 34, 25, 79, 32, 38, 31, 41, 30, 28]

## Newsvendor-Model
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescritiveness score will be used for better comparability)

In [None]:
result_gwn_90_avc = []
for prod in prods:
  
  knn = GaussianWeightedNewsvendor(cu = cu, co = co_90, kernel_bandwidth = best_param_kbwth_90[prod])
  gwn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds = gwn.predict(X_test.loc[prod])

  avc = round(average_costs(Y_test.loc[prod], preds, cu = cu, co = co_90),2)
  result_gwn_90_avc.append(avc)

In [15]:
GWN_separate_avc_90 = pd.DataFrame(result_gwn_90_avc, columns=['GWN_avc_90'])
GWN_separate_avc_90.index.name = 'item_id'

In [17]:
result_gwn_90_pscr = []
for prod in prods:
  
  gwn = GaussianWeightedNewsvendor(cu = cu, co = co_90, kernel_bandwidth = best_param_kbwth_90[prod])
  gwn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds_gwn = gwn.predict(X_test.loc[prod])


  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_90)
  saa.fit(Y_train.loc[prod])
  preds_saa = saa.predict(n_steps = 508)
  
  pscr = prescriptiveness_score(Y_test.loc[prod], preds_gwn, preds_saa, cu = cu, co = co_90)
  result_gwn_90_pscr.append(pscr)

In [18]:
GWN_separate_pscr_90 = pd.DataFrame(result_gwn_90_pscr, columns=['GWN_pscr_90'])
GWN_separate_pscr_90.index.name = 'item_id'

# gwn for 75% service level

## randomized search
*   an empty list is created for every type of hyperparameter
*   in a for-loop the randomized search is executed for every item with cu and co of alpha = 0,75
*   the best hyperparams are calculated and added to the corresponding list

In [None]:
best_param_kbwth_75 = []

for prod in prods:
  
  
  gwn = GaussianWeightedNewsvendor(cu = cu, co = co_75)

  random = RandomizedSearchCV(gwn, param_random, cv=5, scoring = scorer_avc)
  random.fit(X_train.loc[prod], Y_train.loc[prod])
    
  kbwth = random.best_params_.get('kernel_bandwidth')

  best_param_kbwth_75.append(kbwth)

In [22]:
best_param_kbwth_75 = [26, 29, 25, 34, 32, 29, 26, 29, 25, 32, 77, 26, 26, 29, 30, 25, 36, 32, 70, 33, 26, 31, 37, 28, 33]

## Newsvendor-Model
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescritiveness score will be used for better comparability)

In [None]:
result_gwn_75_avc = []
for prod in prods:
  
  knn = GaussianWeightedNewsvendor(cu = cu, co = co_75, kernel_bandwidth = best_param_kbwth_75[prod])
  gwn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds = gwn.predict(X_test.loc[prod])

  avc = round(average_costs(Y_test.loc[prod], preds, cu = cu, co = co_75),2)
  result_gwn_75_avc.append(avc)

In [24]:
GWN_separate_avc_75 = pd.DataFrame(result_gwn_75_avc, columns=['GWN_avc_75'])
GWN_separate_avc_75.index.name = 'item_id'

In [26]:
result_gwn_75_pscr = []
for prod in prods:
  
  gwn = GaussianWeightedNewsvendor(cu = cu, co = co_75, kernel_bandwidth = best_param_kbwth_75[prod])
  gwn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds_gwn = gwn.predict(X_test.loc[prod])


  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_75)
  saa.fit(Y_train.loc[prod])
  preds_saa = saa.predict(n_steps = 508)
  
  pscr = prescriptiveness_score(Y_test.loc[prod], preds_gwn, preds_saa, cu = cu, co = co_75)
  result_gwn_75_pscr.append(pscr)

In [27]:
GWN_separate_pscr_75 = pd.DataFrame(result_gwn_75_pscr, columns=['GWN_pscr_75'])
GWN_separate_pscr_75.index.name = 'item_id'

# gwn for 50% service level

## randomized search
*   an empty list is created for every type of hyperparameter
*   in a for-loop the randomized search is executed for every item with cu and co of alpha = 0,5
*   the best hyperparams are calculated and added to the corresponding list

In [None]:
best_param_kbwth_50 = []

for prod in prods:
  
  
  gwn = GaussianWeightedNewsvendor(cu = cu, co = co_50)

  random = RandomizedSearchCV(gwn, param_random, cv=5, scoring = scorer_avc)
  random.fit(X_train.loc[prod], Y_train.loc[prod])
    
  kbwth = random.best_params_.get('kernel_bandwidth')

  best_param_kbwth_50.append(kbwth)

In [30]:
best_param_kbwth_50 = [25, 25, 26, 26, 31, 29, 30, 34, 28, 34, 69, 27, 28, 30, 33, 26, 25, 38, 71, 36, 28, 26, 31, 33, 27]

## Newsvendor-Model
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescritiveness score will be used for better comparability)

In [None]:
result_gwn_50_avc = []
for prod in prods:
  
  knn = GaussianWeightedNewsvendor(cu = cu, co = co_50, kernel_bandwidth = best_param_kbwth_50[prod])
  gwn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds = gwn.predict(X_test.loc[prod])

  avc = round(average_costs(Y_test.loc[prod], preds, cu = cu, co = co_50),2)
  result_gwn_50_avc.append(avc)

In [32]:
GWN_separate_avc_50 = pd.DataFrame(result_gwn_50_avc, columns=['GWN_avc_50'])
GWN_separate_avc_50.index.name = 'item_id'

In [33]:
result_gwn_50_pscr = []
for prod in prods:
  
  gwn = GaussianWeightedNewsvendor(cu = cu, co = co_50, kernel_bandwidth = best_param_kbwth_50[prod])
  gwn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds_gwn = gwn.predict(X_test.loc[prod])


  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_50)
  saa.fit(Y_train.loc[prod])
  preds_saa = saa.predict(n_steps = 508)
  
  pscr = prescriptiveness_score(Y_test.loc[prod], preds_gwn, preds_saa, cu = cu, co = co_50)
  result_gwn_50_pscr.append(pscr)

In [34]:
GWN_separate_pscr_50 = pd.DataFrame(result_gwn_50_pscr, columns=['GWN_pscr_50'])
GWN_separate_pscr_50.index.name = 'item_id'

# Merging

In [35]:
GWN_separated_avc = pd.concat([GWN_separate_avc_90, GWN_separate_avc_75, GWN_separate_avc_50], axis=1)
GWN_separated_avc

Unnamed: 0_level_0,GWN_avc_90,GWN_avc_75,GWN_avc_50
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6.21,7.82,12.16
1,1.86,2.93,4.89
2,5.55,7.02,10.84
3,5.47,6.96,10.97
4,6.63,8.45,13.53
5,3.05,4.17,7.29
6,4.21,5.71,9.44
7,5.24,6.57,9.76
8,2.56,3.83,6.51
9,3.23,4.29,6.76


In [36]:
GWN_separated_pscr = pd.concat([GWN_separate_pscr_90, GWN_separate_pscr_75, GWN_separate_pscr_50], axis=1)
GWN_separated_pscr

Unnamed: 0_level_0,GWN_pscr_90,GWN_pscr_75,GWN_pscr_50
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.318706,0.273134,0.246671
1,0.193818,0.34798,0.335771
2,0.127054,0.12802,0.109905
3,0.521037,0.557679,0.481078
4,0.289737,0.171939,0.137425
5,0.216199,0.240397,0.103268
6,0.411594,0.429724,0.388732
7,0.333369,0.206285,0.119707
8,0.240099,0.223897,0.189666
9,0.080825,0.177113,0.118354


Saving Files

In [37]:
GWN_separated_avc.to_csv('/content/drive/MyDrive/M5/GWN_separated_avc.csv')

GWN_separated_pscr.to_csv('/content/drive/MyDrive/M5/GWN_separated_pscr.csv')