<a href="https://colab.research.google.com/github/lennart194/thesis-code/blob/main/separatemodeling_knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install ddop into the environment of the running kernel (explicit %pip magic
# instead of relying on automagic resolving a bare `pip`).
%pip install ddop

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import RandomizedSearchCV

from ddop.metrics import average_costs
from ddop.metrics import make_scorer
from ddop.metrics import prescriptiveness_score
from ddop.newsvendor import KNeighborsWeightedNewsvendor
from ddop.newsvendor import SampleAverageApproximationNewsvendor

In [None]:
# Mount Google Drive at /content/drive; the CSV paths used in this notebook
# all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Loading data-set

*   load the complete data set (`ultimative_set.csv`)
*   sort the rows first by item, then by date, and set a multi-index
**now every item's time series can be selected via the index**



In [None]:
# Read the raw CSV, drop the stray 'Unnamed: 0' column, order the rows by
# item and date, and index them by (item_id, date).
data = (
    pd.read_csv('/content/drive/MyDrive/M5/ultimative_set.csv')
    .drop(columns=['Unnamed: 0'])
    .sort_values(by=['item_id', 'date'])
    .set_index(['item_id', 'date'])
)

## Split in feature and target matrix

In [None]:
# Target vector: the 'demand' column. Feature matrix: every remaining column.
Y = data['demand']
X = data.drop(columns='demand')

## train_test_split

In [None]:
# Row selectors over the (item_id, date) index: every item, restricted to a
# label range on the date level.
train_rows = (slice(None), slice(None, "2015"))
test_rows = (slice(None), slice("2015", '2017'))

X_train, Y_train = X.loc[train_rows, :], Y.loc[train_rows]
X_test, Y_test = X.loc[test_rows, :], Y.loc[test_rows]

## Definition of the item_ids 


*   will be needed within the for loops later
*   will contain all 25 products



In [None]:
# Item ids 0..24, iterated over by the per-item loops in this notebook.
prods = [*range(25)]

## Determining some parameters
*   the candidate values of `n_neighbors` for the randomized search are defined
*   the scorer (based on average costs) is defined; the estimator itself is created per item inside the loops below





In [None]:
# Candidate values for n_neighbors explored by the randomized search.
k = np.array([7, 20, 25, 30, 35, 40, 45, 50, 55, 60])
param_random = {'n_neighbors': k}

# Scorer wrapping average_costs; greater_is_better=False is passed because
# the metric is a cost.
scorer_avc = make_scorer(
    average_costs,
    greater_is_better=False,
)

# knn for 95% service level

## randomized search
*   an empty list is created for every type of hyperparameter
*   in a for-loop the randomized search is executed for every item with cu and co of alpha = 0.95
*   the best hyperparams are calculated and added to the corresponding list




In [None]:
# Per-item hyperparameter search for the 95% service level.
# best_param_k_95[i] holds the selected n_neighbors for prods[i].
best_param_k_95 = []
for prod in prods:

  # Underage cost: mean sell price of the item in the training data.
  cu = X_train.loc[prod]['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.95.
  co_95 = ((cu-(0.95*cu))/0.95)
  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_95)

  # Fixed random_state so repeated runs visit the candidates in the same order.
  search = RandomizedSearchCV(knn, param_random, cv=5, scoring = scorer_avc, random_state = 0)
  search.fit(X_train.loc[prod], Y_train.loc[prod])

  # Read the winner by name and do not store it in `k`, which holds the
  # candidate array used to build param_random.
  best_param_k_95.append(search.best_params_['n_neighbors'])

## Newsvendor-Model
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescriptiveness score will be used for better comparability)



In [None]:
# Out-of-sample average costs per item at the 95% service level.
result_knn_95_avc = []
# The tuned values are stored in the order of `prods`; guard against a stale
# or partially filled list before pairing them up.
assert len(best_param_k_95) == len(prods), "best_param_k_95 must hold one entry per item in prods"
# Pair each item with its tuned n_neighbors by position rather than using the
# item id as a list index.
for prod, n_neighbors in zip(prods, best_param_k_95):
  cu = X_train.loc[prod]['sell_price'].mean()
  co_95 = ((cu-(0.95*cu))/0.95)

  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_95, n_neighbors = n_neighbors)
  knn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds = knn.predict(X_test.loc[prod])
  # Costs on the test period, rounded to two decimals.
  avc = round(average_costs(Y_test.loc[prod], preds, cu = cu, co = co_95),2)
  result_knn_95_avc.append(avc)

In [None]:
# One-column table of the 95% average costs; the row index is labelled 'item_id'.
KNN_separate_avc_95 = pd.DataFrame(
    data=result_knn_95_avc,
    columns=['KNN_avc_95'],
).rename_axis(index='item_id')

In [None]:
# Prescriptiveness score per item at the 95% service level: the tuned KNN
# newsvendor is compared against a sample-average-approximation (SAA) baseline.
result_knn_95_pscr = []
# The tuned values are stored in the order of `prods`; guard against a stale
# or partially filled list before pairing them up.
assert len(best_param_k_95) == len(prods), "best_param_k_95 must hold one entry per item in prods"
for prod, n_neighbors in zip(prods, best_param_k_95):
  cu = X_train.loc[prod]['sell_price'].mean()
  co_95 = ((cu-(0.95*cu))/0.95)

  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_95, n_neighbors = n_neighbors)
  knn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds_knn = knn.predict(X_test.loc[prod])

  # Baseline fitted on the demand history only (no features).
  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_95)
  saa.fit(Y_train.loc[prod])
  # One baseline decision per test observation, derived from the data instead
  # of the previously hard-coded 508.
  preds_saa = saa.predict(n_steps = len(Y_test.loc[prod]))

  pscr = prescriptiveness_score(Y_test.loc[prod], preds_knn, preds_saa ,cu = cu, co = co_95)
  result_knn_95_pscr.append(pscr)

In [None]:
# One-column table of the 95% prescriptiveness scores; the row index is labelled 'item_id'.
KNN_separate_pscr_95 = pd.DataFrame(
    data=result_knn_95_pscr,
    columns=['KNN_pscr_95'],
).rename_axis(index='item_id')

# knn for 90% service level

## randomized search
*   an empty list is created for every type of hyperparameter
*   in a for-loop the randomized search is executed for every item with cu and co of alpha = 0.90
*   the best hyperparams are calculated and added to the corresponding list

In [None]:
# Per-item hyperparameter search for the 90% service level.
# best_param_k_90[i] holds the selected n_neighbors for prods[i].
best_param_k_90 = []
for prod in prods:

  # Underage cost: mean sell price of the item in the training data.
  cu = X_train.loc[prod]['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.90.
  co_90 = ((cu-(0.90*cu))/0.90)
  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_90)

  # Fixed random_state so repeated runs visit the candidates in the same order.
  search = RandomizedSearchCV(knn, param_random, cv=5, scoring = scorer_avc, random_state = 0)
  search.fit(X_train.loc[prod], Y_train.loc[prod])

  # Read the winner by name and do not store it in `k`, which holds the
  # candidate array used to build param_random.
  best_param_k_90.append(search.best_params_['n_neighbors'])

## Newsvendor-Model
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescriptiveness score will be used for better comparability)

In [None]:
# Out-of-sample average costs per item at the 90% service level.
result_knn_avc_90 = []
# The tuned values are stored in the order of `prods`; guard against a stale
# or partially filled list before pairing them up.
assert len(best_param_k_90) == len(prods), "best_param_k_90 must hold one entry per item in prods"
# Pair each item with its tuned n_neighbors by position rather than using the
# item id as a list index.
for prod, n_neighbors in zip(prods, best_param_k_90):
  cu = X_train.loc[prod]['sell_price'].mean()
  co_90 = ((cu-(0.90*cu))/0.90)

  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_90, n_neighbors = n_neighbors)
  knn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds = knn.predict(X_test.loc[prod])
  # Costs on the test period, rounded to two decimals.
  avc = round(average_costs(Y_test.loc[prod], preds, cu = cu, co = co_90),2)
  result_knn_avc_90.append(avc)

In [None]:
# One-column table of the 90% average costs; the row index is labelled 'item_id'.
KNN_separate_avc_90 = pd.DataFrame(
    data=result_knn_avc_90,
    columns=['KNN_avc_90'],
).rename_axis(index='item_id')

In [None]:
# Prescriptiveness score per item at the 90% service level: the tuned KNN
# newsvendor is compared against a sample-average-approximation (SAA) baseline.
result_knn_90_pscr = []
# The tuned values are stored in the order of `prods`; guard against a stale
# or partially filled list before pairing them up.
assert len(best_param_k_90) == len(prods), "best_param_k_90 must hold one entry per item in prods"
for prod, n_neighbors in zip(prods, best_param_k_90):
  cu = X_train.loc[prod]['sell_price'].mean()
  # Compute the 90% overage cost for THIS item. The cell previously computed
  # co_95 here and then used a co_90 left over from an earlier cell, i.e. the
  # last item's value for every item.
  co_90 = ((cu-(0.90*cu))/0.90)

  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_90, n_neighbors = n_neighbors)
  knn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds_knn = knn.predict(X_test.loc[prod])

  # Baseline fitted on the demand history only (no features).
  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_90)
  saa.fit(Y_train.loc[prod])
  # One baseline decision per test observation, derived from the data instead
  # of the previously hard-coded 508.
  preds_saa = saa.predict(n_steps = len(Y_test.loc[prod]))

  pscr = prescriptiveness_score(Y_test.loc[prod], preds_knn, preds_saa ,cu = cu, co = co_90)
  result_knn_90_pscr.append(pscr)

In [None]:
# One-column table of the 90% prescriptiveness scores; the row index is labelled 'item_id'.
KNN_separate_pscr_90 = pd.DataFrame(
    data=result_knn_90_pscr,
    columns=['KNN_pscr_90'],
).rename_axis(index='item_id')

# knn for 75% service level

## randomized search
*   an empty list is created for every type of hyperparameter
*   in a for-loop the randomized search is executed for every item with cu and co of alpha = 0.75
*   the best hyperparams are calculated and added to the corresponding list

In [None]:
# Per-item hyperparameter search for the 75% service level.
# best_param_k_75[i] holds the selected n_neighbors for prods[i].
best_param_k_75 = []
for prod in prods:

  # Underage cost: mean sell price of the item in the training data.
  cu = X_train.loc[prod]['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.75.
  co_75 = ((cu-(0.75*cu))/0.75)
  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_75)

  # Fixed random_state so repeated runs visit the candidates in the same order.
  search = RandomizedSearchCV(knn, param_random, cv=5, scoring = scorer_avc, random_state = 0)
  search.fit(X_train.loc[prod], Y_train.loc[prod])

  # Read the winner by name and do not store it in `k`, which holds the
  # candidate array used to build param_random.
  best_param_k_75.append(search.best_params_['n_neighbors'])

## Newsvendor-Model
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescriptiveness score will be used for better comparability)

In [None]:
# Out-of-sample average costs per item at the 75% service level.
result_knn_avc_75 = []
# The tuned values are stored in the order of `prods`; guard against a stale
# or partially filled list before pairing them up.
assert len(best_param_k_75) == len(prods), "best_param_k_75 must hold one entry per item in prods"
# Pair each item with its tuned n_neighbors by position rather than using the
# item id as a list index.
for prod, n_neighbors in zip(prods, best_param_k_75):
  cu = X_train.loc[prod]['sell_price'].mean()
  co_75 = ((cu-(0.75*cu))/0.75)

  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_75, n_neighbors = n_neighbors)
  knn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds = knn.predict(X_test.loc[prod])

  # Costs on the test period, rounded to two decimals.
  avc = round(average_costs(Y_test.loc[prod], preds, cu = cu, co = co_75),2)
  result_knn_avc_75.append(avc)

In [None]:
# One-column table of the 75% average costs; the row index is labelled 'item_id'.
KNN_separate_avc_75 = pd.DataFrame(
    data=result_knn_avc_75,
    columns=['KNN_avc_75'],
).rename_axis(index='item_id')

In [None]:
# Prescriptiveness score per item at the 75% service level: the tuned KNN
# newsvendor is compared against a sample-average-approximation (SAA) baseline.
result_knn_75_pscr = []
# The tuned values are stored in the order of `prods`; guard against a stale
# or partially filled list before pairing them up.
assert len(best_param_k_75) == len(prods), "best_param_k_75 must hold one entry per item in prods"
for prod, n_neighbors in zip(prods, best_param_k_75):
  cu = X_train.loc[prod]['sell_price'].mean()
  co_75 = ((cu-(0.75*cu))/0.75)

  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_75, n_neighbors = n_neighbors)
  knn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds_knn = knn.predict(X_test.loc[prod])

  # Baseline fitted on the demand history only (no features).
  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_75)
  saa.fit(Y_train.loc[prod])
  # One baseline decision per test observation, derived from the data instead
  # of the previously hard-coded 508.
  preds_saa = saa.predict(n_steps = len(Y_test.loc[prod]))

  pscr = prescriptiveness_score(Y_test.loc[prod], preds_knn, preds_saa ,cu = cu, co = co_75)
  result_knn_75_pscr.append(pscr)

In [None]:
# One-column table of the 75% prescriptiveness scores; the row index is labelled 'item_id'.
KNN_separate_pscr_75 = pd.DataFrame(
    data=result_knn_75_pscr,
    columns=['KNN_pscr_75'],
).rename_axis(index='item_id')

# knn for 50% service level

## randomized search
*   an empty list is created for every type of hyperparameter
*   in a for-loop the randomized search is executed for every item with cu and co of alpha = 0.5
*   the best hyperparams are calculated and added to the corresponding list

In [None]:
# Per-item hyperparameter search for the 50% service level.
# best_param_k_50[i] holds the selected n_neighbors for prods[i].
best_param_k_50 = []
for prod in prods:

  # Underage cost: mean sell price of the item in the training data.
  cu = X_train.loc[prod]['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.50.
  co_50 = ((cu-(0.50*cu))/0.50)
  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_50)

  # Fixed random_state so repeated runs visit the candidates in the same order.
  search = RandomizedSearchCV(knn, param_random, cv=5, scoring = scorer_avc, random_state = 0)
  search.fit(X_train.loc[prod], Y_train.loc[prod])

  # Read the winner by name and do not store it in `k`, which holds the
  # candidate array used to build param_random.
  best_param_k_50.append(search.best_params_['n_neighbors'])

## Newsvendor-Model
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescriptiveness score will be used for better comparability)

In [None]:
# Out-of-sample average costs per item at the 50% service level.
result_knn_avc_50 = []
# The tuned values are stored in the order of `prods`; guard against a stale
# or partially filled list before pairing them up.
assert len(best_param_k_50) == len(prods), "best_param_k_50 must hold one entry per item in prods"
# Pair each item with its tuned n_neighbors by position rather than using the
# item id as a list index.
for prod, n_neighbors in zip(prods, best_param_k_50):
  cu = X_train.loc[prod]['sell_price'].mean()
  co_50 = ((cu-(0.50*cu))/0.50)

  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_50, n_neighbors = n_neighbors)
  knn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds = knn.predict(X_test.loc[prod])

  # Costs on the test period, rounded to two decimals.
  avc = round(average_costs(Y_test.loc[prod], preds, cu = cu, co = co_50),2)
  result_knn_avc_50.append(avc)

In [None]:
# One-column table of the 50% average costs; the row index is labelled 'item_id'.
KNN_separate_avc_50 = pd.DataFrame(
    data=result_knn_avc_50,
    columns=['KNN_avc_50'],
).rename_axis(index='item_id')

In [None]:
# Prescriptiveness score per item at the 50% service level: the tuned KNN
# newsvendor is compared against a sample-average-approximation (SAA) baseline.
result_knn_50_pscr = []
# The tuned values are stored in the order of `prods`; guard against a stale
# or partially filled list before pairing them up.
assert len(best_param_k_50) == len(prods), "best_param_k_50 must hold one entry per item in prods"
for prod, n_neighbors in zip(prods, best_param_k_50):
  cu = X_train.loc[prod]['sell_price'].mean()
  co_50 = ((cu-(0.50*cu))/0.50)

  knn = KNeighborsWeightedNewsvendor(cu = cu, co = co_50, n_neighbors = n_neighbors)
  knn.fit(X_train.loc[prod], Y_train.loc[prod])
  preds_knn = knn.predict(X_test.loc[prod])

  # Baseline fitted on the demand history only (no features).
  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_50)
  saa.fit(Y_train.loc[prod])
  # One baseline decision per test observation, derived from the data instead
  # of the previously hard-coded 508.
  preds_saa = saa.predict(n_steps = len(Y_test.loc[prod]))

  pscr = prescriptiveness_score(Y_test.loc[prod], preds_knn, preds_saa ,cu = cu, co = co_50)
  result_knn_50_pscr.append(pscr)

In [None]:
# One-column table of the 50% prescriptiveness scores; the row index is labelled 'item_id'.
KNN_separate_pscr_50 = pd.DataFrame(
    data=result_knn_50_pscr,
    columns=['KNN_pscr_50'],
).rename_axis(index='item_id')

# Merging

In [None]:
# Place the per-service-level average-cost tables side by side (one column each).
avc_frames = [
    KNN_separate_avc_95,
    KNN_separate_avc_90,
    KNN_separate_avc_75,
    KNN_separate_avc_50,
]
KNN_separated_avc = pd.concat(avc_frames, axis='columns')
KNN_separated_avc

Unnamed: 0_level_0,KNN_avc_95,KNN_avc_90,KNN_avc_75,KNN_avc_50
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.61,1.12,2.53,4.34
1,0.51,0.92,2.01,3.37
2,1.04,2.03,4.36,7.55
3,2.74,5.22,11.54,20.82
4,1.2,2.14,4.69,8.83
5,0.64,1.23,2.86,5.04
6,0.7,1.29,2.83,5.08
7,0.98,1.85,4.28,7.92
8,0.49,0.92,2.02,3.74
9,0.51,0.97,2.04,3.79


In [None]:
# Place the per-service-level prescriptiveness tables side by side (one column each).
pscr_frames = [
    KNN_separate_pscr_95,
    KNN_separate_pscr_90,
    KNN_separate_pscr_75,
    KNN_separate_pscr_50,
]
KNN_separated_pscr = pd.concat(pscr_frames, axis='columns')
KNN_separated_pscr

Unnamed: 0_level_0,KNN_pscr_95,KNN_pscr_90,KNN_pscr_75,KNN_pscr_50
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.389899,0.292668,0.268284,0.251664
1,0.484868,0.473696,0.455351,0.430186
2,0.366856,0.209809,0.170859,0.130258
3,0.665134,0.660175,0.581053,0.501225
4,0.330748,0.230594,0.191143,0.154367
5,0.415665,0.322826,0.278808,0.164052
6,0.495907,0.44907,0.457087,0.419718
7,0.48304,0.419406,0.238871,0.156822
8,0.418848,0.338057,0.264229,0.213211
9,0.352075,0.270937,0.300352,0.220253


# Saving Files

In [None]:
# Write both result tables as CSV files to the mounted Drive folder.
for frame, csv_path in (
    (KNN_separated_avc, '/content/drive/MyDrive/M5/KNN_separated_avc.csv'),
    (KNN_separated_pscr, '/content/drive/MyDrive/M5/KNN_separated_pscr.csv'),
):
  frame.to_csv(csv_path)