<a href="https://colab.research.google.com/github/lennart194/thesis-code/blob/main/separatemodeling_lr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install ddop

In [None]:
import numpy as np
import pandas as pd

from ddop.metrics import average_costs
from ddop.metrics import prescriptiveness_score
from ddop.newsvendor import LinearRegressionNewsvendor
from ddop.newsvendor import SampleAverageApproximationNewsvendor

In [None]:
# Mount Google Drive at /content/drive so the cells that read and write
# /content/drive/MyDrive/M5/... can reach those files.
# Colab-only: the google.colab package is not available in other Jupyter environments.
from google.colab import drive
drive.mount('/content/drive')

## Loading data-set

*   load the whole ultimative set
*   sort the rows first by item, then by date and set a multi index
**now every item's time series can be separated by the index**

In [None]:
# Load the full data set and index it by (item_id, date).
# Rows are sorted by item and then by date before the MultiIndex is set, so each
# item's rows form one contiguous, date-ordered block.
data = (
    pd.read_csv('/content/drive/MyDrive/M5/ultimative_set.csv')
    # NOTE(review): 'Unnamed: 0' is presumably a leftover index column from an
    # earlier to_csv export - confirm.
    .drop(columns=['Unnamed: 0'])
    .sort_values(by=['item_id', 'date'])
    .set_index(['item_id', 'date'])
)

## Split in feature and target matrix

In [None]:
# Target: the 'demand' column. Features: every remaining column of the data set.
Y = data['demand']
X = data.drop(columns=['demand'])

## train_test_split

In [None]:
# Split along the 'date' level of the (item_id, date) MultiIndex; every item is kept
# in both parts.
# NOTE(review): .loc label slices are inclusive on both ends and both date slices use
# "2015" as a bound. Train and test only stay disjoint if no index label equals "2015"
# exactly (true for 'YYYY-MM-DD' strings) - confirm the date format/dtype.
all_items = slice(None)
train_dates = slice(None, "2015")
test_dates = slice("2015", "2017")

X_train = X.loc[(all_items, train_dates), :]
Y_train = Y.loc[(all_items, train_dates)]

X_test = X.loc[(all_items, test_dates), :]
Y_test = Y.loc[(all_items, test_dates)]

## Definition of the item_ids 


*   will be needed within the for loops later
*   will contain all 25 products

In [None]:
# Item ids 0..24, iterated by the per-item modelling loops.
prods = list(range(25))

# Newsvendor-Model 95%
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescriptiveness score will be used for better comparability)

In [None]:
# Average newsvendor cost of the linear-regression model, one entry per item,
# for a 95% service level.
result_lr_95_avc = []
for prod in prods:
  # Per-item slices of the train and test data.
  item_X_train, item_y_train = X_train.loc[prod], Y_train.loc[prod]
  item_X_test, item_y_test = X_test.loc[prod], Y_test.loc[prod]

  # Underage cost: mean selling price of the item in the training data.
  cu = item_X_train['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.95.
  co_95 = (cu - 0.95 * cu) / 0.95

  lr = LinearRegressionNewsvendor(cu=cu, co=co_95)
  lr.fit(item_X_train, item_y_train)
  preds = lr.predict(item_X_test)

  avc = round(average_costs(item_y_test, preds, cu=cu, co=co_95), 2)
  result_lr_95_avc.append(avc)

In [None]:
# One row per item (row position == item id) holding the 95% average costs.
LR_separate_avc_95 = pd.DataFrame(
    result_lr_95_avc, columns=['LR_avc_95']
).rename_axis('item_id')

In [None]:
# Prescriptiveness score of the linear-regression newsvendor per item for a 95%
# service level, using the sample-average-approximation (SAA) model as the baseline
# passed to prescriptiveness_score.
result_lr_95_pscr = []
for prod in prods:
  # Per-item slices of the train and test data.
  item_X_train, item_y_train = X_train.loc[prod], Y_train.loc[prod]
  item_X_test, item_y_test = X_test.loc[prod], Y_test.loc[prod]

  # Underage cost: mean selling price of the item in the training data.
  cu = item_X_train['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.95.
  co_95 = (cu - 0.95 * cu) / 0.95

  lr = LinearRegressionNewsvendor(cu=cu, co=co_95)
  lr.fit(item_X_train, item_y_train)
  preds_lr = lr.predict(item_X_test)

  # The SAA baseline is fitted on the demand history only (no features).
  saa = SampleAverageApproximationNewsvendor(cu=cu, co=co_95)
  saa.fit(item_y_train)
  # Request one SAA decision per test observation so the baseline always lines up
  # with preds_lr and the test demand, whatever the length of the test period.
  preds_saa = saa.predict(n_steps=len(item_y_test))

  pscr = prescriptiveness_score(item_y_test, preds_lr, preds_saa, cu=cu, co=co_95)
  result_lr_95_pscr.append(pscr)

In [None]:
# One row per item (row position == item id) holding the 95% prescriptiveness scores.
LR_separate_pscr_95 = pd.DataFrame(
    result_lr_95_pscr, columns=['LR_pscr_95']
).rename_axis('item_id')

# Newsvendor-Model 90%
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescriptiveness score will be used for better comparability)

In [None]:
# Average newsvendor cost of the linear-regression model, one entry per item,
# for a 90% service level.
result_lr_90_avc = []
for prod in prods:
  # Per-item slices of the train and test data.
  item_X_train, item_y_train = X_train.loc[prod], Y_train.loc[prod]
  item_X_test, item_y_test = X_test.loc[prod], Y_test.loc[prod]

  # Underage cost: mean selling price of the item in the training data.
  cu = item_X_train['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.90.
  co_90 = (cu - 0.90 * cu) / 0.90

  lr = LinearRegressionNewsvendor(cu=cu, co=co_90)
  lr.fit(item_X_train, item_y_train)
  preds = lr.predict(item_X_test)

  avc = round(average_costs(item_y_test, preds, cu=cu, co=co_90), 2)
  result_lr_90_avc.append(avc)

In [None]:
# One row per item (row position == item id) holding the 90% average costs.
LR_separate_avc_90 = pd.DataFrame(
    result_lr_90_avc, columns=['LR_avc_90']
).rename_axis('item_id')

In [None]:
# Prescriptiveness score of the linear-regression newsvendor per item for a 90%
# service level, using the sample-average-approximation (SAA) model as the baseline
# passed to prescriptiveness_score.
result_lr_90_pscr = []
for prod in prods:
  # Per-item slices of the train and test data.
  item_X_train, item_y_train = X_train.loc[prod], Y_train.loc[prod]
  item_X_test, item_y_test = X_test.loc[prod], Y_test.loc[prod]

  # Underage cost: mean selling price of the item in the training data.
  cu = item_X_train['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.90.
  co_90 = (cu - 0.90 * cu) / 0.90

  lr = LinearRegressionNewsvendor(cu=cu, co=co_90)
  lr.fit(item_X_train, item_y_train)
  preds_lr = lr.predict(item_X_test)

  # The SAA baseline is fitted on the demand history only (no features).
  saa = SampleAverageApproximationNewsvendor(cu=cu, co=co_90)
  saa.fit(item_y_train)
  # Request one SAA decision per test observation so the baseline always lines up
  # with preds_lr and the test demand, whatever the length of the test period.
  preds_saa = saa.predict(n_steps=len(item_y_test))

  pscr = prescriptiveness_score(item_y_test, preds_lr, preds_saa, cu=cu, co=co_90)
  result_lr_90_pscr.append(pscr)

In [None]:
# One row per item (row position == item id) holding the 90% prescriptiveness scores.
LR_separate_pscr_90 = pd.DataFrame(
    result_lr_90_pscr, columns=['LR_pscr_90']
).rename_axis('item_id')

# Newsvendor-Model 75%
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescriptiveness score will be used for better comparability)

In [None]:
# Average newsvendor cost of the linear-regression model, one entry per item,
# for a 75% service level.
result_lr_75_avc = []
for prod in prods:
  # Per-item slices of the train and test data.
  item_X_train, item_y_train = X_train.loc[prod], Y_train.loc[prod]
  item_X_test, item_y_test = X_test.loc[prod], Y_test.loc[prod]

  # Underage cost: mean selling price of the item in the training data.
  cu = item_X_train['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.75.
  co_75 = (cu - 0.75 * cu) / 0.75

  lr = LinearRegressionNewsvendor(cu=cu, co=co_75)
  lr.fit(item_X_train, item_y_train)
  preds = lr.predict(item_X_test)

  avc = round(average_costs(item_y_test, preds, cu=cu, co=co_75), 2)
  result_lr_75_avc.append(avc)

In [None]:
# One row per item (row position == item id) holding the 75% average costs.
LR_separate_avc_75 = pd.DataFrame(
    result_lr_75_avc, columns=['LR_avc_75']
).rename_axis('item_id')

In [None]:
# Prescriptiveness score of the linear-regression newsvendor per item for a 75%
# service level, using the sample-average-approximation (SAA) model as the baseline
# passed to prescriptiveness_score.
result_lr_75_pscr = []
for prod in prods:
  # Per-item slices of the train and test data.
  item_X_train, item_y_train = X_train.loc[prod], Y_train.loc[prod]
  item_X_test, item_y_test = X_test.loc[prod], Y_test.loc[prod]

  # Underage cost: mean selling price of the item in the training data.
  cu = item_X_train['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.75.
  co_75 = (cu - 0.75 * cu) / 0.75

  lr = LinearRegressionNewsvendor(cu=cu, co=co_75)
  lr.fit(item_X_train, item_y_train)
  preds_lr = lr.predict(item_X_test)

  # The SAA baseline is fitted on the demand history only (no features).
  saa = SampleAverageApproximationNewsvendor(cu=cu, co=co_75)
  saa.fit(item_y_train)
  # Request one SAA decision per test observation so the baseline always lines up
  # with preds_lr and the test demand, whatever the length of the test period.
  preds_saa = saa.predict(n_steps=len(item_y_test))

  pscr = prescriptiveness_score(item_y_test, preds_lr, preds_saa, cu=cu, co=co_75)
  result_lr_75_pscr.append(pscr)

In [None]:
# One row per item (row position == item id) holding the 75% prescriptiveness scores.
LR_separate_pscr_75 = pd.DataFrame(
    result_lr_75_pscr, columns=['LR_pscr_75']
).rename_axis('item_id')

# Newsvendor-Model 50%
*   the data-driven newsvendor model is executed in a loop for every single item (with optimal hyperparameters)
*   the average_costs for every item are calculated (later the prescriptiveness score will be used for better comparability)

In [None]:
# Average newsvendor cost of the linear-regression model, one entry per item,
# for a 50% service level.
result_lr_50_avc = []
for prod in prods:
  # Per-item slices of the train and test data.
  item_X_train, item_y_train = X_train.loc[prod], Y_train.loc[prod]
  item_X_test, item_y_test = X_test.loc[prod], Y_test.loc[prod]

  # Underage cost: mean selling price of the item in the training data.
  cu = item_X_train['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.50.
  co_50 = (cu - 0.50 * cu) / 0.50

  lr = LinearRegressionNewsvendor(cu=cu, co=co_50)
  lr.fit(item_X_train, item_y_train)
  preds = lr.predict(item_X_test)

  avc = round(average_costs(item_y_test, preds, cu=cu, co=co_50), 2)
  result_lr_50_avc.append(avc)

In [None]:
# One row per item (row position == item id) holding the 50% average costs.
LR_separate_avc_50 = pd.DataFrame(
    result_lr_50_avc, columns=['LR_avc_50']
).rename_axis('item_id')

In [None]:
# Prescriptiveness score of the linear-regression newsvendor per item for a 50%
# service level, using the sample-average-approximation (SAA) model as the baseline
# passed to prescriptiveness_score.
result_lr_50_pscr = []
for prod in prods:
  # Per-item slices of the train and test data.
  item_X_train, item_y_train = X_train.loc[prod], Y_train.loc[prod]
  item_X_test, item_y_test = X_test.loc[prod], Y_test.loc[prod]

  # Underage cost: mean selling price of the item in the training data.
  cu = item_X_train['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals 0.50.
  co_50 = (cu - 0.50 * cu) / 0.50

  lr = LinearRegressionNewsvendor(cu=cu, co=co_50)
  lr.fit(item_X_train, item_y_train)
  preds_lr = lr.predict(item_X_test)

  # The SAA baseline is fitted on the demand history only (no features).
  saa = SampleAverageApproximationNewsvendor(cu=cu, co=co_50)
  saa.fit(item_y_train)
  # Request one SAA decision per test observation so the baseline always lines up
  # with preds_lr and the test demand, whatever the length of the test period.
  preds_saa = saa.predict(n_steps=len(item_y_test))

  pscr = prescriptiveness_score(item_y_test, preds_lr, preds_saa, cu=cu, co=co_50)
  result_lr_50_pscr.append(pscr)

In [None]:
# One row per item (row position == item id) holding the 50% prescriptiveness scores.
LR_separate_pscr_50 = pd.DataFrame(
    result_lr_50_pscr, columns=['LR_pscr_50']
).rename_axis('item_id')

# Merging

In [None]:
# Place the per-service-level average-cost frames side by side (one column per
# service level, aligned on the shared item_id index) and display the result.
avc_frames = [
    LR_separate_avc_95,
    LR_separate_avc_90,
    LR_separate_avc_75,
    LR_separate_avc_50,
]
LR_separated_avc = pd.concat(avc_frames, axis=1)
LR_separated_avc

Unnamed: 0_level_0,LR_avc_95,LR_avc_90,LR_avc_75,LR_avc_50
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.45,0.95,2.36,4.02
1,0.37,0.81,1.83,3.26
2,0.87,2.0,4.35,7.23
3,2.12,4.32,10.61,19.39
4,0.88,1.92,4.24,8.89
5,0.51,1.11,2.48,4.46
6,0.51,1.13,2.72,5.0
7,0.73,1.55,3.99,7.76
8,0.38,0.78,1.98,3.45
9,0.38,0.78,1.94,3.51


In [None]:
# Place the per-service-level prescriptiveness-score frames side by side (one column
# per service level, aligned on the shared item_id index) and display the result.
pscr_frames = [
    LR_separate_pscr_95,
    LR_separate_pscr_90,
    LR_separate_pscr_75,
    LR_separate_pscr_50,
]
LR_separated_pscr = pd.concat(pscr_frames, axis=1)
LR_separated_pscr

Unnamed: 0_level_0,LR_pscr_95,LR_pscr_90,LR_pscr_75,LR_pscr_50
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.554848,0.443399,0.315741,0.306944
1,0.623161,0.564712,0.504451,0.448251
2,0.469943,0.259715,0.171012,0.167468
3,0.741027,0.694325,0.614967,0.535613
4,0.505441,0.358591,0.269692,0.148169
5,0.540195,0.422634,0.374732,0.259606
6,0.63108,0.531465,0.477876,0.42837
7,0.613404,0.508554,0.291368,0.174098
8,0.554586,0.44672,0.278741,0.273576
9,0.517012,0.416323,0.334271,0.278727


# Saving Files

In [None]:
# Write both merged result tables as CSV files to the M5 folder on the mounted Drive.
for frame, target in (
    (LR_separated_avc, '/content/drive/MyDrive/M5/LR_separated_avc.csv'),
    (LR_separated_pscr, '/content/drive/MyDrive/M5/LR_separated_pscr.csv'),
):
  frame.to_csv(target)