<a href="https://colab.research.google.com/github/lennart194/thesis-code/blob/main/separatemodeling_SAA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install ddop

In [None]:
import pandas as pd
import numpy as np

from ddop.newsvendor import SampleAverageApproximationNewsvendor

from ddop.metrics import average_costs

In [None]:
# Mount Google Drive inside the Colab runtime; the CSV paths used further
# down live below this mount point.
from google.colab import drive

drive_root = '/content/drive'
drive.mount(drive_root)

Mounted at /content/drive


## Loading data-set

*   Load the complete data set (`ultimative_set.csv`).
*   Sort the rows first by item, then by date, and set a multi-index on (`item_id`, `date`).

**Now every item's time series can be selected via the index.**

In [None]:
# Read the full data set and index it by (item_id, date) in one chain.
# 'Unnamed: 0' is presumably a leftover row index from an earlier export.
# Sorting by item and then date keeps each item's rows contiguous and
# chronologically ordered before the multi-index is set.
data = (
    pd.read_csv('/content/drive/MyDrive/M5/ultimative_set.csv')
    .drop(columns=['Unnamed: 0'])
    .sort_values(by=['item_id', 'date'])
    .set_index(['item_id', 'date'])
)

## Split in feature and target matrix

In [None]:
# Target vector: the 'demand' column. Feature matrix: every other column.
Y = data.loc[:, 'demand']
X = data.drop(columns='demand')

## train_test_split

In [None]:
# Row selectors over the (item_id, date) index: all items, restricted by date.
# Label slices are inclusive at both ends.
# NOTE(review): both selectors use "2015" as a boundary. With string date
# labels such as "2015-01-01" (presumably the case, since the CSV is read
# without date parsing) the bare "2015" sorts before every 2015 date, so the
# two sets do not overlap. If `date` were a datetime level, both slices would
# contain the year 2015 - confirm the dtype before changing the loader.
idx = pd.IndexSlice
train_rows = idx[:, :"2015"]
test_rows = idx[:, "2015":'2017']

X_train = X.loc[train_rows, :]
Y_train = Y.loc[train_rows]

X_test = X.loc[test_rows, :]
Y_test = Y.loc[test_rows]

## Definition of the item_ids 


*   needed by the `for` loops below
*   contains the ids of all 25 products

In [None]:
# Item ids 0..24, one per product; iterated by the per-item loops below.
prods = [*range(25)]

# SAA alpha = 0.95

In [None]:
# Fit one SAA newsvendor model per item and record its average cost on the
# test period, for a service level of 0.95.
service_level = 0.95

result_saa_95 = []
for prod in prods:
  # Underage cost: mean selling price of this item over the training period.
  cu = X_train.loc[prod]['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals the service level.
  co_95 = ((cu-(service_level*cu))/service_level)

  # Test demand of this item; its length is the prediction horizon.
  y_test_prod = Y_test.loc[prod]

  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_95)
  saa.fit(Y_train.loc[prod])
  # Derive the horizon from the test data instead of a hard-coded 508, so the
  # prediction length always matches the series it is scored against.
  preds = saa.predict(n_steps = len(y_test_prod))
  avc = round(average_costs(y_test_prod, preds, cu = cu, co = co_95),2)
  result_saa_95.append(avc)

In [None]:
# One row per item. The index is built from `prods` so each cost is labelled
# with the item id it was computed for, rather than relying on the default
# positional index happening to match the ids.
SAA_separate_avc_95 = pd.DataFrame(
    result_saa_95,
    columns=['SAA_avc_95'],
    index=pd.Index(prods, name='item_id'),
)

# SAA alpha = 0.9

In [None]:
# Fit one SAA newsvendor model per item and record its average cost on the
# test period, for a service level of 0.9.
service_level = 0.9

result_saa_90 = []
for prod in prods:
  # Underage cost: mean selling price of this item over the training period.
  cu = X_train.loc[prod]['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals the service level.
  co_90 = ((cu-(service_level*cu))/service_level)

  # Test demand of this item; its length is the prediction horizon.
  y_test_prod = Y_test.loc[prod]

  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_90)
  saa.fit(Y_train.loc[prod])
  # Derive the horizon from the test data instead of a hard-coded 508, so the
  # prediction length always matches the series it is scored against.
  preds = saa.predict(n_steps = len(y_test_prod))
  avc = round(average_costs(y_test_prod, preds, cu = cu, co = co_90),2)
  result_saa_90.append(avc)

In [None]:
# One row per item. The index is built from `prods` so each cost is labelled
# with the item id it was computed for, rather than relying on the default
# positional index happening to match the ids.
SAA_separate_avc_90 = pd.DataFrame(
    result_saa_90,
    columns=['SAA_avc_90'],
    index=pd.Index(prods, name='item_id'),
)

# SAA alpha = 0.75

In [None]:
# Fit one SAA newsvendor model per item and record its average cost on the
# test period, for a service level of 0.75.
service_level = 0.75

result_saa_75 = []
for prod in prods:
  # Underage cost: mean selling price of this item over the training period.
  cu = X_train.loc[prod]['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals the service level.
  co_75 = ((cu-(service_level*cu))/service_level)

  # Test demand of this item; its length is the prediction horizon.
  y_test_prod = Y_test.loc[prod]

  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_75)
  saa.fit(Y_train.loc[prod])
  # Derive the horizon from the test data instead of a hard-coded 508, so the
  # prediction length always matches the series it is scored against.
  preds = saa.predict(n_steps = len(y_test_prod))
  avc = round(average_costs(y_test_prod, preds, cu = cu, co = co_75),2)
  result_saa_75.append(avc)

In [None]:
# One row per item. The index is built from `prods` so each cost is labelled
# with the item id it was computed for, rather than relying on the default
# positional index happening to match the ids.
SAA_separate_avc_75 = pd.DataFrame(
    result_saa_75,
    columns=['SAA_avc_75'],
    index=pd.Index(prods, name='item_id'),
)

# SAA alpha = 0.5

In [None]:
# Fit one SAA newsvendor model per item and record its average cost on the
# test period, for a service level of 0.5.
service_level = 0.50

result_saa_50 = []
for prod in prods:
  # Underage cost: mean selling price of this item over the training period.
  cu = X_train.loc[prod]['sell_price'].mean()
  # Overage cost chosen so that cu / (cu + co) equals the service level.
  co_50 = ((cu-(service_level*cu))/service_level)

  # Test demand of this item; its length is the prediction horizon.
  y_test_prod = Y_test.loc[prod]

  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_50)
  saa.fit(Y_train.loc[prod])
  # Derive the horizon from the test data instead of a hard-coded 508, so the
  # prediction length always matches the series it is scored against.
  preds = saa.predict(n_steps = len(y_test_prod))
  avc = round(average_costs(y_test_prod, preds, cu = cu, co = co_50),2)
  result_saa_50.append(avc)

In [None]:
# One row per item. The index is built from `prods` so each cost is labelled
# with the item id it was computed for, rather than relying on the default
# positional index happening to match the ids.
SAA_separate_avc_50 = pd.DataFrame(
    result_saa_50,
    columns=['SAA_avc_50'],
    index=pd.Index(prods, name='item_id'),
)

# Combine the results

In [None]:
# Place the per-service-level cost tables side by side, aligned on item_id,
# giving one row per item and one column per service level.
per_level_frames = [
    SAA_separate_avc_95,
    SAA_separate_avc_90,
    SAA_separate_avc_75,
    SAA_separate_avc_50,
]
SAA_separated = pd.concat(per_level_frames, axis='columns')

In [None]:
# Persist the combined cost table to Google Drive (the index is written too).
SAA_separated.to_csv(path_or_buf='/content/drive/MyDrive/M5/SAA_separated.csv')