<a href="https://colab.research.google.com/github/lennart194/thesis-code/blob/main/separatemodeling_SAA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install ddop

In [2]:
import pandas as pd
import numpy as np

from ddop.newsvendor import SampleAverageApproximationNewsvendor

from ddop.metrics import average_costs

In [3]:
# Mount Google Drive at /content/drive; the CSV paths used in this notebook live below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Loading data-set

*   load the complete data set (`ultimative_set.csv`)
*   sort the rows first by item, then by date, and set a multi-index on (`item_id`, `date`)
**now every item's time series can be selected via the index**

In [4]:
# Read the data set, discard the 'Unnamed: 0' column (presumably an index written by an
# earlier export -- confirm), order the rows by item and then by date, and use
# (item_id, date) as the row index.
data = (
    pd.read_csv('/content/drive/MyDrive/M5/ultimative_set.csv')
    .drop(columns=['Unnamed: 0'])
    .sort_values(by=['item_id', 'date'])
    .set_index(['item_id', 'date'])
)

## Split into feature matrix and target vector

In [5]:
# Target: the 'demand' column. Features: every remaining column.
Y = data['demand']
X = data.drop('demand', axis=1)

## train_test_split

In [6]:
# Row selectors for the (item_id, date) index: every item, restricted by date label.
# NOTE(review): both ranges use the label "2015" as their boundary. They are only disjoint
# if the date level holds strings such as "2015-01-01" (compared lexicographically) -- confirm.
rows = pd.IndexSlice
train_rows = rows[:, :"2015"]
test_rows = rows[:, "2015":"2017"]

X_train, Y_train = X.loc[train_rows, :], Y.loc[train_rows]
X_test, Y_test = X.loc[test_rows, :], Y.loc[test_rows]

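## Definition of the item_ids and cost parameters


*   `prods` contains the ids of all 25 products and is needed within the for loops later
*   `cu` is the mean sell price of the training data; `co_90`, `co_75` and `co_50` are derived from `cu` for alpha = 0.9, 0.75 and 0.5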

In [7]:
# Item ids iterated over by the per-item loops below.
prods = list(range(25))

# Underage cost: mean sell price in the training data, rounded to two decimals.
cu = round(X_train['sell_price'].mean(), 2)

# Overage cost per target level alpha, obtained by solving alpha = cu / (cu + co) for co:
# co = (cu - alpha * cu) / alpha, rounded to two decimals.
co_90, co_75, co_50 = (
    round((cu - (alpha * cu)) / alpha, 2) for alpha in (0.90, 0.75, 0.50)
)

# SAA alpha = 0.9

In [8]:
# For every item: fit an SAA newsvendor model on the item's training demand and record the
# average cost of its predictions on the item's test demand (cost parameters cu / co_90).
result_saa_90 = []
for prod in prods:
  y_train_prod = Y_train.loc[prod]
  y_test_prod = Y_test.loc[prod]

  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_90)
  saa.fit(y_train_prod)
  # Request one prediction per test observation instead of a hard-coded 508, so that
  # predictions and actuals always have matching lengths.
  preds = saa.predict(n_steps = len(y_test_prod))
  avc = round(average_costs(y_test_prod, preds, cu = cu, co = co_90),2)
  result_saa_90.append(avc)

In [9]:
# One row per entry of result_saa_90 (default positional index, labelled 'item_id').
SAA_separate_avc_90 = pd.DataFrame({'SAA_avc_90': result_saa_90}).rename_axis('item_id')

# SAA alpha = 0.75

In [11]:
# For every item: fit an SAA newsvendor model on the item's training demand and record the
# average cost of its predictions on the item's test demand (cost parameters cu / co_75).
result_saa_75 = []
for prod in prods:
  y_train_prod = Y_train.loc[prod]
  y_test_prod = Y_test.loc[prod]

  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_75)
  saa.fit(y_train_prod)
  # Request one prediction per test observation instead of a hard-coded 508, so that
  # predictions and actuals always have matching lengths.
  preds = saa.predict(n_steps = len(y_test_prod))
  avc = round(average_costs(y_test_prod, preds, cu = cu, co = co_75),2)
  result_saa_75.append(avc)

In [12]:
# One row per entry of result_saa_75 (default positional index, labelled 'item_id').
SAA_separate_avc_75 = pd.DataFrame({'SAA_avc_75': result_saa_75}).rename_axis('item_id')

# SAA alpha = 0.5

In [14]:
# For every item: fit an SAA newsvendor model on the item's training demand and record the
# average cost of its predictions on the item's test demand (cost parameters cu / co_50).
result_saa_50 = []
for prod in prods:
  y_train_prod = Y_train.loc[prod]
  y_test_prod = Y_test.loc[prod]

  saa = SampleAverageApproximationNewsvendor(cu = cu, co = co_50)
  saa.fit(y_train_prod)
  # Request one prediction per test observation instead of a hard-coded 508, so that
  # predictions and actuals always have matching lengths.
  preds = saa.predict(n_steps = len(y_test_prod))
  avc = round(average_costs(y_test_prod, preds, cu = cu, co = co_50),2)
  result_saa_50.append(avc)

In [15]:
# One row per entry of result_saa_50 (default positional index, labelled 'item_id').
SAA_separate_avc_50 = pd.DataFrame({'SAA_avc_50': result_saa_50}).rename_axis('item_id')

# Combine the results

In [17]:
# Place the three per-alpha cost columns side by side, aligned on the shared row index.
frames = [SAA_separate_avc_90, SAA_separate_avc_75, SAA_separate_avc_50]
SAA_separated = pd.concat(frames, axis='columns')
SAA_separated

Unnamed: 0_level_0,SAA_avc_90,SAA_avc_75,SAA_avc_50
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,3.74,7.49,12.6
1,1.8,3.94,6.31
2,2.91,5.61,9.27
3,5.29,10.17,15.4
4,3.24,6.19,11.14
5,2.08,4.22,6.42
6,3.33,7.1,11.91
7,2.73,4.8,8.02
8,1.94,3.71,6.41
9,1.83,3.97,6.62


In [18]:
# Write the combined cost table to Google Drive as CSV.
output_path = '/content/drive/MyDrive/M5/SAA_separated.csv'
SAA_separated.to_csv(output_path)