In [11]:
# Mount Google Drive under /content/drive so files stored there can be read
# (google.colab is only available inside a Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import plotly.express as px
from keras.models import Sequential
from keras.layers import LSTM,Dense,Flatten,Dropout
from plotly.subplots import make_subplots
from keras.regularizers import l2

# Data Preparation

In [12]:
# Root folder of the project data on the mounted drive.
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# because a later cell builds paths from it.
dir = 'drive/MyDrive/3001 Project/data/'

# Raw input tables: calendar, sell prices and the sales series used for training.
cal = pd.read_csv(f'{dir}raw data/calendar.csv')
sell = pd.read_csv(
    f'{dir}raw data/sell_prices.csv.zip',
    compression = 'zip',
)
train_eval = pd.read_csv(
    f'{dir}raw data/sales_train_evaluation.csv.zip',
    compression = 'zip',
)


In [4]:
# Item number suffix; the helper functions prepend 'FOODS_3_' to it.
product = '099'

In [None]:
# Calendar table with dummy columns, keyed by the day identifier column 'd'.
cal_dummies = pd.read_csv(f'{dir}calendar_w_dummies.csv').set_index(['d'])
cal_dummies.head()

Unnamed: 0_level_0,wday,month,snap_CA,snap_TX,snap_WI,event
d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
d_1,1,1,0,0,0,0
d_2,2,1,0,0,0,0
d_3,3,1,0,0,0,0
d_4,4,2,1,1,0,0
d_5,5,2,1,0,1,0


In [2]:
def get_active(df):
  """
  Drop the leading rows of df that come before the first row holding a
  positive value.

  A row counts as active when its row-wise maximum is greater than 0. The
  result starts at the first active row and keeps every row after it. If no
  row is active, all rows are returned.

  The input frame is not modified (no helper column is added to it).

  Args:
    df: DataFrame with one observation per row.

  Returns:
    The rows of df from the first active row onwards, with df's columns.
  """
  # True for every row in which at least one column is positive
  has_sales = (df.max(axis = 1) > 0).to_numpy()
  # argmax on a boolean array is the position of the first True
  start = int(has_sales.argmax()) if has_sales.any() else 0

  return df.iloc[start:]


def plot_learn_curve(xlist, ylist, xtitle, store_name):
  """
  Plot the averaged RMSSE against the values of one hyper-parameter.

  Args:
    xlist: hyper-parameter values, one per trained model.
    ylist: RMSSE results, one entry per value in xlist. An entry may be a
      single number or a sequence of scores (what get_avg_rmsse returns);
      sequences are averaged so that exactly one point is drawn per x value.
    xtitle: hyper-parameter name, used as x-axis label and inside the title.
    store_name: label appended to the title (the callers pass a state code).

  The title also uses the module-level `product` value.
  """
  scores = np.asarray(ylist, dtype = float)
  if scores.ndim > 1:
    # collapse everything but the first axis so the curve really shows the
    # 'Averaged RMSSE' announced in the title
    scores = scores.mean(axis = tuple(range(1, scores.ndim)))

  fig = go.Figure(data = go.Scatter(x = xlist, y = scores))
  comb_title = 'Averaged RMSSE for Different ' + xtitle + ' for FOODS_3_' + product + ' Sales in ' + store_name
  fig.update_layout(title = comb_title, xaxis_title = xtitle,
                    yaxis_title = 'RMSSE')
  fig.show()


def rmsse(train, val, pred):
  """
  Root Mean Squared Scaled Error of pred against val.

  The squared forecast error is scaled by the mean squared one-step
  difference of the training series, measured from its first positive
  value onwards, and averaged over the number of forecast points.

  Args:
    train: 1-D training series (array-like).
    val: observed values for the forecast period (array-like).
    pred: predicted values, same shape as val.

  Returns:
    The RMSSE as a float. The result is inf or nan when the scaling term is
    zero or undefined (constant training series, or fewer than two
    training points after the first positive value).
  """
  train = np.asarray(train, dtype = float)
  val = np.asarray(val, dtype = float)
  pred = np.asarray(pred, dtype = float)

  sq_err = np.sum(np.square(pred - val))

  # start the scaling window at the first positive training value
  positive = np.flatnonzero(train > 0)
  start = int(positive[0]) if positive.size else 0
  train_sub = train[start:]

  scale = np.sum(np.square(np.diff(train_sub)))/(train_sub.size-1)

  # average over the actual number of forecast points rather than a fixed 28
  return np.sqrt(sq_err/scale/val.size)


def get_avg_rmsse(train, val, pred):
  """
  Compute the RMSSE separately for every column.

  Column j of train, val and pred is passed to rmsse; the scores are
  returned as a list with one entry per column of train (no averaging is
  done here).
  """
  n_series = train.shape[1]
  return [rmsse(train[:, j], val[:, j], pred[:, j]) for j in range(n_series)]


def plot_daily_truth_pred(titles, truth, pred, state):
  """
  Draw one line-chart panel per column comparing ground truth (red) with
  predictions (blue).

  Args:
    titles: subplot titles, one per column of truth.
    truth: 2-D array of observed values, one column per panel.
    pred: 2-D array of predicted values with the same column layout.
    state: label inserted into the figure title.

  The number of panels and the x range follow the shape of the inputs, so
  the function is not tied to three columns or 28 rows. The title also uses
  the module-level `product` value.
  """
  n_panels = truth.shape[1]
  fig = make_subplots(rows = 1, cols = n_panels, subplot_titles = titles)

  truth_days = list(range(truth.shape[0]))
  pred_days = list(range(pred.shape[0]))

  for i in range(n_panels):
    fig.add_trace(go.Scatter(x = truth_days, y = truth[:, i],
                             mode = 'lines', name = 'Ground Truth',
                             line = dict(color = 'red')), row = 1, col = i+1)

    fig.add_trace(go.Scatter(x = pred_days, y = pred[:, i], mode = 'lines',
                             name = 'Prediction', line = dict(color = 'blue')),
                  row = 1, col = i+1)

    # label the x axis of every panel that exists in the single-row grid
    fig.update_xaxes(title_text = "Days", row = 1, col = i+1)

  title = 'Ground Truth v.s. Predictions for FOODS_3_' + product + ' Daily Unit Sales in ' + state + ' Stores'
  fig.update_layout(title_text = title)
  # the y label is shown once, on the left-most panel
  fig.update_yaxes(title_text = "Daily Unit Sales", row = 1, col = 1)
  fig.show()


def plot_sum_truth_pred(x_list, truth, pred, state):
  """
  Grouped bar chart of column totals: ground truth (red) next to
  predictions (blue).

  x_list supplies the bar labels, truth and pred are summed along axis 0,
  and state is inserted into the title together with the module-level
  `product` value.
  """
  bar_specs = (
      ('Ground Truth', truth, 'red'),
      ('Predictions', pred, 'blue'),
  )

  fig = go.Figure()
  for label, values, colour in bar_specs:
    totals = np.sum(values, axis = 0)
    fig.add_trace(go.Bar(x = x_list, y = totals, name = label,
                         marker_color = colour))

  heading = ''.join(['Ground Truth v.s. Predictions for Sum of FOODS_3_',
                     product, ' Sales in ', state, ' Stores'])
  fig.update_layout(barmode = 'group', title_text = heading)
  fig.show()


def get_product_state(item_id, window_size, batch_size, state, horizon = 28):
  """
  Build windowed train / validation / test arrays for one FOODS_3 item in
  one state from the module-level `train_eval` frame.

  The matching rows are indexed by store, the first five remaining columns
  are dropped, and the frame is transposed so that each store becomes a
  column (the rows are presumably the per-day sales; confirm against the
  train_eval schema). Leading rows before the first positive value are
  removed, then sliding windows are cut from the result.

  Args:
    item_id: item number suffix; the item looked up is 'FOODS_3_' + item_id.
    window_size: number of consecutive rows in each input window.
    batch_size: leading windows are dropped until the total number of
      windows is a multiple of this value.
    state: value matched against the 'state_id' column.
    horizon: number of windows in the validation split and in the test
      split (default 28, the value that used to be hard-coded).

  Returns:
    (X_train, Y_train, X_val, Y_val, X_test, Y_test, stores). Each X has
    shape (samples, window_size, stores) and holds standardized values;
    each Y has shape (samples, stores) and holds the unscaled row that
    follows the window; stores is the column index of the store ids.

  Raises:
    ValueError: if horizon is not positive, no rows match the item and
      state, or there are not more than window_size active rows.
  """
  if horizon <= 0:
    raise ValueError('horizon must be a positive integer')

  item = 'FOODS_3_' + item_id
  selected = train_eval[(train_eval['item_id'] == item) &
                        (train_eval['state_id'] == state)]
  if selected.empty:
    raise ValueError('no rows found for ' + item + ' in state ' + str(state))

  # set_index returns a new frame, so the filtered selection is not edited in place
  df = selected.set_index(['store_id']).iloc[:, 5:].T

  # filter out periods when the product is not actively sold
  df = get_active(df)

  # standardize every store column with its own mean & std
  # (a single mean/std over the whole table would be the alternative)
  # NOTE(review): the statistics are taken over the full series, including
  # the rows that later land in the validation and test splits.
  col_std = df.std().replace(0, 1)  # a constant column would divide by zero
  df_std = (df - df.mean())/col_std

  # convert data according to length of window
  n_windows = df.shape[0] - window_size
  if n_windows <= 0:
    raise ValueError('window_size must be smaller than the number of active rows')

  std_values = df_std.to_numpy(dtype = float)
  raw_values = df.to_numpy(dtype = float)
  # X[i] is the standardized window starting at row i,
  # Y[i] is the unscaled row right after that window
  X = np.stack([std_values[i:i + window_size] for i in range(n_windows)])
  Y = raw_values[window_size:]

  # trim dataset when size of data % batch size != 0
  # NOTE(review): only the total number of windows becomes a multiple of
  # batch_size; the train split cut below need not be one.
  drop = X.shape[0] % batch_size
  if drop > 0:
    X = X[drop:]
    Y = Y[drop:]

  test_start = -horizon
  val_start = -2 * horizon
  return (X[:val_start], Y[:val_start],
          X[val_start:test_start], Y[val_start:test_start],
          X[test_start:], Y[test_start:], df.columns)

## CA

In [None]:
# Validation sweep over the window length for CA (batch size fixed at 32).
# A fresh model is trained for every setting.
RMSSE = []
for ws in [4, 8, 16, 32]:
  # the windowed dataset has to be rebuilt for every window length
  (X_train_CA, Y_train_CA, X_val_CA, Y_val_CA,
   X_test_CA, Y_test_CA, stores_CA) = get_product_state(product, ws, 32, 'CA')

  mod = Sequential()
  mod.add(LSTM(32, return_sequences = True,
               input_shape = (X_train_CA.shape[1], X_train_CA.shape[2])))
  mod.add(LSTM(16, return_sequences = True))
  mod.add(Flatten())
  mod.add(Dense(16, activation = 'relu'))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 4
  mod.add(Dense(Y_train_CA.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_CA, Y_train_CA, epochs = 50, verbose = 0, shuffle = False,
          batch_size = 32)

  pred = mod.predict(X_val_CA)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_CA, Y_val_CA, pred))





In [None]:
plot_learn_curve([4, 8, 16, 32], RMSSE, 'Window Length', 'CA')

Optimal Window Length = 16

In [6]:
CA_window_length = 16

In [None]:
# Validation sweep over the batch size for CA with the window length fixed
# at CA_window_length. A fresh model is trained for every setting.
RMSSE = []
for bs in [4, 8, 16, 32]:
  # rebuilt per batch size because get_product_state trims leading windows
  # according to batch_size
  (X_train_CA, Y_train_CA, X_val_CA, Y_val_CA,
   X_test_CA, Y_test_CA, stores_CA) = get_product_state(product, CA_window_length, bs, 'CA')

  mod = Sequential()
  mod.add(LSTM(32, return_sequences = True,
               input_shape = (X_train_CA.shape[1], X_train_CA.shape[2])))
  mod.add(LSTM(16, return_sequences = True))
  mod.add(Flatten())
  mod.add(Dense(16, activation = 'relu'))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 4
  mod.add(Dense(Y_train_CA.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_CA, Y_train_CA, epochs = 50, verbose = 0, shuffle = False,
          batch_size = bs)

  pred = mod.predict(X_val_CA)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_CA, Y_val_CA, pred))




In [None]:
plot_learn_curve([4, 8, 16, 32], RMSSE, 'Batch Size', 'CA')

Optimal Batch Size = 32

In [13]:
# Rebuild the CA splits with the chosen window length and batch size.
CA_batch_size = 32
(X_train_CA, Y_train_CA,
 X_val_CA, Y_val_CA,
 X_test_CA, Y_test_CA,
 stores_CA) = get_product_state(product, CA_window_length, CA_batch_size, 'CA')


In [None]:
# Validation sweep over the L2 penalty for CA. The same weight is applied to
# the kernel and recurrent weights of both LSTM layers and to the hidden
# Dense kernel. A fresh model is trained for every setting.
RMSSE = []
for lam in np.arange(0.01, 0.11, 0.01):
  mod = Sequential()

  mod.add(LSTM(32, return_sequences = True,
               input_shape = (X_train_CA.shape[1], X_train_CA.shape[2]),
               kernel_regularizer = l2(lam), recurrent_regularizer = l2(lam)))

  mod.add(LSTM(16, return_sequences = True, kernel_regularizer = l2(lam),
               recurrent_regularizer = l2(lam)))

  mod.add(Flatten())
  mod.add(Dense(16, activation = 'relu', kernel_regularizer = l2(lam)))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 4
  mod.add(Dense(Y_train_CA.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_CA, Y_train_CA, epochs = 50, verbose = 0, shuffle = False,
          batch_size = CA_batch_size)

  pred = mod.predict(X_val_CA)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_CA, Y_val_CA, pred))




In [None]:
plot_learn_curve(np.arange(0.01, 0.11, 0.01), RMSSE, 'L2 Weights Regularization', 'CA')

Optimal Weight Regularization = 0.01

In [None]:
CA_lambda = 0.01

In [None]:
# Validation sweep over the number of training epochs for CA (50 to 150 in
# steps of 10). Every setting trains a fresh model from scratch.
RMSSE = []
for epoch in range(50, 160, 10):
  mod = Sequential()
  mod.add(LSTM(32, return_sequences = True,
               input_shape = (X_train_CA.shape[1], X_train_CA.shape[2]),
               kernel_regularizer = l2(CA_lambda),
               recurrent_regularizer = l2(CA_lambda)))

  mod.add(LSTM(16, return_sequences = True, kernel_regularizer = l2(CA_lambda),
               recurrent_regularizer = l2(CA_lambda)))

  mod.add(Flatten())
  mod.add(Dense(16, activation = 'relu', kernel_regularizer = l2(CA_lambda)))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 4
  mod.add(Dense(Y_train_CA.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_CA, Y_train_CA, epochs = epoch, verbose = 0, shuffle = False,
          batch_size = CA_batch_size)

  pred = mod.predict(X_val_CA)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_CA, Y_val_CA, pred))



In [None]:
plot_learn_curve(list(range(50, 160, 10)), RMSSE, 'Number of Epochs', 'CA')

Optimal Model for CA:

Window Length = 16

Batch Size = 32

L2 Weight Regularization = 0.01

Number of Epochs = 60

In [None]:
CA_epochs = 60
# Stack the train and validation splits along the sample axis for the final fit.
X_train_val_CA = np.concatenate([X_train_CA, X_val_CA])
Y_train_val_CA = np.concatenate([Y_train_CA, Y_val_CA])

In [None]:
# Final CA model, fitted on train + validation with the selected settings.
CA_mod = Sequential()
CA_mod.add(LSTM(32, return_sequences = True,
                input_shape = (X_train_val_CA.shape[1],
                               X_train_val_CA.shape[2]),
                kernel_regularizer = l2(CA_lambda),
                recurrent_regularizer = l2(CA_lambda)))

CA_mod.add(LSTM(16, return_sequences = True, kernel_regularizer = l2(CA_lambda),
                recurrent_regularizer = l2(CA_lambda)))

CA_mod.add(Flatten())
CA_mod.add(Dense(16, activation = 'relu', kernel_regularizer = l2(CA_lambda)))
CA_mod.add(Dropout(0.1))
# one output unit per target column instead of a hard-coded 4
CA_mod.add(Dense(Y_train_val_CA.shape[1], activation = 'relu'))
CA_mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
CA_mod.fit(X_train_val_CA, Y_train_val_CA, epochs = CA_epochs, verbose = 2,
           shuffle = False, batch_size = CA_batch_size)
  

Epoch 1/60
59/59 - 1s - loss: 167.7748
Epoch 2/60
59/59 - 1s - loss: 50.0041
Epoch 3/60
59/59 - 1s - loss: 41.0852
Epoch 4/60
59/59 - 1s - loss: 39.9245
Epoch 5/60
59/59 - 1s - loss: 39.5806
Epoch 6/60
59/59 - 1s - loss: 38.6434
Epoch 7/60
59/59 - 1s - loss: 38.1519
Epoch 8/60
59/59 - 1s - loss: 38.8257
Epoch 9/60
59/59 - 1s - loss: 37.9989
Epoch 10/60
59/59 - 1s - loss: 38.5308
Epoch 11/60
59/59 - 1s - loss: 37.5981
Epoch 12/60
59/59 - 1s - loss: 37.0385
Epoch 13/60
59/59 - 1s - loss: 37.6109
Epoch 14/60
59/59 - 1s - loss: 36.4326
Epoch 15/60
59/59 - 1s - loss: 36.8014
Epoch 16/60
59/59 - 1s - loss: 36.7952
Epoch 17/60
59/59 - 1s - loss: 36.3315
Epoch 18/60
59/59 - 1s - loss: 36.3001
Epoch 19/60
59/59 - 1s - loss: 36.2327
Epoch 20/60
59/59 - 1s - loss: 35.5847
Epoch 21/60
59/59 - 1s - loss: 36.0072
Epoch 22/60
59/59 - 1s - loss: 35.7877
Epoch 23/60
59/59 - 1s - loss: 36.1114
Epoch 24/60
59/59 - 1s - loss: 35.6049
Epoch 25/60
59/59 - 1s - loss: 35.3379
Epoch 26/60
59/59 - 1s - loss: 35

<tensorflow.python.keras.callbacks.History at 0x7f12b6b2b048>

In [None]:
# Predict the held-out test windows with the final CA model.
CA_pred = CA_mod.predict(X_test_CA)
# Per-store RMSSE on the test split, scaled with the train + validation targets.
CA_rmsse_list = get_avg_rmsse(Y_train_val_CA, Y_test_CA, CA_pred)

In [None]:
CA_rmsse_list, np.mean(CA_rmsse_list)

([0.6815919032966692,
  0.9662837663004737,
  0.6335535090347039,
  0.7137904907122954],
 0.7488049173360356)

In [None]:
# One panel per CA store on a two-column grid: ground truth (red) against the
# test-set predictions (blue).
n_days, n_stores = Y_test_CA.shape
n_cols = 2
n_rows = (n_stores + n_cols - 1) // n_cols
fig = make_subplots(rows = n_rows, cols = n_cols,
                    subplot_titles = tuple(list(stores_CA)))

days = list(range(n_days))
for i in range(n_stores):
  # panels are filled row by row
  row, col = i // n_cols + 1, i % n_cols + 1

  fig.add_trace(go.Scatter(x = days, y = Y_test_CA[:, i],
                           mode = 'lines', name = 'Ground Truth',
                           line = dict(color = 'red')), row = row, col = col)

  fig.add_trace(go.Scatter(x = days, y = CA_pred[:, i],
                           mode = 'lines', name = 'Prediction',
                           line = dict(color = 'blue')), row = row, col = col)

  fig.update_xaxes(title_text = "Days", row = row, col = col)
  if col == 1:
    # y label only on the left-most panel of each row
    fig.update_yaxes(title_text = "Daily Unit Sales", row = row, col = col)

# the title follows `product` instead of a hard-coded item id
fig.update_layout(title_text = 'Ground Truth v.s. Predictions for FOODS_3_' + product + ' Daily Unit Sales in CA Stores')

fig.show()

In [None]:
plot_sum_truth_pred(list(stores_CA), Y_test_CA, CA_pred, 'CA')

## TX

In [None]:
# Validation sweep over the window length for TX (batch size fixed at 32).
# A fresh model is trained for every setting.
RMSSE = []
for ws in [4, 8, 16, 32]:
  # the windowed dataset has to be rebuilt for every window length
  (X_train_TX, Y_train_TX, X_val_TX, Y_val_TX,
   X_test_TX, Y_test_TX, stores_TX) = get_product_state(product, ws, 32, 'TX')

  mod = Sequential()
  mod.add(LSTM(16, return_sequences = True,
               input_shape = (X_train_TX.shape[1], X_train_TX.shape[2])))
  mod.add(LSTM(8, return_sequences = True))
  mod.add(Flatten())
  mod.add(Dense(8, activation = 'relu'))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 3
  mod.add(Dense(Y_train_TX.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_TX, Y_train_TX, epochs = 50, verbose = 0, shuffle = False,
          batch_size = 32)

  pred = mod.predict(X_val_TX)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_TX, Y_val_TX, pred))





In [None]:
plot_learn_curve([4, 8, 16, 32], RMSSE, 'Window Length', 'TX')

Optimal Window Length = 16

In [None]:
TX_window_length = 16

In [None]:
# Validation sweep over the batch size for TX with the window length fixed
# at TX_window_length. A fresh model is trained for every setting.
RMSSE = []
for bs in [4, 8, 16, 32]:
  # rebuilt per batch size because get_product_state trims leading windows
  # according to batch_size
  (X_train_TX, Y_train_TX, X_val_TX, Y_val_TX,
   X_test_TX, Y_test_TX, stores_TX) = get_product_state(product, TX_window_length, bs, 'TX')

  mod = Sequential()
  mod.add(LSTM(16, return_sequences = True,
               input_shape = (X_train_TX.shape[1], X_train_TX.shape[2])))
  mod.add(LSTM(8, return_sequences = True))
  mod.add(Flatten())
  mod.add(Dense(8, activation = 'relu'))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 3
  mod.add(Dense(Y_train_TX.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_TX, Y_train_TX, epochs = 50, verbose = 0, shuffle = False,
          batch_size = bs)

  pred = mod.predict(X_val_TX)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_TX, Y_val_TX, pred))



In [None]:
plot_learn_curve([4, 8, 16, 32], RMSSE, 'Batch Size', 'TX')

Optimal Batch Size = 16

In [None]:
# Rebuild the TX splits with the chosen window length and batch size.
TX_batch_size = 16
(X_train_TX, Y_train_TX,
 X_val_TX, Y_val_TX,
 X_test_TX, Y_test_TX,
 stores_TX) = get_product_state(product, TX_window_length, TX_batch_size, 'TX')


In [None]:
# Validation sweep over the L2 penalty for TX. The same weight is applied to
# the kernel and recurrent weights of both LSTM layers and to the hidden
# Dense kernel. A fresh model is trained for every setting.
RMSSE = []
for lam in np.arange(0.01, 0.11, 0.01):
  mod = Sequential()

  mod.add(LSTM(16, return_sequences = True,
               input_shape = (X_train_TX.shape[1], X_train_TX.shape[2]),
               kernel_regularizer = l2(lam), recurrent_regularizer = l2(lam)))

  mod.add(LSTM(8, return_sequences = True, kernel_regularizer = l2(lam),
               recurrent_regularizer = l2(lam)))

  mod.add(Flatten())
  mod.add(Dense(8, activation = 'relu', kernel_regularizer = l2(lam)))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 3
  mod.add(Dense(Y_train_TX.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_TX, Y_train_TX, epochs = 50, verbose = 0, shuffle = False,
          batch_size = TX_batch_size)

  pred = mod.predict(X_val_TX)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_TX, Y_val_TX, pred))



In [None]:
plot_learn_curve(np.arange(0.01, 0.11, 0.01), RMSSE, 'L2 Weights Regularization', 'TX')

Optimal L2 Weights Regularization = 0.01

In [None]:
TX_lambda = 0.01

In [None]:
# Validation sweep over the number of training epochs for TX (50 to 150 in
# steps of 10). Every setting trains a fresh model from scratch.
RMSSE = []
for epoch in range(50, 160, 10):
  mod = Sequential()
  mod.add(LSTM(16, return_sequences = True,
               input_shape = (X_train_TX.shape[1], X_train_TX.shape[2]),
               kernel_regularizer = l2(TX_lambda),
               recurrent_regularizer = l2(TX_lambda)))

  mod.add(LSTM(8, return_sequences = True, kernel_regularizer = l2(TX_lambda),
               recurrent_regularizer = l2(TX_lambda)))

  mod.add(Flatten())
  mod.add(Dense(8, activation = 'relu', kernel_regularizer = l2(TX_lambda)))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 3
  mod.add(Dense(Y_train_TX.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_TX, Y_train_TX, epochs = epoch, verbose = 0, shuffle = False,
          batch_size = TX_batch_size)

  pred = mod.predict(X_val_TX)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_TX, Y_val_TX, pred))



In [None]:
plot_learn_curve(list(range(50, 160, 10)), RMSSE, 'Number of Epochs', 'TX')

Optimal Model for TX:

Window Length = 16

Batch Size = 16

L2 Weight Regularization = 0.01

Number of Epochs = 60

In [None]:
TX_epochs = 60
# Stack the train and validation splits along the sample axis for the final fit.
X_train_val_TX = np.concatenate([X_train_TX, X_val_TX])
Y_train_val_TX = np.concatenate([Y_train_TX, Y_val_TX])

In [None]:
# Final TX model, fitted on train + validation with the selected settings.
TX_mod = Sequential()
TX_mod.add(LSTM(16, return_sequences = True,
                input_shape = (X_train_val_TX.shape[1], X_train_val_TX.shape[2]),
                kernel_regularizer = l2(TX_lambda),
                recurrent_regularizer = l2(TX_lambda)))

TX_mod.add(LSTM(8, return_sequences = True, kernel_regularizer = l2(TX_lambda),
                recurrent_regularizer = l2(TX_lambda)))

TX_mod.add(Flatten())
TX_mod.add(Dense(8, activation = 'relu', kernel_regularizer = l2(TX_lambda)))
TX_mod.add(Dropout(0.1))
# one output unit per target column instead of a hard-coded 3
TX_mod.add(Dense(Y_train_val_TX.shape[1], activation = 'relu'))
TX_mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
TX_mod.fit(X_train_val_TX, Y_train_val_TX, epochs = TX_epochs, verbose = 2,
           shuffle = False, batch_size = TX_batch_size)

# Predict the held-out test windows with the final TX model.
TX_pred = TX_mod.predict(X_test_TX)

Epoch 1/60
118/118 - 1s - loss: 205.3294
Epoch 2/60
118/118 - 1s - loss: 90.6089
Epoch 3/60
118/118 - 1s - loss: 55.7859
Epoch 4/60
118/118 - 1s - loss: 55.1652
Epoch 5/60
118/118 - 1s - loss: 53.9325
Epoch 6/60
118/118 - 1s - loss: 51.0475
Epoch 7/60
118/118 - 1s - loss: 50.5400
Epoch 8/60
118/118 - 1s - loss: 49.3908
Epoch 9/60
118/118 - 1s - loss: 48.2826
Epoch 10/60
118/118 - 1s - loss: 50.4732
Epoch 11/60
118/118 - 1s - loss: 47.5375
Epoch 12/60
118/118 - 1s - loss: 49.0214
Epoch 13/60
118/118 - 1s - loss: 47.6319
Epoch 14/60
118/118 - 1s - loss: 47.0087
Epoch 15/60
118/118 - 1s - loss: 46.4560
Epoch 16/60
118/118 - 1s - loss: 47.1524
Epoch 17/60
118/118 - 1s - loss: 45.5892
Epoch 18/60
118/118 - 1s - loss: 46.1742
Epoch 19/60
118/118 - 1s - loss: 43.8560
Epoch 20/60
118/118 - 1s - loss: 42.7102
Epoch 21/60
118/118 - 1s - loss: 43.2172
Epoch 22/60
118/118 - 1s - loss: 44.1227
Epoch 23/60
118/118 - 1s - loss: 41.7188
Epoch 24/60
118/118 - 1s - loss: 43.6881
Epoch 25/60
118/118 - 1s

In [None]:
# Per-store RMSSE on the test split, scaled with the train + validation targets.
TX_rmsse_list = get_avg_rmsse(Y_train_val_TX, Y_test_TX, TX_pred)
# Display the per-store scores together with their mean.
TX_rmsse_list, np.mean(TX_rmsse_list)

([0.5985512417546494, 0.5172350639200124, 0.4439888887025991],
 0.5199250647924203)

In [None]:
plot_daily_truth_pred(list(stores_TX), Y_test_TX, TX_pred, 'TX')

In [None]:
plot_sum_truth_pred(list(stores_TX), Y_test_TX, TX_pred, 'TX')

## WI

In [None]:
# Validation sweep over the window length for WI (batch size fixed at 32).
# A fresh model is trained for every setting.
RMSSE = []
for ws in [4, 8, 16, 32]:
  # the windowed dataset has to be rebuilt for every window length
  (X_train_WI, Y_train_WI, X_val_WI, Y_val_WI,
   X_test_WI, Y_test_WI, stores_WI) = get_product_state(product, ws, 32, 'WI')

  mod = Sequential()
  mod.add(LSTM(16, return_sequences = True,
               input_shape = (X_train_WI.shape[1], X_train_WI.shape[2])))
  mod.add(LSTM(8, return_sequences = True))
  mod.add(Flatten())
  mod.add(Dense(8, activation = 'relu'))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 3
  mod.add(Dense(Y_train_WI.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_WI, Y_train_WI, epochs = 50, verbose = 0, shuffle = False,
          batch_size = 32)

  pred = mod.predict(X_val_WI)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_WI, Y_val_WI, pred))




In [None]:
plot_learn_curve([4, 8, 16, 32], RMSSE, 'Window Size', 'WI')

Optimal Window Size = 8

In [None]:
WI_window_length = 8

In [None]:
# Validation sweep over the batch size for WI with the window length fixed
# at WI_window_length. A fresh model is trained for every setting.
RMSSE = []
for bs in [4, 8, 16, 32]:
  # rebuilt per batch size because get_product_state trims leading windows
  # according to batch_size
  (X_train_WI, Y_train_WI, X_val_WI, Y_val_WI,
   X_test_WI, Y_test_WI, stores_WI) = get_product_state(product, WI_window_length, bs, 'WI')

  mod = Sequential()
  mod.add(LSTM(16, return_sequences = True,
               input_shape = (X_train_WI.shape[1], X_train_WI.shape[2])))
  mod.add(LSTM(8, return_sequences = True))
  mod.add(Flatten())
  mod.add(Dense(8, activation = 'relu'))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 3
  mod.add(Dense(Y_train_WI.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_WI, Y_train_WI, epochs = 50, verbose = 0, shuffle = False,
          batch_size = bs)

  pred = mod.predict(X_val_WI)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_WI, Y_val_WI, pred))




In [None]:
plot_learn_curve([4, 8, 16, 32], RMSSE, 'Batch Size', 'WI')

Optimal Batch Size = 32

In [None]:
# Rebuild the WI splits with the chosen window length and batch size.
WI_batch_size = 32
(X_train_WI, Y_train_WI,
 X_val_WI, Y_val_WI,
 X_test_WI, Y_test_WI,
 stores_WI) = get_product_state(product, WI_window_length, WI_batch_size, 'WI')

In [None]:
# Validation sweep over the L2 penalty for WI. The same weight is applied to
# the kernel and recurrent weights of both LSTM layers and to the hidden
# Dense kernel. A fresh model is trained for every setting.
RMSSE = []
for lam in np.arange(0.01, 0.11, 0.01):
  mod = Sequential()

  mod.add(LSTM(16, return_sequences = True,
               input_shape = (X_train_WI.shape[1], X_train_WI.shape[2]),
               kernel_regularizer = l2(lam), recurrent_regularizer = l2(lam)))

  mod.add(LSTM(8, return_sequences = True, kernel_regularizer = l2(lam),
               recurrent_regularizer = l2(lam)))

  mod.add(Flatten())
  mod.add(Dense(8, activation = 'relu', kernel_regularizer = l2(lam)))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 3
  mod.add(Dense(Y_train_WI.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_WI, Y_train_WI, epochs = 50, verbose = 0, shuffle = False,
          batch_size = WI_batch_size)

  pred = mod.predict(X_val_WI)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_WI, Y_val_WI, pred))




In [None]:
# Axis label spelled correctly and aligned with the CA and TX regularization plots.
plot_learn_curve(np.arange(0.01, 0.11, 0.01), RMSSE, 'L2 Weights Regularization', 'WI')

Optimal L2 Weights Regularization = 0.07

In [None]:
WI_lambda = 0.07

In [None]:
# Validation sweep over the number of training epochs for WI (50 to 150 in
# steps of 10). Every setting trains a fresh model from scratch.
RMSSE = []
for epoch in range(50, 160, 10):
  mod = Sequential()
  mod.add(LSTM(16, return_sequences = True,
               input_shape = (X_train_WI.shape[1], X_train_WI.shape[2]),
               kernel_regularizer = l2(WI_lambda),
               recurrent_regularizer = l2(WI_lambda)))

  mod.add(LSTM(8, return_sequences = True, kernel_regularizer = l2(WI_lambda),
               recurrent_regularizer = l2(WI_lambda)))

  mod.add(Flatten())
  mod.add(Dense(8, activation = 'relu', kernel_regularizer = l2(WI_lambda)))
  mod.add(Dropout(0.1))
  # one output unit per target column instead of a hard-coded 3
  mod.add(Dense(Y_train_WI.shape[1], activation = 'relu'))
  mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
  mod.fit(X_train_WI, Y_train_WI, epochs = epoch, verbose = 0, shuffle = False,
          batch_size = WI_batch_size)

  pred = mod.predict(X_val_WI)
  # per-store RMSSE on the validation split for this setting
  RMSSE.append(get_avg_rmsse(Y_train_WI, Y_val_WI, pred))



In [None]:
plot_learn_curve(list(range(50, 160, 10)), RMSSE, 'Number of Epochs', 'WI')

Optimal Model for WI:

Window Length = 8

Batch Size = 32

L2 Weight Regularization = 0.07

Number of Epochs = 80

In [None]:
WI_epochs = 80
# Stack the train and validation splits along the sample axis for the final fit.
X_train_val_WI = np.concatenate([X_train_WI, X_val_WI])
Y_train_val_WI = np.concatenate([Y_train_WI, Y_val_WI])

In [None]:
# Final WI model, fitted on train + validation with the selected settings.
WI_mod = Sequential()
WI_mod.add(LSTM(16, return_sequences = True,
                input_shape = (X_train_val_WI.shape[1], X_train_val_WI.shape[2]),
                kernel_regularizer = l2(WI_lambda),
                recurrent_regularizer = l2(WI_lambda)))

WI_mod.add(LSTM(8, return_sequences = True, kernel_regularizer = l2(WI_lambda),
                recurrent_regularizer = l2(WI_lambda)))

WI_mod.add(Flatten())
WI_mod.add(Dense(8, activation = 'relu', kernel_regularizer = l2(WI_lambda)))
WI_mod.add(Dropout(0.1))
# one output unit per target column instead of a hard-coded 3
WI_mod.add(Dense(Y_train_val_WI.shape[1], activation = 'relu'))
WI_mod.compile(loss = 'mean_squared_error', optimizer = 'adam')
WI_mod.fit(X_train_val_WI, Y_train_val_WI, epochs = WI_epochs, verbose = 2,
           shuffle = False, batch_size = WI_batch_size)
  

Epoch 1/80
60/60 - 0s - loss: 106.1240
Epoch 2/80
60/60 - 0s - loss: 90.2251
Epoch 3/80
60/60 - 0s - loss: 65.3584
Epoch 4/80
60/60 - 0s - loss: 62.2812
Epoch 5/80
60/60 - 0s - loss: 61.1743
Epoch 6/80
60/60 - 0s - loss: 60.5382
Epoch 7/80
60/60 - 0s - loss: 60.0578
Epoch 8/80
60/60 - 0s - loss: 60.3190
Epoch 9/80
60/60 - 0s - loss: 59.4794
Epoch 10/80
60/60 - 0s - loss: 59.5682
Epoch 11/80
60/60 - 0s - loss: 59.7413
Epoch 12/80
60/60 - 0s - loss: 58.9497
Epoch 13/80
60/60 - 0s - loss: 58.9010
Epoch 14/80
60/60 - 0s - loss: 59.0110
Epoch 15/80
60/60 - 0s - loss: 58.6634
Epoch 16/80
60/60 - 0s - loss: 58.8952
Epoch 17/80
60/60 - 0s - loss: 58.6388
Epoch 18/80
60/60 - 0s - loss: 58.4914
Epoch 19/80
60/60 - 0s - loss: 58.5819
Epoch 20/80
60/60 - 0s - loss: 58.8238
Epoch 21/80
60/60 - 0s - loss: 58.2928
Epoch 22/80
60/60 - 0s - loss: 58.8200
Epoch 23/80
60/60 - 0s - loss: 58.1362
Epoch 24/80
60/60 - 0s - loss: 58.4858
Epoch 25/80
60/60 - 0s - loss: 58.5905
Epoch 26/80
60/60 - 0s - loss: 57

<tensorflow.python.keras.callbacks.History at 0x7f12b4e38550>

In [None]:
# Predict the held-out test windows with the final WI model.
WI_pred = WI_mod.predict(X_test_WI)
# Per-store RMSSE on the test split, scaled with the train + validation targets.
WI_rmsse_list = get_avg_rmsse(Y_train_val_WI, Y_test_WI, WI_pred)

In [None]:
WI_rmsse_list, np.mean(WI_rmsse_list)

([0.728503179355394, 1.1136967207000115, 0.6821836090698017],
 0.8414611697084023)

In [None]:
plot_daily_truth_pred(list(stores_WI), Y_test_WI, WI_pred, 'WI')

In [None]:
plot_sum_truth_pred(list(stores_WI), Y_test_WI, WI_pred, 'WI')