# This notebook contains code inspired by the deeplearning.ai course "Sequences, Time Series and Prediction", which is a great course for getting started with deep learning in TensorFlow.

In [None]:
import datetime
import itertools
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

warnings.filterwarnings('ignore')

In [None]:
# Load the raw sales records from the competition input folder.
train_csv_path = '../input/sales-forecasting/train.csv'
df = pd.read_csv(train_csv_path)


# Data Preprocessing

Create a data frame containing sales grouped by day. 

In [None]:
# Build a frame with one row per order date holding the total sales of that date.
# NOTE(review): the date format stored in the CSV is not visible from this notebook; if the
# file uses day-first dates, pass dayfirst=True (or an explicit format=) below - confirm.
df['Order Date'] = pd.to_datetime(df['Order Date'])
df.sort_values(['Order Date'], inplace=True)
# The rows are already in chronological order, so sort=False keeps the groups chronological.
daily_sales = (
    df.groupby('Order Date', sort=False)['Sales']
    .sum()
    .to_frame()
    .reset_index()
)
# Vectorised datetime accessor instead of a Python lambda applied row by row.
daily_sales['Order Year'] = daily_sales['Order Date'].dt.year

Split the data into two sets: a training set and a validation set. The training set contains all observations except those from 2018, which are held out for validation.

In [None]:
# Hold out every 2018 observation as the test set; all other years form the training set.
sales = daily_sales['Sales']
is_2018 = daily_sales['Order Year'] == 2018

X = np.array(sales)
X_train = np.array(sales[~is_2018])
X_test = np.array(sales[is_2018])

print('Train set size : ', len(X_train))
print('Test set size : ', len(X_test))

Next, the data must be transformed into a windowed dataset. TensorFlow makes this easy; the function below is one simple way of doing it.

In [None]:
def windowed_dataset(X, window_size, batch_size, shuffle_buffer):
    """Turn a series into shuffled, batched (window, next value) training pairs.

    Args:
        X: the series to window; a trailing axis of size 1 is appended before slicing
            (assumes X is a 1-D array of observations - TODO confirm against callers).
        window_size: number of consecutive past observations used as the predictor.
        batch_size: number of (window, target) pairs per batch.
        shuffle_buffer: buffer size passed to `Dataset.shuffle`.

    Returns:
        A `tf.data.Dataset` yielding batches of `(inputs, target)` where `inputs` holds the
        first `window_size` steps of a window and `target` is the step right after them.
    """
    # Each window carries window_size predictors plus the one value to predict.
    span = window_size + 1
    # Add the trailing feature axis expected by the recurrent layers.
    series = tf.expand_dims(X, axis=-1)
    pairs = (
        tf.data.Dataset.from_tensor_slices(series)
        # Sliding windows advancing one step at a time; incomplete tail windows are dropped.
        .window(span, shift=1, drop_remainder=True)
        # Collapse every nested window dataset into a single tensor.
        .flat_map(lambda sub: sub.batch(span))
        .shuffle(shuffle_buffer)
        # Split each window into predictors (all but the last step) and the target (last step).
        .map(lambda win: (win[:-1], win[-1]))
    )
    return pairs.batch(batch_size).prefetch(1)

Apply the previous function on daily sales using a time window of 41 days.

In [None]:
# Number of past daily observations fed to the model for each prediction.
window_size = 41
# Index in the full daily series at which validation observations start. It is derived from
# the training array rather than hard-coded, because the forecasts are later sliced with it
# and compared against X_train / X_test: the lengths only match when it equals len(X_train).
split_time = len(X_train)

Xp_train = windowed_dataset(
    X_train,
    window_size=window_size,
    batch_size=256,
    shuffle_buffer=len(X_train),
)
Xp_validation = windowed_dataset(
    X_test,
    window_size=window_size,
    batch_size=512,
    shuffle_buffer=len(X_test),
)

# Model 

In [None]:
# Baseline network: Conv1D front end -> two stacked bidirectional LSTMs -> dense head.
baseline_layers = [
    # 1D convolution over the time axis (the author's intent: smooth out some noise).
    tf.keras.layers.Conv1D(
        filters=3,
        kernel_size=5,
        strides=1,
        padding="valid",
        activation="relu",
        input_shape=[None, 1],
    ),
    # Two bidirectional LSTM layers; only the second collapses the sequence to one vector.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, return_sequences=False)),
    # Dense head: 30 -> 10 -> 1 units with ReLU between the hidden layers.
    tf.keras.layers.Dense(30),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(1),
    # Rescale the output by a constant factor
    # (presumably to bring it closer to the magnitude of daily sales - confirm).
    tf.keras.layers.Lambda(lambda x: x * 10000.0),
]
model1 = tf.keras.models.Sequential(baseline_layers)

# Compile with the Huber loss and track MAE during training.
model1.compile(
    optimizer='Adam',
    loss=tf.keras.losses.Huber(),
    metrics=['mae'],
)

In [None]:
# Train the baseline model and keep the per-epoch training / validation metrics.
history1 = model1.fit(
    x=Xp_train,
    validation_data=Xp_validation,
    epochs=500,
)

In [None]:
# Training vs. validation MAE per epoch for the baseline model.
mae = history1.history['mae']
val_mae = history1.history['val_mae']
epochs = range(len(mae))

fig = plt.figure(figsize=(12, 6))
ax = fig.add_axes([0, 0, 1, 1])
for curve, fmt, name in ((mae, 'b', 'Training mae'), (val_mae, 'r', 'Validation mae')):
    ax.plot(epochs, curve, fmt, label=name)
ax.legend()
plt.title('MAE')
plt.xlabel("Epochs")
plt.ylabel("MAE")

Clearly, the model overfits the training set.

# Regularization

### Model

Introduce L2 regularization and dropout into the model.

In [None]:
def Model(Lambda, drop_rate):
    """Build the regularized forecasting network (not compiled).

    The architecture is Conv1D -> Dropout -> two bidirectional LSTMs -> Dropout -> dense
    head (30 -> 10 -> 1) -> constant output scaling by 10000.

    Args:
        Lambda: L2 regularization factor applied to the kernel of every trainable layer.
        drop_rate: rate used by both Dropout layers.

    Returns:
        A `tf.keras.models.Sequential` model.
    """

    def l2_penalty():
        # A separate regularizer instance for each layer, all sharing the same factor.
        return tf.keras.regularizers.l2(Lambda)

    stack = [
        tf.keras.layers.Conv1D(
            filters=3,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            input_shape=[None, 1],
            kernel_regularizer=l2_penalty(),
        ),
        tf.keras.layers.Dropout(drop_rate),
        # Only the second recurrent layer collapses the sequence to a single vector.
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(40, return_sequences=True, kernel_regularizer=l2_penalty())
        ),
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(40, return_sequences=False, kernel_regularizer=l2_penalty())
        ),
        tf.keras.layers.Dropout(drop_rate),
        tf.keras.layers.Dense(30, kernel_regularizer=l2_penalty()),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Dense(10, kernel_regularizer=l2_penalty()),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Dense(1, kernel_regularizer=l2_penalty()),
        # Constant rescaling of the single output unit.
        tf.keras.layers.Lambda(lambda x: x * 10000.0),
    ]
    return tf.keras.models.Sequential(stack)

### Hyperparameters space

Create a hyperparameter space. Here it is the set of pairs of the form (regularization factor, dropout rate): build one set of regularization factors and one set of dropout rates, then take their Cartesian product.

In [None]:
# Seed the draw so that the search grid is identical on every run of the notebook.
HYPER_SEED = 42
rng = np.random.default_rng(HYPER_SEED)

# Regularization factors: 20 values drawn log-uniformly from (1e-3, 1].
r = -3 * rng.random(20)
regu_factors = 10**r

# Dropout rates: 5 evenly spaced values in [0, 0.4].
drop_rates = np.linspace(0, 0.4, 5)

# Hyperparameter space: Cartesian product of the two sets above (20 x 5 = 100 pairs).
hyper_space = list(itertools.product(regu_factors, drop_rates))

print('Subset of hyperparameters space :')
print(hyper_space[0:10])

### Explore the hyperparameter space 

In [None]:
# Training / validation MAE curves of every trial, keyed by the trial index.
history_dict = dict()
# Kept for later use; nothing is written to it in this cell.
prediction_dict = dict()

# Train one freshly built model per (regularization factor, dropout rate) pair.
for i, (Lambda, rate) in enumerate(hyper_space):
    print(f'Processing for ({Lambda},{rate})')
    model = Model(Lambda, rate)
    model.compile(loss=tf.keras.losses.Huber(), optimizer='Adam', metrics=['mae'])
    # The callback is created here, one per trial, so this cell does not depend on a
    # variable that is only defined further down the notebook (which raised NameError
    # on a fresh kernel).
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_mae',
        patience=500,
        restore_best_weights=True,
        mode='min',
    )
    history = model.fit(
        Xp_train,
        epochs=500,
        validation_data=Xp_validation,
        callbacks=[early_stop],
    )
    # Save training and validation history of this trial.
    history_dict[f'Training MAE {i}'] = history.history['mae']
    history_dict[f'Validation MAE {i}'] = history.history['val_mae']

It takes time to search for hyperparameters. Let's just proceed with some given model.

# Regularized model

In [None]:
# Regularized network built with one fixed (L2 factor, dropout rate) pair.
model = Model(Lambda=0.5, drop_rate=0.2)
model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.Huber(),
    metrics=['mae'],
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Track the lowest validation MAE so the best weights can be recovered.
# NOTE(review): patience equals the number of epochs, so this callback is not expected to
# halt training early; whether the best weights are still restored after a full-length run
# depends on the installed Keras version - confirm.
early_stop = EarlyStopping(
    monitor='val_mae',
    mode='min',
    patience=500,
    restore_best_weights=True,
)
history = model.fit(
    x=Xp_train,
    validation_data=Xp_validation,
    epochs=500,
    callbacks=[early_stop],
)

In [None]:
# Training vs. validation MAE per epoch, with the best validation epoch highlighted.
val_mae = history.history['val_mae']
mae = history.history['mae']

# Lowest validation MAE and the (first) epoch at which it was reached.
min_val = min(val_mae)
print('min validation mae : ', min_val)
index = val_mae.index(min_val)

epochs = range(len(mae))
fig = plt.figure(figsize=(12, 6))
ax = fig.add_axes([0, 0, 1, 1])
for curve, fmt, name in ((mae, 'b', 'Training mae'), (val_mae, 'r', 'Validation mae')):
    ax.plot(epochs, curve, fmt, label=name)
# Star marker on the best validation epoch.
ax.plot(
    index,
    min_val,
    marker='*',
    ms=20,
    markerfacecolor='yellow',
    markeredgewidth=3,
    markeredgecolor='green',
)
ax.legend()
plt.title('MAE')
plt.xlabel("Epochs")
plt.ylabel("MAE")

# Prediction

In [None]:
#Given a series, this function predicts sales for each step.
def model_forecast(model, X, window_size, batch_size=908):
    """Run `model` on every sliding window of `X`.

    Args:
        model: object exposing `predict(dataset)` (a trained Keras model in this notebook).
        X: the series to forecast from; it is sliced along its first axis
            (callers in this notebook pass it with a trailing feature axis of size 1).
        window_size: number of consecutive observations in each input window.
        batch_size: number of windows per prediction batch. Defaults to 908, the value
            that used to be hard-coded here.

    Returns:
        Whatever `model.predict` returns for the windowed dataset: one prediction per
        window, i.e. `len(X) - window_size + 1` rows. The last window ends on the final
        observation, so its prediction has no observed target inside `X`.
    """
    #Creating a dataset
    ds = tf.data.Dataset.from_tensor_slices(X)
    #Windowing: one window per step, incomplete tail windows are dropped
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size))
    ds = ds.batch(batch_size).prefetch(1)
    #Predict
    forecast = model.predict(ds)
    return forecast

Let's apply this function to the full series and split the results into training and test predictions.

In [None]:
# Forecast from every window of the full series, then flatten to one value per window.
raw_forecast = model_forecast(model, X[..., np.newaxis], window_size)
forecast = raw_forecast[:, -1].reshape(-1)

In [None]:
# The first window_size observations have no forecast, so forecasts are offset by
# window_size relative to the series; the last forecast has no observed target inside X,
# hence the trailing -1 on the test slice.
boundary = split_time - window_size
train_predictions = forecast[:boundary]
test_predictions = forecast[boundary:-1]
train_targets = X_train[window_size:]

MSE_train = tf.keras.metrics.mean_squared_error(train_targets, train_predictions)
MAE_train = tf.keras.metrics.mean_absolute_error(train_targets, train_predictions)
MSE_test = tf.keras.metrics.mean_squared_error(X_test, test_predictions)
MAE_test = tf.keras.metrics.mean_absolute_error(X_test, test_predictions)

print('Train RMSE = ', np.sqrt(MSE_train))
print('Test RMSE = ', np.sqrt(MSE_test))
print('Train MAE = ', MAE_train.numpy())
print('Test MAE = ', MAE_test.numpy())

There is still a lot of work to do on regularization. Also note that daily sales contain a lot of noise and demonstrate no clear pattern...