In [0]:
!pip install -q tensorflow-gpu==2.0.0-alpha0

### Get Model files and Datasets from cloud storage

In [0]:
# from google.colab import files
# files.upload()

In [3]:
!wget https://storage.googleapis.com/columbia_applied_deep_learning/demand_forecasting.zip  \
    -O ./demand_forecasting.zip

--2019-05-19 03:52:50--  https://storage.googleapis.com/columbia_applied_deep_learning/demand_forecasting.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.129.128, 2607:f8b0:4001:c07::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.129.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3361698 (3.2M) [application/zip]
Saving to: ‘./demand_forecasting.zip’


2019-05-19 03:52:50 (50.1 MB/s) - ‘./demand_forecasting.zip’ saved [3361698/3361698]



In [0]:
import zipfile
local_zip = './demand_forecasting.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('./')
zip_ref.close()

In [5]:
!ls -al

total 7284
drwxr-xr-x 1 root root    4096 May 19 03:38 .
drwxr-xr-x 1 root root    4096 May 19 03:37 ..
drwxr-xr-x 1 root root    4096 May 16 16:08 .config
-rw-r--r-- 1 root root 3361698 May 19 03:13 demand_forecasting.zip
drwxr-xr-x 2 root root    4096 May 19 03:38 __MACOSX
-rw-r--r-- 1 root root  189008 May 19 03:52 model1_eu.hdf5
-rw-r--r-- 1 root root  188968 May 19 03:52 model1_id2.hdf5
-rw-r--r-- 1 root root  171048 May 19 03:52 model1_th.hdf5
-rw-r--r-- 1 root root  901416 May 19 03:52 model2_eu.hdf5
-rw-r--r-- 1 root root  901312 May 19 03:52 model2_id2.hdf5
-rw-r--r-- 1 root root  901312 May 19 03:52 model2_th.hdf5
-rw-r--r-- 1 root root  150744 May 19 03:52 model3_eu.hdf5
-rw-r--r-- 1 root root  150744 May 19 03:52 model3_id2.hdf5
-rw-r--r-- 1 root root  150744 May 19 03:52 model3_th_exp_120.hdf5
-rw-r--r-- 1 root root  150744 May 19 03:52 model3_th_exp.hdf5
-rw-r--r-- 1 root root  142760 May 19 03:52 model3_th.hdf5
-rw-r--r-- 1 root root    8090 May 19 03:52 Product1-Dataset

### Imports

In [6]:
import pandas as pd
import numpy as np
import argparse
import os
import platform
import datetime, time
import datetime as dt
import warnings
from IPython.core.display import display, HTML
import tensorflow as tf
from plotly.offline import init_notebook_mode, iplot
from plotly import graph_objs as go
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Dropout, SimpleRNN, Input, LSTM, TimeDistributed,RepeatVector, Flatten
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau,RemoteMonitor
import sys
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import math
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from tensorflow.keras.models import load_model

plt.style.use('default')
plt.style.use('seaborn-deep')

sns.set(style="whitegrid")
sns.set_context("talk")

%matplotlib inline
init_notebook_mode(connected=True)

In [0]:
checkpoint_dir = './'
model_filename = 'model1_eu.hdf5'
data_filename = 'Product3-Dataset-eu-2year.csv'
train_model = False

In [8]:
tf.__version__

'2.0.0-alpha0'

In [0]:
def configure_plotly_browser_state():
  import IPython
  display(IPython.core.display.HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
            },
          });
        </script>
        '''))

In [0]:
def plotly_df(df, title='', annotations=None, forecast=False):
    """Visualize all the dataframe columns as line plots."""
    common_kw = dict(x=df.index, mode='lines+markers')
    if forecast:
      xaxis = dict(title='Time Steps',type='date', rangeslider=dict(visible=False))
    else:
      xaxis = dict(title='Time Steps',type='date', rangeslider=dict(visible=True))
    
    data = [go.Scatter(y=df[c], name=c,  **common_kw) for c in df.columns]
    layout = dict(title=title, showlegend=True, annotations=annotations, xaxis=xaxis, )
    fig = dict(data=data, layout=layout)
    iplot(fig, show_link=False)

In [0]:
def create_timeseries_dataset(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
        data: Sequence of observations as a list or NumPy array.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()

    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var_lag%d(t-%d)' % (j+1, i)) for j in range(n_vars)]

    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var_roll%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var_roll%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [0]:
def mean_absolute_percentage_error(y_pred, y_true): 
    return np.mean(np.abs((y_pred - y_true)) / (y_pred+ 1e-7)) * 100

### Read and Explore Dataset

In [0]:
df_raw = pd.read_csv(filepath_or_buffer=data_filename, header=0, names=None, usecols=['date', 'sale_amt'], parse_dates=[0])

In [0]:
df_raw.index = df_raw.date
df_raw.drop(['date'], inplace=True, axis=1)

In [15]:
configure_plotly_browser_state()
plotly_df(df_raw, title='Timeseries')

Take the first 90 rows away from when the business started as that doesn't represent the pattern

In [0]:
df_raw =df_raw[90:]

In [17]:
configure_plotly_browser_state()
plotly_df(df_raw, title='Timeseries')

### Baseline Metrics (7-day moving average )

In [0]:
train, val, test = df_raw['sale_amt'].values[:-14], df_raw['sale_amt'].values[-14:], df_raw['sale_amt'].values[-14:]

In [0]:
def baseline():
  # Moving average of last few days
  
  train_base = df_raw['sale_amt'][:-14]
  rolling_mean = train_base.rolling(window=7).mean()
  predictions = list()
  predictions = rolling_mean.values[-14:]
  
  for i in range(len(val)):    
    print('>Predicted=%.f, Expected=%.f' % (predictions[i], val[i]))

  df_out = pd.DataFrame(df_raw['sale_amt'][-14:])
  df_out.columns=['Original']
  df_out['Predicted'] = predictions
  
  
  return df_out

In [20]:
df_out = baseline()

>Predicted=74299, Expected=137733
>Predicted=60008, Expected=149899
>Predicted=79410, Expected=89982
>Predicted=81349, Expected=80733
>Predicted=82408, Expected=67755
>Predicted=82408, Expected=0
>Predicted=82408, Expected=0
>Predicted=82382, Expected=144153
>Predicted=96879, Expected=142017
>Predicted=80311, Expected=72274
>Predicted=79917, Expected=64477
>Predicted=79317, Expected=61764
>Predicted=79317, Expected=0
>Predicted=79317, Expected=0


In [21]:
configure_plotly_browser_state()
plotly_df(df_out, title='Baseline Predictions i.e., 7-day moving average for last 14 days', forecast=True)

In [22]:
rmse = math.sqrt(mean_squared_error(df_out['Original'].values, df_out['Predicted'].values))
print('Test RMSE: %.3f' % rmse)

Test RMSE: 56732.334


In [23]:
mape = mean_absolute_percentage_error(np.array(df_out['Predicted'].values),np.array(df_out['Original'].values))
print ('Test MAPE: %.3f' %mape)

Test MAPE: 60.004


In [24]:
err = (abs(sum(df_out['Predicted'].values) - sum(df_out['Original'].values)) / (sum(df_out['Original'].values))) *100
print ('Prediction Error: %.2f%%' %err)

Prediction Error: 10.78%


### Statefull LSTM Model

In [0]:
# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
    df = pd.DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag+1)]
    columns.append(df)
    df = pd.concat(columns, axis=1)
    df.fillna(0, inplace=True)
    return df

In [0]:
# scale train and test data to [-1, 1]
def scale(train, test):
	# fit scaler
	scaler = MinMaxScaler(feature_range=(-1, 1))
	scaler = scaler.fit(train)
	# transform train
	train_scaled = scaler.transform(train)
	# transform test
	test_scaled = scaler.transform(test)
	return scaler, train_scaled, test_scaled

In [0]:
# inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
	new_row = [x for x in X] + [value]
	array = np.array(new_row)
	array = array.reshape(1, len(array))
	inverted = scaler.inverse_transform(array)
	return inverted[0, -1]

In [0]:
def fit_lstm(train, batch_size, nb_epoch, neurons, test = None, load_model = False):
    
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    
    if test.any():
        X_test, y_test = test[:, 0:-1], test[:, -1]
        X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
                
    
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), 
                                stateful=True, return_sequences=True))
    model.add(tf.keras.layers.LSTM(neurons ,stateful=True))
    model.add(tf.keras.layers.Dense(1))
    model.compile('adam', loss='mse')
    
    if load_model:
        pass 
    
    c = [
          ModelCheckpoint(checkpoint_dir + model_filename, save_best_only=True,monitor='val_loss', mode='min', verbose=1, period=1),
          EarlyStopping(monitor='val_loss', min_delta=0, patience=200, verbose=1),
          ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=55, min_lr=0.0001, verbose=1)]
    
    
#     for i in range(nb_epoch):
#         print(i)
#         model.fit(X, y, epochs=1, batch_size=batch_size, verbose=2, shuffle=False, validation_data = (X_test, y_test) , callbacks = c)
#         model.reset_states()
        
    hist = model.fit(X, y, epochs=nb_epoch, batch_size=batch_size, verbose=2, shuffle=False, validation_data = (X_test, y_test) , callbacks = c) 
    
    return model, hist

In [0]:
# make a one-step forecast
def forecast_lstm(model, batch_size, X):
	X = X.reshape(1, 1, len(X))
	yhat = model.predict(X, batch_size=batch_size)
	return yhat[0,0]

In [0]:
# create a differenced series
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        #print(dataset[i])
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return pd.Series(diff)

In [0]:
def inverse_difference(history, yhat, interval=1):
  return yhat + history[-interval]

In [0]:
def plot_metrics(hist_name):
  
  plt.style.use('default')
  # plt.style.use('seaborn-deep')

  # sns.set(style="whitegrid")
  # sns.set_context("talk")

  fig, ax = plt.subplots(facecolor='w')
  fig.set_size_inches(fig.get_figwidth() * 2.0, fig.get_figheight() * 1.0)

  ax.plot(hist_name.history['loss'])
  ax.plot(hist_name.history['val_loss'])

  plt.xlabel('Epoch')
  plt.ylabel('MSE Loss')
  plt.title('Loss Over Time')
  plt.legend(['Train','Valid'])
  plt.show()
  
  return

**Data processing**

In [0]:
# transform data to be stationary
raw_values = df_raw['sale_amt'].values
diff_values = difference(raw_values, 1)

In [0]:
# transform data to be supervised learning
supervised = timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values

In [0]:
# split data into train and test-sets
train, test = supervised_values[:len(supervised_values)-14], supervised_values[len(supervised_values)-14:]

In [0]:
# transform the scale of the data
scaler, train_scaled, test_scaled = scale(train, test)

In [0]:
if train_model:
  # fit the model
  lstm_model, hist = fit_lstm(train_scaled, 1, 1200, 32, test_scaled)

W0516 17:23:43.131497 140058128934784 tf_logging.py:161] <tensorflow.python.keras.layers.recurrent.UnifiedLSTM object at 0x7f60ec86be10>: Note that this layer is not optimized for performance. Please use tf.keras.layers.CuDNNLSTM for better performance on GPU.
W0516 17:23:43.226477 140058128934784 tf_logging.py:161] <tensorflow.python.keras.layers.recurrent.UnifiedLSTM object at 0x7f60ec86b908>: Note that this layer is not optimized for performance. Please use tf.keras.layers.CuDNNLSTM for better performance on GPU.


Train on 634 samples, validate on 14 samples
Epoch 1/1200

Epoch 00001: val_loss improved from inf to 0.24601, saving model to /content/model1_eu.hdf5
634/634 - 3s - loss: 0.0805 - val_loss: 0.2460
Epoch 2/1200

Epoch 00002: val_loss did not improve from 0.24601
634/634 - 2s - loss: 0.0625 - val_loss: 13.6930
Epoch 3/1200

Epoch 00003: val_loss improved from 0.24601 to 0.13905, saving model to /content/model1_eu.hdf5
634/634 - 2s - loss: 0.6925 - val_loss: 0.1390
Epoch 4/1200

Epoch 00004: val_loss did not improve from 0.13905
634/634 - 2s - loss: 0.0667 - val_loss: 0.1634
Epoch 5/1200

Epoch 00005: val_loss did not improve from 0.13905
634/634 - 2s - loss: 0.0768 - val_loss: 0.1964
Epoch 6/1200

Epoch 00006: val_loss did not improve from 0.13905
634/634 - 2s - loss: 0.0777 - val_loss: 0.1556
Epoch 7/1200

Epoch 00007: val_loss improved from 0.13905 to 0.11969, saving model to /content/model1_eu.hdf5
634/634 - 2s - loss: 0.0745 - val_loss: 0.1197
Epoch 8/1200

Epoch 00008: val_loss imp

In [0]:
# from google.colab import files
# files.download(model_filename)

In [0]:
# Load the Model file and do the inference

def forecast(train_scaled=train_scaled,test_scaled=test_scaled):  
  model_from_file = load_model(checkpoint_dir + model_filename)

  # forecast the entire training dataset to build up state for forecasting
  train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
  model_from_file.predict(train_reshaped, batch_size=1)

  # walk-forward validation on the test data
  predictions = list()
  for i in range(len(test_scaled)):
      # make one-step forecast
      X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
      yhat = forecast_lstm(model_from_file, 1, X)
      # invert scaling
      yhat = invert_scale(scaler, X, yhat)
      # invert differencing
      yhat = inverse_difference(raw_values, yhat, len(test_scaled)+1-i)
      # store forecast
      predictions.append(yhat)
      expected = raw_values[len(train) + i + 1]
#       print('day=%d, Predicted=%f, Expected=%f' % (i+1, yhat, expected))  
      
  
  df_out = pd.DataFrame(df_raw[-14:]['sale_amt'])
  df_out.columns=['Original']
  df_out['Predicted'] = predictions
  
  
  return df_out

In [38]:
df_pred = forecast()

W0519 03:58:58.367589 139899815688064 tf_logging.py:161] <tensorflow.python.keras.layers.recurrent.UnifiedLSTM object at 0x7f3c8082d438>: Note that this layer is not optimized for performance. Please use tf.keras.layers.CuDNNLSTM for better performance on GPU.
W0519 03:58:59.064797 139899815688064 tf_logging.py:161] <tensorflow.python.keras.layers.recurrent.UnifiedLSTM object at 0x7f3c80825b70>: Note that this layer is not optimized for performance. Please use tf.keras.layers.CuDNNLSTM for better performance on GPU.


In [39]:
configure_plotly_browser_state()
plotly_df(df_pred, title='Predictions', forecast = True)

In [40]:
configure_plotly_browser_state()
plotly_df(df_raw, title='Timeseries')

In [41]:
rmse = math.sqrt(mean_squared_error(df_pred['Original'].values, df_pred['Predicted'].values))
print('Test RMSE: %.3f' % rmse)

Test RMSE: 11119.342


In [42]:
mape = mean_absolute_percentage_error(np.array(df_pred['Predicted'].values),np.array(df_pred['Original'].values))
print ('Test MAPE: %.3f' %mape)

Test MAPE: 7.013


In [43]:
err = (abs(sum(df_pred['Predicted'].values) - sum(df_pred['Original'].values)) / (sum(df_pred['Original'].values))) *100
print ('Prediction Error: %.2f%%' %err)

Prediction Error: 1.82%
