In [1]:
import time
import pandas as pd
import numpy as np
from math import sqrt
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten, TimeDistributed
from keras.layers.convolutional import Conv1D, MaxPooling1D
from sklearn.metrics import mean_squared_error

Using TensorFlow backend.


In [45]:
from sqlalchemy import create_engine
engine = create_engine('mysql+pymysql://iss:6Jg3bwm56xtJ2mrfNQwvsaY$@idm5peipdsus5o.crcvo0yw3sz7.ap-southeast-1.rds.amazonaws.com:3306/iss_project')

In [3]:
import numpy as np
 
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [4]:
# load dataset from DB
df = pd.read_sql_table('orderproducts_top20', engine)
prod_monthly = pd.crosstab(df['order_date'], df['product_sku']).resample('M').sum()
prod_monthly = prod_monthly['2018-01':'2021-03']
items = prod_monthly.columns

In [5]:
items

Index(['EFX-FLY-BLK', 'M80-2B-BLK', 'M80-2G-BLK', 'M80-AC-BLK', 'M80-AD-BLK',
       'M80-BTY-BLK-L', 'M80-BTY-BLK-S', 'M80-EB-BLK', 'M80-EG-BLK',
       'M80-SEB-BLK', 'M80-SEG-ASH', 'M80-SEG-BLK', 'M80-TICK-V2-BLK',
       'M80-TOUR-V2-BLK', 'M80-VAD-BLK', 'M80-VEB-BLK', 'M80-VEB-GRY',
       'M80-VEG-BLK', 'M80-VEG-GRY', 'M80-VHB-BLK'],
      dtype='object', name='product_sku')

### CNN-LSTM

In [6]:
def cnn_lstm(raw_seq):
    n_steps = 4
    # split into samples
    X, y = split_sequence(raw_seq, n_steps)
    # reshape from [samples, timesteps] into [samples, subsequences, timesteps, features]
    n_features = 1
    n_seq = 2
    n_steps = 2
    X = X.reshape((X.shape[0], n_seq, n_steps, n_features))

    # define model
    model = Sequential()
    model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'), input_shape=(None, n_steps, n_features)))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(50, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    # fit model
    model.fit(X, y, epochs=2000, verbose=0)
    
    return model

In [41]:
start_time = time.time()
results = pd.DataFrame()
for item in items:
    raw_seq = prod_monthly[item]
    model   = cnn_lstm(raw_seq)
    data    = raw_seq[-7:]
    
    preds, y_test = [], []
    for i in range(3):
        X_input = np.array(data[i:i+4])
        X_input = X_input.reshape((1, 2, 2, 1))
        pred = float(model.predict(X_input))
        adj_pred = 0 if pred < 0 else round(pred)
        preds.append(adj_pred)
        y_test.append(data[i+4])
    item_name = [item for x in range(3)]
    rmse = round(np.sqrt(mean_squared_error(y_test, preds)),2)
    diff = abs(sum(np.array(y_test) - preds))
    res = pd.DataFrame(zip(item_name, np.array(y_test), np.array(preds)), 
                       index=['m+1','m+2','m+3'], columns=['item', 'test', 'predict'])
    res['rmse'] = ''
    res.loc['m+1', 'rmse'] = rmse
    res['abs(diff)'] = ''
    res.loc['m+1', 'abs(diff)'] = diff
    results = pd.concat([results,res], axis=0)

print(results)
end_time = time.time()
print('time taken :', round(end_time-start_time,1),'secs')

                item  test  predict  rmse abs(diff)
m+1      EFX-FLY-BLK     2        2  1.29         1
m+2      EFX-FLY-BLK     2        4                
m+3      EFX-FLY-BLK     6        5                
m+1       M80-2B-BLK     1        1  0.58         1
m+2       M80-2B-BLK     1        1                
m+3       M80-2B-BLK     2        1                
m+1       M80-2G-BLK     5        5  2.38         3
m+2       M80-2G-BLK     4        5                
m+3       M80-2G-BLK     9        5                
m+1       M80-AC-BLK     0        0   0.0         0
m+2       M80-AC-BLK     0        0                
m+3       M80-AC-BLK     3        3                
m+1       M80-AD-BLK     3        3  1.29         3
m+2       M80-AD-BLK     1        2                
m+3       M80-AD-BLK     0        2                
m+1    M80-BTY-BLK-L     3        3  0.58         1
m+2    M80-BTY-BLK-L     1        2                
m+3    M80-BTY-BLK-L     2        2                
m+1    M80-B

In [42]:
results.to_csv('../data-processed/top20forecasts_LSTM_01to03.csv')

#### Writing to database

results  = results.reset_index().rename(columns={'index':'month'})
results.to_sql(name='top20forecasts_LSTM', con=engine, if_exists = 'replace', index=False)

#### Predicting for Apr-21

In [10]:
start_time = time.time()
results = pd.DataFrame()
for item in items:
    raw_seq = prod_monthly[item]
    model   = cnn_lstm(raw_seq)
    data    = raw_seq[-4:]

    
    preds, y_test = [], []
    for i in range(1):
        X_input = np.array(data[i:i+4])
        X_input = X_input.reshape((1, 2, 2, 1))
        pred = float(model.predict(X_input))
        adj_pred = 0 if pred < 0 else round(pred)
        preds.append(adj_pred)
        #y_test.append(data[i+4])
    item_name = [item for x in range(3)]
    res = pd.DataFrame(zip(np.array(preds), item_name), 
                   index=['Apr-21'], columns=['forecast', 'item'])    
    #res = pd.DataFrame(zip(np.array(y_test), np.array(preds), item_name), 
    #               index=['m+1','m+2','m+3', 'm+4'], columns=['test', 'predict', 'item'])
    results = pd.concat([results,res], axis=0)
end_time = time.time()
print('time taken :', round(end_time-start_time,1),'secs')
print(results)

time taken : 929.4 secs
        forecast             item
Apr-21         3      EFX-FLY-BLK
Apr-21         1       M80-2B-BLK
Apr-21         4       M80-2G-BLK
Apr-21         3       M80-AC-BLK
Apr-21         3       M80-AD-BLK
Apr-21         2    M80-BTY-BLK-L
Apr-21         3    M80-BTY-BLK-S
Apr-21         0       M80-EB-BLK
Apr-21         3       M80-EG-BLK
Apr-21         0      M80-SEB-BLK
Apr-21         2      M80-SEG-ASH
Apr-21         1      M80-SEG-BLK
Apr-21         3  M80-TICK-V2-BLK
Apr-21         1  M80-TOUR-V2-BLK
Apr-21         3      M80-VAD-BLK
Apr-21         2      M80-VEB-BLK
Apr-21         1      M80-VEB-GRY
Apr-21         4      M80-VEG-BLK
Apr-21         2      M80-VEG-GRY
Apr-21         0      M80-VHB-BLK


In [28]:
results.to_csv('../data-processed/top20forecasts_LSTM_Apr-21.csv')