In [1]:
import numpy as np
from tensorflow import set_random_seed
from numpy.random import seed
import pandas as pd
#import matplotlib.pyplot as plt
from keras import optimizers
from keras.callbacks import ModelCheckpoint
from keras.utils import plot_model
from keras.models import Sequential, Model
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed, Flatten, Dropout
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from datetime import datetime
from datetime import timedelta

Using TensorFlow backend.


In [2]:
#dataset_empty = 0
set_random_seed(1)
seed(1)

In [3]:
#Convert the train data into time series
def time_series_data(data, window, lag):
    dropnan = True
    cols, names = list(), list()

    for i in range(window, 0, -1):
        #past time series data (t-)
        cols.append(data.shift(i))
        names = names + [('%s(t-%d)' % (col, i)) for col in data.columns]
    
    #current time series data (t = 0)
    cols.append(data)
    names = names + [('%s(t)' % (col)) for col in data.columns]
    
    #future data (t + lag)
    cols.append(data.shift(-lag))
    names = names + [('%s(t+%d)' % (col, lag)) for col in data.columns]
    
    #all data
    all_data = pd.concat(cols, axis=1)
    all_data.columns = names
    
    #drops rows with NaN
    if dropnan:
        all_data.dropna(inplace=True)
    return all_data

In [8]:
dataset = pd.read_csv('sales data-set.csv', usecols = ['Store', 'Item', 'Date', 'Weekly_Sales'])
train = dataset[['Store', 'Item', 'Date', 'Weekly_Sales']]
#train = train.loc[(train['Store'] == 1) & (train['Item'] == 1)]
#train = train[(train['Date'] >= '2012-01-01')]
train = train.loc[train['Store'] == 1]

print(train.head())

   Store  Item        Date  Weekly_Sales
0      1     1  2010-02-07      24924.50
1      1     2  2010-02-07      50605.27
2      1     3  2010-02-07      13740.12
3      1     4  2010-02-07      39954.04
4      1     5  2010-02-07      32229.38


In [9]:
#Re-arranges the train dataset to apply shift methods
train_r = train.sort_values('Date').groupby(['Item', 'Store', 'Date'], as_index=False)
train_r = train_r.agg({'Weekly_Sales':['mean']})
train_r.columns = ['Item', 'Store', 'Date', 'Weekly_Sales']
#cols_to_drop = ['Date']
#train_r.drop(cols_to_drop, axis=1, inplace=True)
train_r.head()

Unnamed: 0,Item,Store,Date,Weekly_Sales
0,1,1,2010-02-07,24924.5
1,1,1,2010-02-14,46039.49
2,1,1,2010-02-21,41595.55
3,1,1,2010-02-28,19403.54
4,1,1,2010-03-07,21827.9


In [10]:
train_r = train_r.groupby(['Date'], as_index=False)['Weekly_Sales'].sum()
train_r.head()

Unnamed: 0,Date,Weekly_Sales
0,2010-02-07,1643690.9
1,2010-02-14,1641957.44
2,2010-02-21,1611968.17
3,2010-02-28,1409727.59
4,2010-03-07,1554806.68


In [11]:
#weekly sales in 10 million dollars
train_r['Weekly_Sales'] = round(train_r['Weekly_Sales'] / 1000000, 2)
train_r.head()

Unnamed: 0,Date,Weekly_Sales
0,2010-02-07,1.64
1,2010-02-14,1.64
2,2010-02-21,1.61
3,2010-02-28,1.41
4,2010-03-07,1.55


In [12]:
#the model will use last 117 weekly sales data and 
#current timestep (7 days) to forecast next weekly sales data 12 weeks ahead

#the model will use last 29 weekly sales data and 
#current timestep (7 days) to forecast next weekly sales data 4 weeks ahead

window = 117
lag = 12

series_data = time_series_data(train_r, window, lag)
series_data.head()

Unnamed: 0,Date(t-117),Weekly_Sales(t-117),Date(t-116),Weekly_Sales(t-116),Date(t-115),Weekly_Sales(t-115),Date(t-114),Weekly_Sales(t-114),Date(t-113),Weekly_Sales(t-113),...,Date(t-3),Weekly_Sales(t-3),Date(t-2),Weekly_Sales(t-2),Date(t-1),Weekly_Sales(t-1),Date(t),Weekly_Sales(t),Date(t+12),Weekly_Sales(t+12)
117,2010-02-07,1.64,2010-02-14,1.64,2010-02-21,1.61,2010-02-28,1.41,2010-03-07,1.55,...,2012-04-15,1.62,2012-04-22,1.52,2012-04-29,1.47,2012-05-06,1.68,2012-07-29,1.44
118,2010-02-14,1.64,2010-02-21,1.61,2010-02-28,1.41,2010-03-07,1.55,2010-03-14,1.44,...,2012-04-22,1.52,2012-04-29,1.47,2012-05-06,1.68,2012-05-13,1.61,2012-08-05,1.63
119,2010-02-21,1.61,2010-02-28,1.41,2010-03-07,1.55,2010-03-14,1.44,2010-03-21,1.47,...,2012-04-29,1.47,2012-05-06,1.68,2012-05-13,1.61,2012-05-20,1.6,2012-08-12,1.59
120,2010-02-28,1.41,2010-03-07,1.55,2010-03-14,1.44,2010-03-21,1.47,2010-03-28,1.4,...,2012-05-06,1.68,2012-05-13,1.61,2012-05-20,1.6,2012-05-27,1.56,2012-08-19,1.6
121,2010-03-07,1.55,2010-03-14,1.44,2010-03-21,1.47,2010-03-28,1.4,2010-04-04,1.59,...,2012-05-13,1.61,2012-05-20,1.6,2012-05-27,1.56,2012-06-03,1.62,2012-08-26,1.49


In [13]:
future_dates = series_data[['Date(t+%d)' % lag]]
future_dates

Unnamed: 0,Date(t+12)
117,2012-07-29
118,2012-08-05
119,2012-08-12
120,2012-08-19
121,2012-08-26
122,2012-09-02
123,2012-09-09
124,2012-09-16
125,2012-09-23
126,2012-09-30


In [14]:
#drops last record of (t + lag)
#last_record_item = 'Item(t-%d)' % window
#last_record_store = 'Store(t-%d)' % window
#series_data = series_data[(series_data['Item(t)'] == series_data[last_record_item])]
#series_data = series_data[(series_data['Store(t)'] == series_data[last_record_store])]

#drops Item and Store columns
cols_to_drop = [('%s(t+%d)' % (col, lag)) for col in ['Date']]
for i in range(window, 0, -1):
    cols_to_drop += [('%s(t-%d)' % (col, i)) for col in ['Date']]

series_data.drop(cols_to_drop, axis=1, inplace=True)
series_data.drop(['Date(t)'], axis=1, inplace=True)

lbls_col = 'Weekly_Sales(t+%d)' % lag
lbls = series_data[lbls_col]
series_data = series_data.drop(lbls_col, axis=1)

series_data.head()

Unnamed: 0,Weekly_Sales(t-117),Weekly_Sales(t-116),Weekly_Sales(t-115),Weekly_Sales(t-114),Weekly_Sales(t-113),Weekly_Sales(t-112),Weekly_Sales(t-111),Weekly_Sales(t-110),Weekly_Sales(t-109),Weekly_Sales(t-108),...,Weekly_Sales(t-9),Weekly_Sales(t-8),Weekly_Sales(t-7),Weekly_Sales(t-6),Weekly_Sales(t-5),Weekly_Sales(t-4),Weekly_Sales(t-3),Weekly_Sales(t-2),Weekly_Sales(t-1),Weekly_Sales(t)
117,1.64,1.64,1.61,1.41,1.55,1.44,1.47,1.4,1.59,1.55,...,1.69,1.68,1.68,1.51,1.65,1.9,1.62,1.52,1.47,1.68
118,1.64,1.61,1.41,1.55,1.44,1.47,1.4,1.59,1.55,1.47,...,1.68,1.68,1.51,1.65,1.9,1.62,1.52,1.47,1.68,1.61
119,1.61,1.41,1.55,1.44,1.47,1.4,1.59,1.55,1.47,1.39,...,1.68,1.51,1.65,1.9,1.62,1.52,1.47,1.68,1.61,1.6
120,1.41,1.55,1.44,1.47,1.4,1.59,1.55,1.47,1.39,1.43,...,1.51,1.65,1.9,1.62,1.52,1.47,1.68,1.61,1.6,1.56
121,1.55,1.44,1.47,1.4,1.59,1.55,1.47,1.39,1.43,1.6,...,1.65,1.9,1.62,1.52,1.47,1.68,1.61,1.6,1.56,1.62


In [15]:
series_data.shape[0]

14

In [16]:
#train and test split

X_train_1, X_test_1, Y_train_1, Y_test_1 = train_test_split(series_data, lbls.values, test_size=0.1, random_state=0)
X_train = X_train_1
Y_train = Y_train_1

X_train_1, X_test_1, Y_train_1, Y_test_1 = train_test_split(series_data, lbls.values, test_size=0.5, random_state=0)
X_test = X_test_1
Y_test = Y_test_1

print('Train dataset:', X_train.shape)
print('Test dataset:', X_test.shape)
X_train.head()

Train dataset: (12, 118)
Test dataset: (7, 118)


Unnamed: 0,Weekly_Sales(t-117),Weekly_Sales(t-116),Weekly_Sales(t-115),Weekly_Sales(t-114),Weekly_Sales(t-113),Weekly_Sales(t-112),Weekly_Sales(t-111),Weekly_Sales(t-110),Weekly_Sales(t-109),Weekly_Sales(t-108),...,Weekly_Sales(t-9),Weekly_Sales(t-8),Weekly_Sales(t-7),Weekly_Sales(t-6),Weekly_Sales(t-5),Weekly_Sales(t-4),Weekly_Sales(t-3),Weekly_Sales(t-2),Weekly_Sales(t-1),Weekly_Sales(t)
121,1.55,1.44,1.47,1.4,1.59,1.55,1.47,1.39,1.43,1.6,...,1.65,1.9,1.62,1.52,1.47,1.68,1.61,1.6,1.56,1.62
128,1.39,1.43,1.6,1.49,1.4,1.43,1.62,1.54,1.5,1.42,...,1.6,1.56,1.62,1.7,1.63,1.53,1.54,1.77,1.53,1.5
119,1.61,1.41,1.55,1.44,1.47,1.4,1.59,1.55,1.47,1.39,...,1.68,1.51,1.65,1.9,1.62,1.52,1.47,1.68,1.61,1.6
130,1.6,1.49,1.4,1.43,1.62,1.54,1.5,1.42,1.49,1.55,...,1.62,1.7,1.63,1.53,1.54,1.77,1.53,1.5,1.44,1.63
126,1.55,1.47,1.39,1.43,1.6,1.49,1.4,1.43,1.62,1.54,...,1.68,1.61,1.6,1.56,1.62,1.7,1.63,1.53,1.54,1.77


In [17]:
X_train_series = X_train.values.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_series = X_test.values.reshape((X_test.shape[0], X_test.shape[1], 1))
print('Train dataset:', X_train_series.shape)
print('Test dataset:', X_test_series.shape)

Train dataset: (12, 118, 1)
Test dataset: (7, 118, 1)


In [18]:
#CNN-LSTM
subsequences = 2
time_steps = X_train_series.shape[1] // subsequences
X_train_series_sub = X_train_series.reshape((X_train_series.shape[0], subsequences, time_steps, 1))
X_test_series_sub = X_test_series.reshape((X_test_series.shape[0], subsequences, time_steps, 1))
print('Train dataset:', X_train_series_sub.shape)
print('Test dataset:', X_test_series_sub.shape)

Train dataset: (12, 2, 59, 1)
Test dataset: (7, 2, 59, 1)


In [19]:
epochs = 2000
batch = 32
learning_rate = 0.00000001
adam = optimizers.Adam(learning_rate)

In [21]:
model_cnn_lstm = Sequential()
model_cnn_lstm.add(TimeDistributed(Conv1D(filters = 64, kernel_size = 1, activation = 'relu'), input_shape = (None, X_train_series_sub.shape[2], X_train_series_sub.shape[3])))
model_cnn_lstm.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model_cnn_lstm.add(TimeDistributed(Flatten()))
model_cnn_lstm.add(LSTM(60, activation = 'relu'))
model_cnn_lstm.add(Dropout(0.1))
#model_cnn_lstm.add(Dense(100, activation='relu'))
#model_cnn_lstm.add(Dropout(0.4))
#model_cnn_lstm.add(Dense(50, activation='relu'))
#model_cnn_lstm.add(Dropout(0.3))
#model_cnn_lstm.add(Dense(32, activation='relu'))
#model_cnn_lstm.add(Dense(16, activation='relu'))
model_cnn_lstm.add(Dense(30, activation = 'relu'))
model_cnn_lstm.add(Dropout(0.1))
model_cnn_lstm.add(Dense(1))
model_cnn_lstm.compile(loss = 'mse', optimizer = 'adam')
model_cnn_lstm.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_4 (TimeDist (None, None, 59, 64)      128       
_________________________________________________________________
time_distributed_5 (TimeDist (None, None, 29, 64)      0         
_________________________________________________________________
time_distributed_6 (TimeDist (None, None, 1856)        0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 60)                460080    
_________________________________________________________________
dropout_3 (Dropout)          (None, 60)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 30)                1830      
_________________________________________________________________
dropout_4 (Dropout)          (None, 30)                0         
__________

In [22]:
file_name = "cnn_lstm_weights_store_1.hdf5"    #saves file to create checkpoints for each epoch
checkpoint = ModelCheckpoint(file_name, monitor = 'loss', verbose = 1, save_best_only = True, mode = 'min')
callbacks_list = [checkpoint]

print('Please wait...')

cnn_lstm = model_cnn_lstm.fit(X_train_series_sub, Y_train, validation_data = (X_test_series_sub, Y_test), epochs = epochs, verbose = 1, callbacks = callbacks_list, batch_size = batch)

Please wait...


W0808 00:57:37.611335 13880 deprecation.py:323] From C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0808 00:57:38.982346 13880 deprecation_wrapper.py:119] From C:\Anaconda3\envs\tensorflow\lib\site-packages\keras\backend\tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



Train on 12 samples, validate on 7 samples
Epoch 1/2000

Epoch 00001: loss improved from inf to 2.18303, saving model to cnn_lstm_weights_store_1.hdf5
Epoch 2/2000

Epoch 00002: loss improved from 2.18303 to 1.47561, saving model to cnn_lstm_weights_store_1.hdf5
Epoch 3/2000

Epoch 00003: loss improved from 1.47561 to 0.97181, saving model to cnn_lstm_weights_store_1.hdf5
Epoch 4/2000

Epoch 00004: loss improved from 0.97181 to 0.15412, saving model to cnn_lstm_weights_store_1.hdf5
Epoch 5/2000

Epoch 00005: loss improved from 0.15412 to 0.12033, saving model to cnn_lstm_weights_store_1.hdf5
Epoch 6/2000

Epoch 00006: loss did not improve from 0.12033
Epoch 7/2000

Epoch 00007: loss did not improve from 0.12033
Epoch 8/2000

Epoch 00008: loss did not improve from 0.12033
Epoch 9/2000

Epoch 00009: loss did not improve from 0.12033
Epoch 10/2000

Epoch 00010: loss improved from 0.12033 to 0.09253, saving model to cnn_lstm_weights_store_1.hdf5
Epoch 11/2000

Epoch 00011: loss did not imp

In [23]:
#load from saved file
file_name = "cnn_lstm_weights_store_1.hdf5"

model_cnn_lstm.load_weights(file_name)
model_cnn_lstm.compile(loss = 'mse', optimizer = 'adam')

#prediction
cnn_lstm_train_prediction = model_cnn_lstm.predict(X_train_series_sub)
cnn_lstm_test_prediction = model_cnn_lstm.predict(X_test_series_sub)

cnn_lstm_train_prediction

array([[1.4278002],
       [1.4880353],
       [1.5069249],
       [1.4157519],
       [1.4028581],
       [1.478154 ],
       [1.4242017],
       [1.5142789],
       [1.4765154],
       [1.4063959],
       [1.4478455],
       [1.4140166]], dtype=float32)

In [24]:
print('Train Error (RMSE):', np.sqrt(mean_squared_error(Y_train, cnn_lstm_train_prediction)))
print('Test Error (RMSE):', np.sqrt(mean_squared_error(Y_test, cnn_lstm_test_prediction)))

Train Error (RMSE): 0.10162008847270289
Test Error (RMSE): 0.11135429285840663


In [None]:
#save to csv
date_format = "%Y-%m-%d"
a = datetime.strptime(train['Date'].max(), date_format)
a = a + timedelta(7)

next_date = str(datetime.date(a))
next_sales = np.round(cnn_lstm_train_prediction[len(cnn_lstm_train_prediction) - 1], 2)

dataset = dataset.append(pd.DataFrame({'Store' : '1',
                                       'Item' : '1',
                                       'Date' : next_date,
                                       'Weekly_Sales' : next_sales}))
dataset.tail()

In [None]:
dataset.to_csv('sales data-set.csv', index = False)