# LSTM forecast of monthly shop sales
Adapted from the tutorial at https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/

In [1]:
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from dateutil.relativedelta import relativedelta
from numpy import array
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential

In [2]:
# function to split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Slice a univariate sequence into supervised-learning samples.

    Every run of ``n_steps`` consecutive values becomes one input window, and
    the value immediately after that run becomes the window's target.

    Args:
        sequence: indexable, sliceable series of observations (e.g. a list).
        n_steps: number of consecutive observations per input window.

    Returns:
        A tuple ``(X, y)`` of numpy arrays: ``X`` stacks the input windows and
        ``y`` holds the observation that follows each window. Both arrays are
        empty when the sequence is too short to give one window plus a target.
    """
    total = len(sequence)
    # A window starting at `start` needs index `start + n_steps` to exist as
    # its target, so valid starts end before total - n_steps; capping at
    # `total` keeps the start index itself inside the sequence.
    n_windows = min(total, total - n_steps)
    windows = [sequence[start:start + n_steps] for start in range(n_windows)]
    targets = [sequence[start + n_steps] for start in range(n_windows)]
    return array(windows), array(targets)

In [3]:
# load the raw daily sales records
df_sales = pd.read_csv('sales_train_v2.csv')
# parse the date strings (day comes first) into real datetimes
df_sales['date'] = pd.to_datetime(df_sales['date'], dayfirst=True)

# scaffold with one row per date block (0..33), so that months without any
# sales still appear in the final table
df_sales_month_empty = pd.DataFrame({'date_block_num': np.arange(34)})

# total items sold per date block, restricted to shop 59
df_sales_month = (
    df_sales.loc[df_sales['shop_id'] == 59]
    .groupby(['date_block_num'])['item_cnt_day']
    .sum()
    .reset_index(name='item_tot_month')
)

# left-join the totals onto the scaffold; date blocks missing from the totals
# come out as NaN and are replaced by 0
df_sales_month = df_sales_month_empty.merge(
    df_sales_month, how='left', on='date_block_num'
).fillna(0)
df_sales_month

Unnamed: 0,date_block_num,item_tot_month
0,0,2017.0
1,1,1897.0
2,2,2028.0
3,3,1388.0
4,4,1374.0
5,5,1707.0
6,6,1747.0
7,7,2048.0
8,8,2008.0
9,9,1751.0


In [4]:
# length of each input window (number of consecutive monthly values)
n_steps = 5
# monthly totals as a plain Python list: the univariate series to model
raw_seq = df_sales_month['item_tot_month'].tolist()
# cut the series into (window, next value) training pairs
X, y = split_sequence(sequence=raw_seq, n_steps=n_steps)
# series length minus one window: the index where the final n_steps values start
n_train = len(raw_seq) - n_steps
n_train

29

In [5]:
# the LSTM layer is fed [samples, timesteps, features]; reshape X from
# [samples, timesteps] by giving it a trailing axis of size n_features
n_features = 1
X = np.reshape(X, (X.shape[0], X.shape[1], n_features))

In [6]:
# fix the random seeds before the model is built so that weight initialisation
# and batch shuffling are repeatable across Restart & Run All (as far as the
# backend allows); without this every run produces a different forecast
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# define model: one LSTM layer of 50 units reading windows of shape
# (n_steps, n_features), followed by a single-unit dense output
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
# optimise mean squared error with Adam
model.compile(optimizer='adam', loss='mse')

In [7]:
# train on all of X / y for 200 epochs, with progress output switched off
model.fit(
    x=X,
    y=y,
    epochs=200,
    verbose=0,
)

<tensorflow.python.keras.callbacks.History at 0x7fa26be10400>

In [8]:
# build the forecast input from the tail of the series (index n_train onward),
# shaped as a single sample: [1, timesteps, features]
x_input = array(raw_seq[n_train:]).reshape((1, n_steps, n_features))

In [9]:
# forecast the value that follows the last window (November 2015) and print it
yhat = model.predict(x=x_input, verbose=0)
print(yhat)

[[968.5896]]
