# Deep Learning for Time Series Store Forecasting
* By Alex Dance https://www.linkedin.com/in/alex-dance/
* This notebook is one of several notebooks in a project to improve store and product forecasts:
1.	EDA – Exploratory Data Analysis – includes working with annual forecasts
2.	Main Modelling
3.	XGBoost modelling by Month
4.	Weighted average
5.	ARIMA – Month and Other Modelling
6.	Deep Learning


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

In [None]:
import tensorflow as tf
from tensorflow import keras
from pylab import rcParams

In [None]:
class color:
    """ANSI escape sequences used to make printed headings stand out."""

    # Escape code that switches terminal output to bold text.
    BOLD = '\033[1m'

In [None]:
from sklearn.metrics import mean_squared_error , mean_absolute_error

In [None]:
df = pd.read_csv("../input/demand-forecasting-kernels-only/train.csv")
df.head()

# Getting Data Ready

In [None]:
df['date'] =  pd.to_datetime(df['date'])

In [None]:
df = df.set_index('date')

In [None]:
df.head()

In [None]:
# Keep only the rows for store 1 / item 1, i.e. a single sales series.
df_1_1 = df.loc[(df["store"] == 1) & (df["item"] == 1)]

In [None]:
Deep1_all = df_1_1.resample('D')['sales'].sum()

In [None]:
#Deep1_all = df.resample('D')['sales'].sum()  # alternative: forecast on the total dataset (all stores and items) by day

In [None]:
Deep1_all.head()

# The next few cells keep a copy of the series with its dates next to an integer index, so that predictions can later be matched back to actual dates

In [None]:
Deep1_all_With_index = Deep1_all.copy()

In [None]:
Deep1_all_With_index =Deep1_all_With_index.reset_index()

In [None]:
Deep1_all_With_index.head()

In [None]:
Deep1_all_With_index.head()

In [None]:
Deep1 = Deep1_all_With_index.drop(['date'], axis = 1)

In [None]:
Deep1['sales'] = Deep1['sales'].astype('float32')

In [None]:
Deep1.info()

In [None]:
values = Deep1.values

In [None]:
print(values)

In [None]:
values = values.astype('float32')

In [None]:
Deep1.shape

In [None]:
# Hold out the final 376 rows for testing. Per the original author's note this
# is 366 days for the evaluation year plus 10 extra leading days, which are
# needed to seed the first input window.
train_size = len(Deep1) - 376
test_size = len(Deep1) - train_size

# Chronological split: everything before the cut-off trains, the rest tests.
train = Deep1.iloc[:train_size]
test = Deep1.iloc[train_size:]
print(len(train), len(test))

In [None]:
print(test)

In [None]:
print(train)

In [None]:
train.head()

In [None]:
# One of the most difficult parts of deep learning modelling is getting the dataset into the right format.
# This function completes that process.

def create_dataset(X, y, time_steps=1):
    """Build sliding-window samples for a sequence model.

    Sample ``i`` consists of the window ``X[i:i + time_steps]`` and the
    target ``y[i + time_steps]``, i.e. the value that immediately follows
    the window.

    Args:
        X: Input sequence ordered in time. Any array-like is accepted
            (pandas DataFrame/Series, NumPy array, nested list); the first
            axis is treated as time.
        y: Target sequence aligned with ``X`` and at least as long as it.
        time_steps: Number of consecutive rows in each input window. Must
            not be negative.

    Returns:
        A tuple ``(Xs, ys)`` of NumPy arrays. ``Xs`` has shape
        ``(n_samples, time_steps, *X.shape[1:])`` and ``ys`` has shape
        ``(n_samples, *y.shape[1:])``, where
        ``n_samples = max(len(X) - time_steps, 0)``. The shapes keep their
        full rank even when ``n_samples`` is 0, so callers can still read
        ``Xs.shape[1]`` and ``Xs.shape[2]``.

    Raises:
        ValueError: If ``time_steps`` is negative.
        IndexError: If ``y`` is shorter than ``X`` (a target would be
            missing for at least one window).
    """
    if time_steps < 0:
        raise ValueError(f"time_steps must be non-negative, got {time_steps}")

    features = np.asarray(X)
    targets = np.asarray(y)

    n_samples = max(len(features) - time_steps, 0)
    if n_samples and len(targets) < len(features):
        raise IndexError(
            f"y has {len(targets)} rows but X has {len(features)}; "
            "every window needs a target"
        )

    # Row i of `window_idx` holds the positions i, i + 1, ..., i + time_steps - 1,
    # so one fancy-indexing call gathers every window (and returns a copy).
    window_idx = np.arange(n_samples)[:, None] + np.arange(time_steps)[None, :]
    Xs = features[window_idx]

    # The target of window i sits right after it, at position i + time_steps.
    # Copy so the result never aliases the caller's data.
    ys = targets[time_steps:time_steps + n_samples].copy()
    return Xs, ys

In [None]:
test.shape

In [None]:
# These next few lines are about getting the data ready for modelling
# Window length: each sample feeds the model the 10 preceding daily values.
time_steps = 10

# Turn each split into arrays shaped [samples, time_steps, n_features],
# with the value following each window as its target.
X_train, y_train = create_dataset(train, train["sales"], time_steps=time_steps)
X_test, y_test = create_dataset(test, test["sales"], time_steps=time_steps)

In [None]:
print(X_train.shape, y_train.shape)
# Note the 3 dimensional shape

In [None]:
len(X_test)


In [None]:

# One LSTM layer that reads windows of shape (time_steps, n_features),
# followed by two dense layers that narrow the output down to a single value.
deep_model = keras.Sequential([
    keras.layers.LSTM(
        units=128,
        input_shape=(X_train.shape[1], X_train.shape[2]),
    ),
    keras.layers.Dense(units=2),
    keras.layers.Dense(units=1),
])

# Mean squared error loss, optimised with Adam at a learning rate of 0.001.
deep_model.compile(
    loss='mse',
    optimizer=keras.optimizers.Adam(0.001),
)

In [None]:
# Train for 30 epochs in mini-batches of 16 windows; `history` keeps the
# object returned by fit() for later inspection.
history = deep_model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=16,
    validation_split=0.1,  # hold back 10% of the training windows for validation
    verbose=1,
    shuffle=False  # keep the windows in the order they were built (chronological)
)

In [None]:
print(y_train)

In [None]:
y_pred = deep_model.predict(X_test)

# Now Looking at the data

In [None]:
print(y_pred)

In [None]:
X_test.shape

In [None]:
print(X_test)

In [None]:
print(y_pred)

In [None]:
print(X_test)

In [None]:
Results =[]

In [None]:
Results = pd.DataFrame( columns=['sales','pred'])

In [None]:
Results['sales'] = test['sales']

In [None]:
Results.head()

# Getting the data ready to compare, ensuring results can be viewed by actual date rather than by index

In [None]:
y_pred_df = pd.DataFrame(y_pred, columns=['pred'])

In [None]:
# The first `time_steps` rows of `test` only seed the first input window and
# have no prediction of their own, so drop them to line `sales` up with the
# predictions. Slicing by `time_steps` (instead of a hard-coded 10) keeps this
# in step with the window length used to build X_test.
Results = Results[time_steps:]

In [None]:
y_pred_df.head()

In [None]:
Results= Results.reset_index() 

In [None]:
Results.head(10)

In [None]:
Results ['pred'] = y_pred_df['pred']  

In [None]:
Results = Results.set_index('index')

In [None]:
Results =Results.drop (['sales'],axis=1)

In [None]:
New_Results = pd.concat([Results, Deep1_all_With_index], axis=1)

In [None]:
New_Results.head() ##### GOOD

In [None]:
New_Results.tail() ##### GOOD

In [None]:
Results_with_date_2017 = New_Results[(New_Results.date>'2016-12-31')]

In [None]:
Results_with_date_2017.head()

In [None]:
# Root mean squared error: square the errors, average them, and only THEN take
# the square root. Taking the root of each squared error before averaging
# yields the mean absolute error instead, which understates the RMSE.
RMSE_Deep = np.sqrt(
    np.mean((Results_with_date_2017['pred'] - Results_with_date_2017['sales']) ** 2)
)
print(RMSE_Deep)

In [None]:
# Note: this compares to 4.009 from XGBoost for the same data period

# Plotting

In [None]:
_ = Results_with_date_2017[['sales','pred']].plot(figsize=(15, 5))

In [None]:
# Build the mask from the frame being filtered. A mask taken from a different
# DataFrame has a different index, which forces pandas to reindex it and
# triggers a "Boolean Series key will be reindexed" warning.
Results_with_date_Jan_2017 = Results_with_date_2017[Results_with_date_2017.date < '2017-02-01']

In [None]:
_ = Results_with_date_Jan_2017[['sales','pred']].plot(figsize=(15, 5))