In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Read the retail CSV, ignoring the first 6 and the last 9 lines of the file
# (presumably non-tabular notes around the table -- confirm against the file).
# skipfooter is only supported by the python parser engine.
df = pd.read_csv(
    'canretail.csv',
    engine='python',
    skiprows=6,
    skipfooter=9,
)
df.head()

In [None]:
from pandas.tseries.offsets import MonthEnd

In [None]:
# Build a proper time-series index: parse the 'Adjustments' column into pandas
# datetimes (rather than leaving it as-is), move each date forward by one
# MonthEnd offset, and promote the result to the frame's index.
month_end_dates = pd.to_datetime(df['Adjustments']) + MonthEnd(1)
df = df.assign(Adjustments=month_end_dates).set_index('Adjustments')
df.head()

In [None]:
# First look at the data: plot the frame's columns against its index.
df.plot()

In [None]:
split_date = pd.Timestamp('01-01-2011')

In [None]:
train = df.loc[:split_date, ['Unadjusted']]
test = df.loc[split_date:, ['Unadjusted']]

In [None]:
# Draw both partitions on the same axes so the split point is visible.
ax = train.plot()
test.plot(ax=ax)
ax.legend(['train', 'test'])

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scaling is fitted on the training partition only; the test partition
# is then transformed with those same training statistics, so nothing from the
# test period influences the scaler (unlike normalizing before splitting).
sc = MinMaxScaler().fit(train)

train_sc = sc.transform(train)
test_sc = sc.transform(test)

In [None]:
# given h, predict h+1
train_sc[:4]

In [None]:
X_train = train_sc[:-1]
y_train = train_sc[1:]

X_test = test_sc[:-1]
y_test = test_sc[1:]

# Let's try an MLP

In [None]:
from keras.models import Sequential
from keras.layers import Dense
import keras.backend as K
from keras.callbacks import EarlyStopping

In [None]:
# Reset Keras' global state before building a new model.
K.clear_session()

# Single-input MLP: one hidden ReLU layer of 12 units feeding one linear output.
model = Sequential([
    Dense(12, input_dim=1, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

In [None]:
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)


In [None]:
model.fit(X_train, y_train, epochs=200,
          batch_size=2, verbose=1,
          callbacks=[early_stop])

In [None]:
y_pred = model.predict(X_test)


In [None]:
plt.plot(y_test, color='g') # real
plt.plot(y_pred, color='b') # predicted
# predicts the previous value, and actually not well
# its just saying, when my number is x, what should my output be?

# RNN

In [None]:
from keras.layers import LSTM


In [None]:
X_train.shape


In [None]:
#3D tensor with shape (batch_size, timesteps, input_dim)
X_train[:, None].shape

In [None]:
X_train_t = X_train[:, None]
X_test_t = X_test[:, None]

In [None]:
K.clear_session()
model = Sequential()

model.add(LSTM(6, input_shape=(1, 1)))

model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
model.fit(X_train_t, y_train,
          epochs=100, batch_size=1, verbose=1,
          callbacks=[early_stop])

In [None]:
# It's actually not better: the LSTM recalls the last values a bit better,
# but the two curves still do not overlap.
y_pred = model.predict(X_test_t)
plt.plot(y_test)
plt.plot(y_pred)
# Without explicit colours the two lines are indistinguishable; label them.
plt.legend(['real', 'predicted'])

# With Window

In [None]:
train_sc.shape


In [None]:
train_sc_df = pd.DataFrame(train_sc, columns=['Scaled'], index=train.index)
test_sc_df = pd.DataFrame(test_sc, columns=['Scaled'], index=test.index)
train_sc_df.head()


In [None]:
for s in range(1, 13):
    train_sc_df['shift_{}'.format(s)] = train_sc_df['Scaled'].shift(s)
    test_sc_df['shift_{}'.format(s)] = test_sc_df['Scaled'].shift(s)

In [None]:
# same data but we shift the data down by 1 at each column
train_sc_df.head(13)


In [None]:
X_train = train_sc_df.dropna().drop('Scaled', axis=1)
y_train = train_sc_df.dropna()[['Scaled']]

X_test = test_sc_df.dropna().drop('Scaled', axis=1)
y_test = test_sc_df.dropna()[['Scaled']]

In [None]:
X_train.head()


In [None]:
X_train.shape


In [None]:
# Convert the windowed frames to plain NumPy arrays for Keras. np.asarray is
# used instead of `.values` so this cell can be re-run safely: the names are
# rebound from DataFrame to ndarray here, and an ndarray has no `.values`
# attribute, whereas np.asarray accepts both.
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

## MLP with Window

In [None]:
K.clear_session()

model = Sequential()
model.add(Dense(12, input_dim=12, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

In [None]:
model.fit(X_train, y_train, epochs=200,
          batch_size=1, verbose=1, callbacks=[early_stop])


In [None]:

# Compare the windowed MLP's predictions with the real (scaled) test values.
y_pred = model.predict(X_test)
plt.plot(y_test)
plt.plot(y_pred)
# Without explicit colours the two lines are indistinguishable; label them.
plt.legend(['real', 'predicted'])

In [None]:
# this is passing 12 months all at once, later we will do something better
X_train_t = X_train.reshape(X_train.shape[0], 1, 12)
X_test_t = X_test.reshape(X_test.shape[0], 1, 12)

In [None]:
X_train_t.shape

In [None]:

K.clear_session()
model = Sequential()

model.add(LSTM(6, input_shape=(1, 12)))

model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
model.summary()


In [None]:
model.fit(X_train_t, y_train, epochs=100,
          batch_size=1, verbose=1, callbacks=[early_stop])

In [None]:
# Compare the windowed LSTM's predictions with the real (scaled) test values.
y_pred = model.predict(X_test_t)
plt.plot(y_test)
plt.plot(y_pred)
# Without explicit colours the two lines are indistinguishable; label them.
plt.legend(['real', 'predicted'])