In [53]:
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import LSTM
from keras.layers import Activation
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np

%matplotlib inline
plt.rcParams['figure.figsize'] = (16, 10)

In [29]:
# Seed NumPy's global random number generator so that NumPy-based randomness
# is repeatable from run to run.
# NOTE(review): whether this also pins the Keras weight initialisation depends
# on the backend in use — confirm before relying on identical results.
# TBD different random seeds
np.random.seed(7)

### On statefulness

Making an RNN stateful means that the states for the samples of each batch will be reused as initial states for the samples in the next batch.

When using stateful RNNs, it is therefore assumed that:

- all batches have the same number of samples
- if X1 and X2 are successive batches of samples, then X2[i] is the follow-up sequence to X1[i], for every i.

Note that the methods predict, fit, train_on_batch, predict_classes, etc. will all update the states of the stateful layers in a model. This allows you to do not only stateful training, but also stateful prediction.




In [72]:
# ---- Experiment configuration: every tunable of the notebook lives in this cell ----

# Model family: True builds an LSTM, False builds a feed-forward network (MLP).
use_LSTM = True

# MLP only: number of lagged values fed to the network as input features
# (used as the sliding-window length and as input_dim of the first Dense layer).
mlp_num_features = 10
#

# LSTM only: build the return_sequences + TimeDistributed(Dense) variant whose
# per-timestep output has lstm_num_predictions values.
lstm_predict_sequences = True
lstm_num_predictions = 5

# LSTM only: length of each input window (the "time steps" axis of the 3-D input).
lstm_num_timesteps = 5
# LSTM only: number of values per time step (the "features" axis of the 3-D input).
lstm_num_features = 1
# LSTM only: build the layers with stateful = True and train epoch by epoch
# with an explicit reset_states() after each epoch.
lstm_stateful = False
# LSTM only: stack two LSTM layers instead of one. In the model cell this flag
# is tested before lstm_predict_sequences and therefore takes precedence.
lstm_stack_layers = False

# Sliding-window length actually in effect for the selected model family.
window_size = lstm_num_timesteps if use_LSTM else mlp_num_features

# Batch size used for training (and for stateful evaluation / prediction).
batch_size = 1
# Number of passes over the training windows.
num_epochs = 10
# Number of units in the first (hidden) layer, i.e. the dimensionality of its output space.
num_neurons = 4

# Whether to rescale the series with `scaler` before windowing.
# NOTE: scale_min / scale_max are only consumed by the commented-out
# MinMaxScaler line; the StandardScaler that is active takes no range.
scale = False
scale_min = -1
scale_max = 1
#scaler = MinMaxScaler(feature_range=(scale_min, scale_max))
scaler = StandardScaler()

In [31]:
# Synthetic linear-trend series, each stored as a float64 column vector of shape (n, 1).
# Training series: 1, 2, ..., 100
ts_train_lineartrend = np.arange(1, 101, dtype=np.float64)[:, np.newaxis]
# Test series that continues past the training range: 101, ..., 120
ts_test_lineartrend_outofrange = np.arange(101, 121, dtype=np.float64)[:, np.newaxis]
# Test series that lies inside the training range: 21, ..., 40
ts_test_lineartrend_withinrange = np.arange(21, 41, dtype=np.float64)[:, np.newaxis]


In [32]:
# Select the experiment: train on the full linear trend and test on a stretch
# that lies inside the training range. `testname` ends up in the saved figure's file name.
testname = 'linear_trend_within_range'
ts_train, ts_test = ts_train_lineartrend, ts_test_lineartrend_withinrange
# Train values followed by test values, flattened and stacked into one column vector.
ts_all = np.concatenate((ts_train.ravel(), ts_test.ravel())).reshape(-1, 1)
len_overall = ts_all.shape[0]

In [33]:
len_overall

120

In [34]:
ts_all.dtype

dtype('float64')

In [35]:
ts_train.shape, ts_test.shape

((100, 1), (20, 1))

In [36]:
ts_train[:10]

array([[  1.],
       [  2.],
       [  3.],
       [  4.],
       [  5.],
       [  6.],
       [  7.],
       [  8.],
       [  9.],
       [ 10.]])

In [37]:
ts_test[:10]

array([[ 21.],
       [ 22.],
       [ 23.],
       [ 24.],
       [ 25.],
       [ 26.],
       [ 27.],
       [ 28.],
       [ 29.],
       [ 30.]])

In [38]:
# Optionally rescale the data. The scaler is fitted on the training series only;
# the fitted transformation is then applied unchanged to the test series.
# NOTE: ts_train / ts_test are rebound in place, so re-running this cell
# re-fits the scaler on data that has already been transformed.
if scale:
    ts_train = scaler.fit_transform(ts_train)
    ts_test = scaler.transform(ts_test)

In [39]:
ts_train[:10]

array([[  1.],
       [  2.],
       [  3.],
       [  4.],
       [  5.],
       [  6.],
       [  7.],
       [  8.],
       [  9.],
       [ 10.]])

In [40]:
ts_test[:10]

array([[ 21.],
       [ 22.],
       [ 23.],
       [ 24.],
       [ 25.],
       [ 26.],
       [ 27.],
       [ 28.],
       [ 29.],
       [ 30.]])

In [41]:
def create_dataset(dataset, window_size):
    """Turn a column-vector time series into a supervised (inputs, targets) pair.

    Every sample consists of `window_size` consecutive values taken from
    column 0 of `dataset`; its target is the value that immediately follows
    the window.

    Parameters
    ----------
    dataset : 2-D array indexed as dataset[row, 0]
    window_size : int
        Number of lagged values per sample.

    Returns
    -------
    (X, y) : tuple of np.ndarray
        X holds one window per row, y the matching next value. The number of
        samples is len(dataset) - window_size; both arrays are empty when the
        series is not longer than the window.
    """
    n_samples = len(dataset) - window_size
    windows = [dataset[start:start + window_size, 0] for start in range(n_samples)]
    targets = [dataset[start + window_size, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

In [42]:
# Slice both series into sliding windows. `window_size` is derived from use_LSTM
# in the configuration cell (lstm_num_timesteps for the LSTM, mlp_num_features
# for the MLP), so one call per series covers both model families.
X_train, y_train = create_dataset(ts_train, window_size)
X_test, y_test = create_dataset(ts_test, window_size)

# A series of length n yields n - window_size samples: the matrices are shorter
# than the respective time series by exactly window_size (e.g. 100 -> 95, 20 -> 15).
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((95, 5), (15, 5), (95,), (15,))

In [43]:
X_train[:5,:]

array([[ 1.,  2.,  3.,  4.,  5.],
       [ 2.,  3.,  4.,  5.,  6.],
       [ 3.,  4.,  5.,  6.,  7.],
       [ 4.,  5.,  6.,  7.,  8.],
       [ 5.,  6.,  7.,  8.,  9.]])

In [44]:
y_train[:5]

array([  6.,   7.,   8.,   9.,  10.])

In [45]:
X_test[:5,:]

array([[ 21.,  22.,  23.,  24.,  25.],
       [ 22.,  23.,  24.,  25.,  26.],
       [ 23.,  24.,  25.,  26.,  27.],
       [ 24.,  25.,  26.,  27.,  28.],
       [ 25.,  26.,  27.,  28.,  29.]])

In [46]:
y_test[:5]

array([ 26.,  27.,  28.,  29.,  30.])

In [47]:
if use_LSTM:
    # The recurrent layers take 3-D input laid out as [samples, time steps, features];
    # the MLP keeps the 2-D window matrix untouched.
    X_train = X_train.reshape((X_train.shape[0], lstm_num_timesteps, lstm_num_features))
    X_test = X_test.reshape((X_test.shape[0], lstm_num_timesteps, lstm_num_features))

In [48]:
X_train.shape

(95, 5, 1)

In [76]:
# Build and compile the network selected by the configuration flags.
# Branch order: use_LSTM -> lstm_stateful -> lstm_stack_layers -> lstm_predict_sequences,
# i.e. lstm_stack_layers wins over lstm_predict_sequences when both are set.
# Every branch compiles with mean squared error loss and the adam optimizer and
# prints model.output_shape after each layer as a shape sanity check.
model = Sequential()

# LSTM input shape
# (samples, time steps, features)
# LSTM output shape
# if return_sequences: 3D tensor with shape (batch_size, timesteps, units).
# else, 2D tensor with shape (batch_size, units).

if use_LSTM:

    print('LSTM')
    # the last state for each sample at index i in a batch will be used as initial state
    # for the sample of index i in the following batch
    if lstm_stateful:
        print('stateful')
        # stateful, two stacked LSTM layers, one output value per sample.
        # The first layer returns the full sequence so the second LSTM gets 3D input.
        if lstm_stack_layers:
            print('stack_layers')
            model.add(LSTM(num_neurons,
                       batch_input_shape=(batch_size, X_train.shape[1], X_train.shape[2]),
                       stateful = True,
                       return_sequences = True))
            print(model.output_shape)
            model.add(LSTM(num_neurons,
                       stateful = True))
            print(model.output_shape)
            model.add(Dense(1))
            print(model.output_shape)
            model.compile(loss='mean_squared_error', optimizer='adam')

        # stateful, one LSTM layer returning the whole sequence, followed by a Dense layer
        # applied at every time step: the output is 3D with lstm_num_predictions values per step.
        # NOTE(review): only this stateful variant appends Activation("linear");
        # the stateless predict_sequences branch below does not.
        # NOTE(review): the targets produced by create_dataset hold one value per sample,
        # so fitting this 3D-output model on y_train is rejected by Keras
        # ("expected ... to have 3 dimensions") until matching targets are built.
        elif lstm_predict_sequences:
            print('predict_sequences')
            model.add(LSTM(num_neurons,
                       batch_input_shape=(batch_size, X_train.shape[1], X_train.shape[2]),
                       stateful = True,
                       return_sequences = True))
            print(model.output_shape)
            model.add(TimeDistributed(Dense(lstm_num_predictions)))
            print(model.output_shape)
            model.add(Activation("linear"))
            model.compile(loss='mean_squared_error', optimizer='adam')

        # stateful, one LSTM layer, one output value per sample
        else:
            print('predict single')
            model.add(LSTM(num_neurons,
                       batch_input_shape=(batch_size, X_train.shape[1], X_train.shape[2]),
                       stateful = True))
            print(model.output_shape)
            model.add(Dense(1))
            print(model.output_shape)
            model.compile(loss='mean_squared_error', optimizer='adam')



    # stateful == False
    else:
        print('stateless')

        if lstm_stack_layers:
            print('stack layers')
            # input_dim: dimensionality of the input (alternatively, input_shape)
            # required when using this layer as the first layer in a model
            model.add(LSTM(num_neurons, input_dim = lstm_num_features, return_sequences = True))
            print(model.output_shape)
            model.add(LSTM(num_neurons))
            print(model.output_shape)
            model.add(Dense(1))
            print(model.output_shape)
            model.compile(loss='mean_squared_error', optimizer='adam')
        # stateless counterpart of the sequence-output model above: 3D output with
        # lstm_num_predictions values per time step.
        # NOTE(review): same target-shape mismatch as in the stateful variant — the training
        # cell fails with "expected ... to have 3 dimensions" when fed the 1D y_train.
        # NOTE(review): batch_input_shape pins the batch dimension to batch_size, while later
        # cells evaluate / predict with batch_size = X_test.shape[0] — confirm the backend accepts that.
        elif lstm_predict_sequences:
            print('predict_sequences')
            model.add(LSTM(num_neurons,
                      #  input_dim = lstm_num_features,
                       batch_input_shape=(batch_size, X_train.shape[1], X_train.shape[2]),
                       return_sequences = True))
            print(model.output_shape)
            model.add(TimeDistributed(Dense(lstm_num_predictions)))
            print(model.output_shape)
            model.compile(loss='mean_squared_error', optimizer='adam')

        # stateless, one LSTM layer, one output value per sample
        else:
            print('predict single')
            model.add(LSTM(num_neurons, input_dim = lstm_num_features))
            print(model.output_shape)
            model.add(Dense(1))
            print(model.output_shape)
            model.compile(loss='mean_squared_error', optimizer='adam')



# feedforward: one hidden relu layer on the mlp_num_features lagged values, one output value
else:
    print('MLP')

    model.add(Dense(num_neurons, input_dim = mlp_num_features, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')


LSTM
stateless
predict_sequences
(95, 5, 4)
(95, 5, 5)


In [77]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lstm_15 (LSTM)                   (95, 5, 4)            96          lstm_input_15[0][0]              
____________________________________________________________________________________________________
timedistributed_14 (TimeDistribut(95, 5, 5)            25          lstm_15[0][0]                    
Total params: 121
____________________________________________________________________________________________________


In [78]:

# Train the model on the windowed training data.
# `and` (logical, short-circuiting) replaces the bitwise `&` the condition used before.
if use_LSTM and lstm_stateful:
    # Stateful LSTM: run the epochs by hand so the carried-over state can be
    # cleared after every pass over the training windows.
    for i in range(num_epochs):
        print('epoch: ' + str(i))
        # shuffle must be False! The state of sample i in one batch seeds
        # sample i of the next batch, so the batches must stay in series order.
        model.fit(X_train, y_train, nb_epoch = 1, batch_size = batch_size, shuffle = False)
        model.reset_states()
else:
    # Stateless LSTM or MLP: let Keras run all epochs in a single call.
    model.fit(X_train, y_train, nb_epoch = num_epochs, batch_size = batch_size)


Exception: Error when checking model target: expected timedistributed_14 to have 3 dimensions, but got array with shape (95, 1)

In [None]:
# Loss on the held-out windows. A stateful model is evaluated with the batch size
# it was built with; otherwise the whole test set is evaluated as one batch.
test_loss = np.nan
test_loss = model.evaluate(
    X_test, y_test,
    batch_size = batch_size if lstm_stateful else X_test.shape[0])
test_loss

In [None]:
# One-step predictions for every train and test window.
if lstm_stateful:
    # Clear the carried-over LSTM state before each pass so that neither the
    # previous training / evaluation nor the train pass leaks into the next prediction run.
    model.reset_states()
    pred_train = model.predict(X_train, batch_size = batch_size)
    model.reset_states()
    pred_test = model.predict(X_test, batch_size = batch_size)
else:
    # Stateless: predict twice — once with the whole set as a single batch (suffix 1)
    # and once with the training batch size (suffix 2) — so the two results can be compared.
    # A later cell takes the suffix-1 arrays as pred_train / pred_test.
    pred_train1 = model.predict(X_train, batch_size = X_train.shape[0])
    pred_test1 = model.predict(X_test, batch_size = X_test.shape[0])
    pred_train2 = model.predict(X_train, batch_size = batch_size)
    pred_test2 = model.predict(X_test, batch_size = batch_size)

In [None]:
#if not lstm_stateful:
#    print(np.round(pred_test1) == np.round(pred_test2))

In [None]:
# In the stateless case, continue with the predictions that were computed
# with the whole set as a single batch.
if not lstm_stateful:
    pred_test, pred_train = pred_test1, pred_train1

In [None]:
# Predict every test window on its own, one sample per call, and print the raw model output.
for window in X_test:
    # a stateful model must not carry state over from the previous window
    if lstm_stateful:
        model.reset_states()
    # add the leading batch axis: (time steps, 1) -> (1, time steps, 1)
    print(model.predict(window.reshape(1, len(window), 1)))

In [None]:
pred_test

In [None]:
def calc_dependent_predictions(model, data, prediction_window):
    """Roll the model forward on its own output, in blocks of `prediction_window` steps.

    For every block start (data[0], data[prediction_window], ...) the model
    predicts one value, the oldest value of the input frame is dropped, the
    prediction is appended, and the updated frame is fed back in. After
    `prediction_window` such steps the next block starts again from real data.

    Parameters
    ----------
    model : object with a Keras-style ``predict(batch)`` method
        Called with a batch holding a single frame; element [0, 0] of the
        result is taken as the prediction for that step.
    data : array of input frames, indexed as data[sample] -> (time steps, features)
    prediction_window : int
        Number of chained predictions per block, and the stride between block starts.

    Returns
    -------
    list of lists
        One inner list per block, holding its `prediction_window` predictions.

    Notes
    -----
    The frame length is taken from the frame itself, so the function no longer
    depends on the notebook-level `window_size` variable being defined and in
    sync with `data`. `data` is not modified.
    """
    prediction_seqs = []
    # only complete blocks are rolled out; a trailing partial block is skipped
    for i in range(len(data) // prediction_window):
        print('Calculating predictions starting from: {}'.format(i))
        curr_frame = data[i * prediction_window]
        predicted = []
        for _ in range(prediction_window):
            # add the leading batch axis the model expects
            pred = model.predict(curr_frame[np.newaxis, :, :])[0, 0]
            predicted.append(pred)
            # slide the window: drop the oldest step, append the new prediction at the end
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [len(curr_frame)], pred, axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs

In [None]:
# Number of chained predictions per block; calc_dependent_predictions also uses it
# as the stride between the windows from which each block starts.
prediction_window = 5 

# Multi-step rollouts over the train and the test windows.
prediction_seqs_train = calc_dependent_predictions(model, X_train, prediction_window)
prediction_seqs_test = calc_dependent_predictions(model, X_test, prediction_window)

In [None]:
prediction_seqs_train 

In [None]:
prediction_seqs_test

In [None]:
y_train[:10]

In [None]:
pred_train[:10,0]

In [None]:
y_test[:10]

In [None]:
pred_test[:10,0]

In [None]:
# Undo the scaling so that predictions and targets are back in the units of the original series.
# The 1-D targets are reshaped to column vectors for inverse_transform, so
# y_train / y_test are 2-D after this cell.
# NOTE: the arrays are rebound in place — running this cell twice applies the
# inverse transform twice.
if scale:
    pred_train = scaler.inverse_transform(pred_train)
    y_train = scaler.inverse_transform(y_train.reshape(-1,1))
    pred_test = scaler.inverse_transform(pred_test)
    y_test = scaler.inverse_transform(y_test.reshape(-1,1))


In [None]:
y_train[:10],pred_train[:10,0]

In [None]:
y_test[:],pred_test[:,0]

In [None]:
# calculate root mean squared error between the targets and column 0 of the predictions
# NOTE(review): the variables are spelled "rsme" (sic, for RMSE); the names are
# left untouched in case other cells refer to them.
rsme_train = math.sqrt(mean_squared_error(y_train, pred_train[:,0]))
print('Train Score: %.2f RMSE' % (rsme_train))
rsme_test = math.sqrt(mean_squared_error(y_test, pred_test[:,0]))
print('Test Score: %.2f RMSE' % (rsme_test))

In [None]:
print(len(ts_train), len(pred_train), len(y_train))
len(ts_test), len(pred_test), len(y_test) 

In [None]:
# Lay the train predictions out on the time axis of the full series for plotting.
# Positions without a prediction stay NaN.
pred_train_shifted = np.full_like(ts_all, np.nan)
print(pred_train_shifted.size)
# The first window_size values only serve as input, so the first prediction
# belongs to index window_size (counting from 0).
pred_train_shifted[window_size:window_size + len(pred_train), :] = pred_train
pred_train_shifted[:13]

In [None]:
# Lay the test predictions out on the time axis of the full series for plotting.
window_size = lstm_num_timesteps if use_LSTM else mlp_num_features
pred_test_shifted = np.full_like(ts_all, np.nan)
# The train part spans len(pred_train) + window_size positions, and the first
# window_size test values only serve as input: the first test prediction sits
# 2 * window_size past len(pred_train). The slice runs to the end of the array.
pred_test_shifted[len(pred_train) + 2 * window_size:len_overall, :] = pred_test
pred_test_shifted[-13:]

In [None]:
# Overview plot: the full series with the shifted train and test predictions on top.
plt.plot(ts_all)
plt.plot(pred_train_shifted)
plt.plot(pred_test_shifted)
# Save the figure under a file name that encodes the experiment settings;
# savefig has to run before show().
plt.savefig(testname + '_lstm_' + str(use_LSTM) + '_stateful_' + str(lstm_stateful) + '_window_' + str(window_size) +
            '_epochs_' + str(num_epochs) + '_2layers_' + str(lstm_stack_layers) + '_scale_' + str(scale) + '.png')
plt.show()

In [None]:
# Zoom into the tail of the series (train/test boundary and the test predictions).
# NOTE(review): the slice [plot_start:plot_end] with plot_end = -1 excludes the very
# last sample of each array — confirm that dropping it is intended.
plot_start = -30
plot_end = -1
plt.plot(ts_all[plot_start:plot_end])
plt.plot(pred_train_shifted[plot_start:plot_end])
plt.plot(pred_test_shifted[plot_start:plot_end])
plt.show()


In [None]:
def plot_results(predicted_data, true_data):
    """Draw a prediction series on top of the true series in a fresh figure.

    Parameters
    ----------
    predicted_data : sequence or array of predicted values
    true_data : sequence or array of observed values, plotted first
    """
    figure = plt.figure(facecolor='white')
    axes = figure.add_subplot(111)
    axes.plot(true_data, label='True Data')
    axes.plot(predicted_data, label='Prediction')
    axes.legend()
    plt.show()

In [None]:
plot_results(pred_train, y_train)

In [None]:
plot_results(pred_test, y_test)

In [None]:
def plot_results_multiple(predicted_data, true_data, prediction_window):
    """Plot several multi-step prediction sequences against the true series.

    Sequence number i is drawn starting at x = i * prediction_window, i.e. at
    the position of the block it was rolled out from.

    Parameters
    ----------
    predicted_data : iterable of sequences
        One sequence of predictions per block (lists, tuples or arrays).
    true_data : sequence or array of observed values, plotted first
    prediction_window : int
        Horizontal offset between consecutive prediction sequences.
    """
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    for i, data in enumerate(predicted_data):
        # Pad the sequence with gaps so that it starts at its correct x position
        padding = [None] * (i * prediction_window)
        # Only the first sequence gets a legend entry; labels starting with an
        # underscore are left out of the legend, so it is not flooded with duplicates.
        label = 'Prediction' if i == 0 else '_nolegend_'
        ax.plot(padding + list(data), label=label)
    # Build the legend once, after all lines exist (also when there are no predictions)
    ax.legend()
    plt.show()

In [None]:
# Train rollouts belong next to the train targets (this cell used to pass y_test).
plot_results_multiple(prediction_seqs_train, y_train, prediction_window)

In [None]:
plot_results_multiple(prediction_seqs_test, y_test, prediction_window)

In [None]:
#seq2seq
# predict and append prediction to existing values
# TimeDistributedDense after return_sequences True

In [None]:
# https://groups.google.com/forum/#!topic/keras-users/9GsDwkSdqBg