# Time Series Prediction with LSTM Recurrent Neural Networks 
# in Python with Keras
2016-07-21 https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/


In [None]:
# Quick visual check of the raw series before any preprocessing.
# The IPython magic below renders matplotlib figures inline in the notebook.
%matplotlib inline
import pandas
import matplotlib.pyplot as plt
# Default figure size (width, height) for the plots that follow.
plt.rcParams['figure.figsize'] = (14, 6)
# Only the column at position 1 of the CSV is loaded (usecols=[1]).
dataset = pandas.read_csv('airline-passengers.csv', usecols=[1], engine='python')
plt.plot(dataset)
plt.show()

In [None]:
# Experiment settings read by the cells below.

# Number of consecutive past values in each input window (passed to
# create_dataset and used to build the LSTM input shape).
look_back = 1
# True  -> inputs are reshaped to (samples, look_back, 1): lags become time steps.
# False -> inputs are reshaped to (samples, 1, look_back): lags become features.
LOOKBACK_TIMESTEPS = True
# Number of training epochs handed to model.fit().
EPOCHS = 60
# Fraction of the rows (taken from the start of the series) used for training.
TRAIN_RATIO = 0.67
# Value for numpy.random.seed(); set to None to skip seeding.
SEED = 7

In [None]:
# Execute training_utils.py inside the notebook namespace so that the names it
# defines become available to the following cells.
# NOTE(review): limit_memory() (and presumably the PlotLearning callback used
# during training) come from that script; their behavior is not visible here.
%run training_utils.py
limit_memory()

In [None]:
import numpy
import matplotlib.pyplot as plt
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
# Seed NumPy's global random generator unless seeding is disabled (SEED is None).
# Only NumPy is seeded in this cell.
if SEED is not None:
    numpy.random.seed(SEED)

# Read the column at position 1 of the CSV and keep its values as a float32
# array with one column.
dataframe = pandas.read_csv('airline-passengers.csv', engine='python', usecols=[1])
dataset = dataframe.values.astype('float32')

In [None]:
# Create the min-max scaler targeting the range [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
# The scaler is intentionally NOT fitted on the whole series here; it is fitted
# on the training rows only, once the train/test split has been made.
# Previous (disabled) variant, kept for reference:
# dataset = scaler.fit_transform(dataset)
# dataset[:5]

In [None]:
# Ordered split: the leading TRAIN_RATIO share of the rows is the training set
# and everything after it is the test set.
train_size = int(TRAIN_RATIO * len(dataset))
test_size = len(dataset) - train_size

# The scaler is fitted on the training rows only; the test rows are then
# transformed with those same parameters.
train = scaler.fit_transform(dataset[:train_size, :])
test = scaler.transform(dataset[train_size:, :])
print(len(train), len(test))

In [None]:
# Persistence baseline x(t) = x(t-1): every test value is "predicted" by the
# value directly before it. Scored as RMSE on the test split in original units.
orig_test = scaler.inverse_transform(test)
base_rmse = math.sqrt(
    mean_squared_error(orig_test[look_back:], orig_test[look_back - 1:-1])
)

In [None]:
def create_dataset(dataset, look_back=1):
    """Turn a series into sliding input windows and next-step targets.

    Args:
        dataset: 2-D array; only column 0 is read.
        look_back: number of consecutive values in each input window.

    Returns:
        A pair ``(X, Y)`` of NumPy arrays. Row ``i`` of ``X`` is
        ``dataset[i:i + look_back, 0]`` and ``Y[i]`` is the value that
        immediately follows that window, ``dataset[i + look_back, 0]``.

    NOTE(review): ``len(dataset) - look_back - 1`` pairs are produced, so the
    last window that still has a following value is not emitted. The count is
    left unchanged here because callers may size other arrays from it.
    """
    n_windows = len(dataset) - look_back - 1
    starts = range(n_windows)
    windows = [dataset[s:s + look_back, 0] for s in starts]
    targets = [dataset[s + look_back, 0] for s in starts]
    return numpy.array(windows), numpy.array(targets)

In [None]:
# Build supervised pairs for both splits: X holds the look_back values ending
# at time t, Y holds the value at t+1.
(trainX, trainY) = create_dataset(train, look_back=look_back)
(testX, testY) = create_dataset(test, look_back=look_back)

In [None]:
# I thought my earlier approach ([np.newaxis, :] indexing etc.) was the better one.
# Note: not necessarily! It turns out that the resulting shape matters.

# The LSTM input has to be laid out as [samples, time steps, features].
if LOOKBACK_TIMESTEPS:
    # Every lag is its own time step carrying a single feature.
    trainX = trainX.reshape((trainX.shape[0], trainX.shape[1], 1))
    testX = testX.reshape((testX.shape[0], testX.shape[1], 1))
    in_shape = (look_back, 1)
else:
    # A single time step whose features are the look_back lags.
    trainX = trainX.reshape((trainX.shape[0], 1, trainX.shape[1]))
    testX = testX.reshape((testX.shape[0], 1, testX.shape[1]))
    in_shape = (1, look_back)

In [None]:
# Build the network: an LSTM layer with 4 units followed by a single-unit
# Dense output, trained with mean squared error and the Adam optimizer.
model = Sequential([
    LSTM(4, input_shape=in_shape),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train one sample per batch. The test split doubles as validation data, so
# validation curves are recorded in `history`.
# NOTE(review): PlotLearning is not defined in this file's visible cells;
# presumably it is provided by training_utils.py.
history = model.fit(
    trainX,
    trainY,
    epochs=EPOCHS,
    batch_size=1,
    verbose=1,
    validation_data=(testX, testY),
    callbacks=[PlotLearning(log_scale=False)],
)

### Plotting history of training (afterwards)
2016-06-17 https://machinelearningmastery.com/display-deep-learning-model-training-history-in-keras/

In [None]:
# Per-epoch values recorded by model.fit(), keyed by metric name.
logs = history.history

# Accuracy figure: drawn only when an 'accuracy' series was recorded; the
# validation series is added when it exists.
if 'accuracy' in logs:
    for curve in ('accuracy', 'val_accuracy'):
        if curve in logs:
            plt.plot(logs[curve])
    plt.title('model accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# Loss figure: the training loss is always drawn, the validation loss only
# when it was recorded.
plt.plot(logs['loss'])
if 'val_loss' in logs:
    plt.plot(logs['val_loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Model outputs for both splits (still in the scaled value range).
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# Undo the scaling. Predictions are passed as returned by the model; the 1-D
# target vectors are passed as a single row, so the results have shape (1, n).
trainPredict_s = scaler.inverse_transform(trainPredict)
testPredict_s = scaler.inverse_transform(testPredict)
trainY_s = scaler.inverse_transform(trainY.reshape(1, -1))
testY_s = scaler.inverse_transform(testY.reshape(1, -1))

# Root mean squared error per split, in original units.
trainScore = math.sqrt(
    mean_squared_error(trainY_s[0], trainPredict_s[:, 0])
)
testScore = math.sqrt(
    mean_squared_error(testY_s[0], testPredict_s[:, 0])
)


In [None]:
# Plot the observed series together with the train/test predictions and print
# the RMSE scores next to the persistence baseline.
plt.rcParams['figure.figsize'] = (15, 8)

# Training predictions on a NaN-filled canvas the size of the full series.
# The first prediction corresponds to series index `look_back`.
trainPredictPlot = numpy.full_like(dataset, numpy.nan)
trainPredictPlot[look_back:look_back + len(trainPredict_s), :] = trainPredict_s

# Test predictions: the test split starts at index len(train), and its first
# prediction corresponds to `look_back` positions into that split. The slice
# end is derived from the number of predictions, so it always matches.
testPredictPlot = numpy.full_like(dataset, numpy.nan)
test_start = len(train) + look_back
testPredictPlot[test_start:test_start + len(testPredict_s), :] = testPredict_s

# `dataset` holds the raw values: only the train/test splits were scaled, so
# it must be plotted as-is. Running it through scaler.inverse_transform()
# would blow the ground-truth line up to a wrong scale.
plt.plot(dataset)
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()
print('Train Score: %.2f RMSE' % (trainScore))
print('Test Score: %.2f RMSE' % (testScore))
print('Baseline x(t-1) Score: %.2f RMSE' % (base_rmse))