In [1]:
import resource

In [2]:
# Peak resident set size of this process before the heavy imports below.
# NOTE: ru_maxrss is reported in kilobytes on Linux (bytes on macOS), and it
# is a high-water mark rather than the current memory usage.
resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

161772

In [3]:
import numpy
import matplotlib.pyplot as plt
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

import time

Using TensorFlow backend.


In [4]:
# Load the series: keep only CSV column index 1 and work with it as a
# float32 array.
dataframe = pandas.read_csv('data_zigzag.csv', engine='python', usecols=[1])
dataset = dataframe.values.astype('float32')

In [5]:
# Normalize the data with a min-max scaler targeting the range [0, 1].
# The scaler is fitted on the training portion only (the leading 70% of the
# rows, the same fraction the train/test split uses) so that the min/max of
# the held-out test rows do not leak into the preprocessing. The whole series
# is then transformed with those training statistics; test values may
# therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:int(len(dataset) * 0.7)])
dataset = scaler.transform(dataset)

In [6]:
# Chronological split: the first 70% of the rows form the training set and
# the remaining rows form the test set (no shuffling).
train_size = int(len(dataset) * 0.7)
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]
print(len(train), len(test))

(123, 54)


In [7]:
# Turn a single-column series into a supervised-learning matrix
def create_dataset(dataset, look_back=1):
    """Slice column 0 of ``dataset`` into (input window, next value) pairs.

    For each start index ``i`` the input is ``dataset[i:i + look_back, 0]``
    and the target is ``dataset[i + look_back, 0]``.

    Note that ``len(dataset) - look_back - 1`` windows are produced, i.e. the
    last window that would still have a target is not emitted.

    Parameters
    ----------
    dataset : 2-D array indexed as ``dataset[row, 0]``
    look_back : int, number of consecutive values in each input window

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked input windows and the matching targets.
    """
    n_windows = len(dataset) - look_back - 1
    windows = [dataset[start:start + look_back, 0] for start in range(n_windows)]
    targets = [dataset[start + look_back, 0] for start in range(n_windows)]
    return numpy.array(windows), numpy.array(targets)

In [8]:
# Frame the task as supervised learning: with look_back = 1 each input is the
# value at step t and the target is the value at step t+1.
look_back = 1
trainX, trainY = create_dataset(dataset=train, look_back=look_back)
testX, testY = create_dataset(dataset=test, look_back=look_back)

In [9]:
# The LSTM layer takes input shaped [samples, time steps, features].
# Each window is fed as a single time step with look_back features, matching
# the model's input_shape=(1, look_back).
# Reshaping with look_back (instead of the array's current shape[1]) keeps
# this cell idempotent: re-running it on already reshaped arrays is a no-op
# rather than an error when look_back > 1.
trainX = numpy.reshape(trainX, (-1, 1, look_back))
testX = numpy.reshape(testX, (-1, 1, look_back))

In [10]:

# Seed NumPy's global RNG so repeated runs start from the same random state.
# NOTE(review): presumably this covers Keras' weight initialisation and batch
# shuffling with this backend -- confirm; the backend may need its own seed.
numpy.random.seed(7)

# Sequential model from Keras: an LSTM layer with 4 units that reads one time
# step of look_back features, followed by a fully connected single-output layer.
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))

# Compile with mean squared error loss. The optimizer used here is SGD;
# other candidates to try: adam, RMSprop, Adagrad, Adadelta, Adamax, Nadam.
model.compile(loss='mean_squared_error', optimizer='SGD')

# Start the timer right before training. time.clock was removed in
# Python 3.8, so fall back to time.perf_counter when it is not available.
_timer = getattr(time, 'clock', None) or time.perf_counter
time_start = _timer()
model.fit(trainX, trainY, epochs=200, batch_size=3, verbose=2)

Epoch 1/200
0s - loss: 0.1024
Epoch 2/200
0s - loss: 0.0664
Epoch 3/200
0s - loss: 0.0590
Epoch 4/200
0s - loss: 0.0572
Epoch 5/200
0s - loss: 0.0568
Epoch 6/200
0s - loss: 0.0565
Epoch 7/200
0s - loss: 0.0562
Epoch 8/200
0s - loss: 0.0560
Epoch 9/200
0s - loss: 0.0554
Epoch 10/200
0s - loss: 0.0553
Epoch 11/200
0s - loss: 0.0549
Epoch 12/200
0s - loss: 0.0547
Epoch 13/200
0s - loss: 0.0542
Epoch 14/200
0s - loss: 0.0540
Epoch 15/200
0s - loss: 0.0538
Epoch 16/200
0s - loss: 0.0535
Epoch 17/200
0s - loss: 0.0530
Epoch 18/200
0s - loss: 0.0528
Epoch 19/200
0s - loss: 0.0525
Epoch 20/200
0s - loss: 0.0521
Epoch 21/200
0s - loss: 0.0519
Epoch 22/200
0s - loss: 0.0518
Epoch 23/200
0s - loss: 0.0514
Epoch 24/200
0s - loss: 0.0511
Epoch 25/200
0s - loss: 0.0508
Epoch 26/200
0s - loss: 0.0504
Epoch 27/200
0s - loss: 0.0503
Epoch 28/200
0s - loss: 0.0500
Epoch 29/200
0s - loss: 0.0497
Epoch 30/200
0s - loss: 0.0493
Epoch 31/200
0s - loss: 0.0491
Epoch 32/200
0s - loss: 0.0489
Epoch 33/200
0s -

<keras.callbacks.History at 0x7f6390fcc4d0>

In [11]:
# Predict on the train and test inputs.
# The outputs are still on the normalized scale at this point; they are mapped
# back to the original units further down with scaler.inverse_transform.
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# print() with a single argument behaves identically on Python 2 and 3,
# unlike the Python 2-only `print x` statement.
print(testPredict)

[[ 0.36248112]
 [ 0.41449237]
 [ 0.46491492]
 [ 0.51332277]
 [ 0.55937618]
 [ 0.51332277]
 [ 0.46491492]
 [ 0.41449237]
 [ 0.36248112]
 [ 0.30937535]
 [ 0.25571305]
 [ 0.20204553]
 [ 0.14890726]
 [ 0.09678832]
 [ 0.55937618]
 [ 0.51332277]
 [ 0.46491492]
 [ 0.41449237]
 [ 0.36248112]
 [ 0.30937535]
 [ 0.25571305]
 [ 0.20204553]
 [ 0.14890726]
 [ 0.09678832]
 [ 0.55937618]
 [ 0.51332277]
 [ 0.46491492]
 [ 0.41449237]
 [ 0.36248112]
 [ 0.30937535]
 [ 0.25571305]
 [ 0.20204553]
 [ 0.14890726]
 [ 0.09678832]
 [ 0.30937535]
 [ 0.25571305]
 [ 0.20204553]
 [ 0.14890726]
 [ 0.09678832]
 [ 0.14890726]
 [ 0.20204553]
 [ 0.25571305]
 [ 0.30937535]
 [ 0.09678832]
 [ 0.14890726]
 [ 0.20204553]
 [ 0.25571305]
 [ 0.30937535]
 [ 0.09678832]
 [ 0.14890726]
 [ 0.20204553]
 [ 0.25571305]]


In [12]:
# Undo the min-max scaling so that the errors are expressed in the units of
# the original data. The targets are 1-D, so they are wrapped in a list to
# hand inverse_transform a 2-D (1, n) input; row 0 is read back below.
trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)
trainY = scaler.inverse_transform([trainY])
testY = scaler.inverse_transform([testY])

# Root mean squared error of the predictions against the targets
trainScore = math.sqrt(mean_squared_error(y_true=trainY[0], y_pred=trainPredict[:, 0]))
testScore = math.sqrt(mean_squared_error(y_true=testY[0], y_pred=testPredict[:, 0]))
print('Train Score: %.2f RMSE' % trainScore)
print('Test Score: %.2f RMSE' % testScore)

Train Score: 2.31 RMSE
Test Score: 2.08 RMSE


In [13]:
# Show the test predictions; print() with one argument works on both
# Python 2 and Python 3, unlike the `print x` statement.
print(testPredict)

[[ 6.07473516]
 [ 6.80289268]
 [ 7.50880909]
 [ 8.18651867]
 [ 8.8312664 ]
 [ 8.18651867]
 [ 7.50880909]
 [ 6.80289268]
 [ 6.07473516]
 [ 5.33125448]
 [ 4.57998228]
 [ 3.82863712]
 [ 3.08470154]
 [ 2.35503626]
 [ 8.8312664 ]
 [ 8.18651867]
 [ 7.50880909]
 [ 6.80289268]
 [ 6.07473516]
 [ 5.33125448]
 [ 4.57998228]
 [ 3.82863712]
 [ 3.08470154]
 [ 2.35503626]
 [ 8.8312664 ]
 [ 8.18651867]
 [ 7.50880909]
 [ 6.80289268]
 [ 6.07473516]
 [ 5.33125448]
 [ 4.57998228]
 [ 3.82863712]
 [ 3.08470154]
 [ 2.35503626]
 [ 5.33125448]
 [ 4.57998228]
 [ 3.82863712]
 [ 3.08470154]
 [ 2.35503626]
 [ 3.08470154]
 [ 3.82863712]
 [ 4.57998228]
 [ 5.33125448]
 [ 2.35503626]
 [ 3.08470154]
 [ 3.82863712]
 [ 4.57998228]
 [ 5.33125448]
 [ 2.35503626]
 [ 3.08470154]
 [ 3.82863712]
 [ 4.57998228]]


In [None]:
# Time elapsed since time_start, which is taken just before model.fit, so the
# figure covers training plus everything executed up to this cell.
# time.clock was removed in Python 3.8; fall back to time.perf_counter when it
# is not available so the same clock family is used as at the start.
_timer = getattr(time, 'clock', None) or time.perf_counter
time_elapsed = _timer() - time_start
time_elapsed

In [14]:
# Align each prediction with the observation it estimates.
# Sample i is built from rows [i, i + look_back) of its split and predicts the
# row at i + look_back, so predictions must be drawn look_back steps after the
# start of their input window (not at the window start).
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:look_back + len(trainPredict), :] = trainPredict

# The test split starts at row len(train) of the full series, so its first
# prediction belongs at row len(train) + look_back.
test_start = len(train) + look_back
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[test_start:test_start + len(testPredict), :] = testPredict

# Plot the original series with the train and test predictions on top
plt.plot(scaler.inverse_transform(dataset), label='observed')
plt.plot(trainPredictPlot, label='train prediction')
plt.plot(testPredictPlot, label='test prediction')
plt.legend()
plt.show()

In [None]:
# Peak resident set size of this process after running the notebook.
# NOTE: ru_maxrss is reported in kilobytes on Linux (bytes on macOS), and it
# is a high-water mark rather than the current memory usage.
resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

In [None]:
# Smallest training prediction. The ndarray reduction is used instead of the
# builtin min(), which iterates over rows in Python and returns a 1-element
# array rather than a scalar.
trainPredict.min()

In [None]:
# Largest training prediction. The ndarray reduction is used instead of the
# builtin max(), which iterates over rows in Python and returns a 1-element
# array rather than a scalar.
trainPredict.max()