# Using Deep Learning to forecast time series data

In [2]:
import numpy as np
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [4]:
np.random.seed(1)

In [9]:
dataset=pd.read_csv('international-airline-passengers.csv', usecols=[1], engine='python', skipfooter=3)

In [10]:
dataset.head()

Unnamed: 0,International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60
0,112
1,118
2,132
3,129
4,121


In [11]:
dataset=dataset.values
dataset=dataset.astype('float32')

In [12]:
#Normalize the data
scaler = MinMaxScaler(feature_range=(0,1))
dataset=scaler.fit_transform(dataset)

In [13]:
dataset #All values between 0 and 1

array([[0.01544401],
       [0.02702703],
       [0.05405405],
       [0.04826255],
       [0.03281853],
       [0.05984557],
       [0.08494207],
       [0.08494207],
       [0.06177607],
       [0.02895753],
       [0.        ],
       [0.02702703],
       [0.02123553],
       [0.04247104],
       [0.07142857],
       [0.05984557],
       [0.04054055],
       [0.08687258],
       [0.12741312],
       [0.12741312],
       [0.10424709],
       [0.05598456],
       [0.01930502],
       [0.06949806],
       [0.07915059],
       [0.08880308],
       [0.14285713],
       [0.11389962],
       [0.13127413],
       [0.14285713],
       [0.18339768],
       [0.18339768],
       [0.15444016],
       [0.11196911],
       [0.08108109],
       [0.1196911 ],
       [0.12934363],
       [0.14671814],
       [0.17181468],
       [0.14864865],
       [0.15250966],
       [0.22007722],
       [0.24324325],
       [0.26640925],
       [0.2027027 ],
       [0.16795367],
       [0.13127413],
       [0.173

In [14]:
#Splitting the train and test set
train_size=int(len(dataset)*0.67)

In [15]:
print(train_size)

96


In [17]:
test_size=len(dataset)-train_size
print(test_size)

48


In [18]:
train, test=dataset[0:train_size, :], dataset[train_size: len(dataset), :]

In [19]:
print(len(train), len(test))

96 48


In [20]:
#Create dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY=[],[]
    for i in range(len(dataset)-look_back-1):
        a=dataset[i: (i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i+look_back, 0])
    return np.array(dataX), np.array(dataY)

In [21]:
#Reshape dataset such that X=current time, Y=future time
look_back=1
trainX, trainY=create_dataset(train, look_back)
testX, testY=create_dataset(test, look_back)

In [23]:
trainX=np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX=np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [24]:
print(trainX)

[[[0.01544401]]

 [[0.02702703]]

 [[0.05405405]]

 [[0.04826255]]

 [[0.03281853]]

 [[0.05984557]]

 [[0.08494207]]

 [[0.08494207]]

 [[0.06177607]]

 [[0.02895753]]

 [[0.        ]]

 [[0.02702703]]

 [[0.02123553]]

 [[0.04247104]]

 [[0.07142857]]

 [[0.05984557]]

 [[0.04054055]]

 [[0.08687258]]

 [[0.12741312]]

 [[0.12741312]]

 [[0.10424709]]

 [[0.05598456]]

 [[0.01930502]]

 [[0.06949806]]

 [[0.07915059]]

 [[0.08880308]]

 [[0.14285713]]

 [[0.11389962]]

 [[0.13127413]]

 [[0.14285713]]

 [[0.18339768]]

 [[0.18339768]]

 [[0.15444016]]

 [[0.11196911]]

 [[0.08108109]]

 [[0.1196911 ]]

 [[0.12934363]]

 [[0.14671814]]

 [[0.17181468]]

 [[0.14864865]]

 [[0.15250966]]

 [[0.22007722]]

 [[0.24324325]]

 [[0.26640925]]

 [[0.2027027 ]]

 [[0.16795367]]

 [[0.13127413]]

 [[0.17374519]]

 [[0.17760617]]

 [[0.17760617]]

 [[0.25482625]]

 [[0.25289574]]

 [[0.24131274]]

 [[0.26833975]]

 [[0.3088803 ]]

 [[0.32432434]]

 [[0.25675675]]

 [[0.20656371]]

 [[0.14671814]

In [25]:
print(testX)

[[[0.4073359 ]]

 [[0.3803089 ]]

 [[0.48648646]]

 [[0.47104248]]

 [[0.484556  ]]

 [[0.6138996 ]]

 [[0.6969112 ]]

 [[0.70077217]]

 [[0.57915056]]

 [[0.46911195]]

 [[0.38803086]]

 [[0.44787642]]

 [[0.45559844]]

 [[0.4131274 ]]

 [[0.4980695 ]]

 [[0.47104248]]

 [[0.49999997]]

 [[0.6389961 ]]

 [[0.7471043 ]]

 [[0.7741313 ]]

 [[0.57915056]]

 [[0.492278  ]]

 [[0.3976834 ]]

 [[0.44980696]]

 [[0.49420848]]

 [[0.45945945]]

 [[0.5830116 ]]

 [[0.5637065 ]]

 [[0.61003864]]

 [[0.71042466]]

 [[0.8571429 ]]

 [[0.8783784 ]]

 [[0.69305015]]

 [[0.5849421 ]]

 [[0.4980695 ]]

 [[0.58108103]]

 [[0.6042471 ]]

 [[0.554054  ]]

 [[0.60810804]]

 [[0.6891892 ]]

 [[0.71042466]]

 [[0.8320464 ]]

 [[1.        ]]

 [[0.96911204]]

 [[0.7799227 ]]

 [[0.6891892 ]]]


Let's create a LSTM (RNN) model

In [26]:
model = Sequential()
model.add(LSTM(4, input_shape = (1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

Fit the model

In [27]:
model.fit(trainX, trainY, batch_size=1, verbose=2)

Epoch 1/1
 - 1s - loss: 0.0222


<keras.callbacks.callbacks.History at 0x1a41398c90>

Make predictions

In [28]:
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

In [29]:
trainPredict

array([[0.1157463 ],
       [0.11876079],
       [0.12588267],
       [0.12434632],
       [0.12027659],
       [0.12742455],
       [0.13416867],
       [0.13416867],
       [0.12793973],
       [0.11926542],
       [0.11176297],
       [0.11876079],
       [0.11725068],
       [0.1228155 ],
       [0.13052467],
       [0.12742455],
       [0.12230647],
       [0.1346916 ],
       [0.14580612],
       [0.14580612],
       [0.13942419],
       [0.12639603],
       [0.11674857],
       [0.13000646],
       [0.13260339],
       [0.13521512],
       [0.15010503],
       [0.14207348],
       [0.14687757],
       [0.15010503],
       [0.16155134],
       [0.16155134],
       [0.15335187],
       [0.1415425 ],
       [0.13312458],
       [0.14366983],
       [0.14634156],
       [0.15118517],
       [0.15825766],
       [0.15172605],
       [0.15280941],
       [0.17209867],
       [0.17884798],
       [0.18566155],
       [0.16708075],
       [0.15716384],
       [0.14687757],
       [0.158

In [30]:
testPredict

array([[0.22830689],
       [0.21998769],
       [0.25297183],
       [0.24812783],
       [0.2523656 ],
       [0.2933487 ],
       [0.31988332],
       [0.3211189 ],
       [0.28227875],
       [0.24752332],
       [0.22235857],
       [0.24088868],
       [0.24329804],
       [0.23009701],
       [0.25661352],
       [0.24812783],
       [0.2572212 ],
       [0.30136105],
       [0.33594358],
       [0.3445835 ],
       [0.28227875],
       [0.25479177],
       [0.22532907],
       [0.24149068],
       [0.25539884],
       [0.24450408],
       [0.28350708],
       [0.27737013],
       [0.2921172 ],
       [0.32420787],
       [0.37103269],
       [0.37776747],
       [0.31864777],
       [0.28412142],
       [0.25661352],
       [0.28289285],
       [0.29027057],
       [0.27430648],
       [0.29150155],
       [0.31741232],
       [0.32420787],
       [0.36305463],
       [0.41597086],
       [0.4063371 ],
       [0.3464336 ],
       [0.31741232]], dtype=float32)

The values are between 0 and 1 as a result of the normalization, so we need to reverse the predicted values to get the actual values

In [31]:
trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)

trainY=scaler.inverse_transform([trainY])
testY=scaler.inverse_transform([testY])

In [32]:
print(trainPredict, trainY)  #Now we have actual values

[[163.95657]
 [165.5181 ]
 [169.20721]
 [168.41139]
 [166.30328]
 [170.00592]
 [173.49936]
 [173.49936]
 [170.27278]
 [165.77948]
 [161.89322]
 [165.5181 ]
 [164.73586]
 [167.61844]
 [171.61177]
 [170.00592]
 [167.35475]
 [173.77025]
 [179.52756]
 [179.52756]
 [176.22173]
 [169.47314]
 [164.47575]
 [171.34335]
 [172.68855]
 [174.04144]
 [181.75441]
 [177.59407]
 [180.08258]
 [181.75441]
 [187.6836 ]
 [187.6836 ]
 [183.43626]
 [177.31902]
 [172.95853]
 [178.42096]
 [179.80493]
 [182.31392]
 [185.97748]
 [182.59409]
 [183.15527]
 [193.14711]
 [196.64326]
 [200.17268]
 [190.54782]
 [185.41086]
 [180.08258]
 [186.26117]
 [186.82936]
 [186.82936]
 [198.4039 ]
 [198.1099 ]
 [196.35063]
 [200.46825]
 [206.72421]
 [209.13092]
 [198.69815]
 [191.12372]
 [182.31392]
 [188.2544 ]
 [189.1125 ]
 [184.563  ]
 [198.1099 ]
 [195.76604]
 [197.81612]
 [206.72421]
 [218.26042]
 [215.50517]
 [205.22641]
 [196.35063]
 [188.82622]
 [196.35063]
 [200.17268]
 [197.52255]
 [207.62526]
 [208.22693]
 [208.52808]

In [33]:
print(testPredict, testY)  #Now we have actual values

[[222.26297]
 [217.95363]
 [235.03941]
 [232.53023]
 [234.72537]
 [255.95464]
 [269.69955]
 [270.3396 ]
 [250.2204 ]
 [232.21709]
 [219.18173]
 [228.78033]
 [230.02838]
 [223.19026]
 [236.92581]
 [232.53023]
 [237.24059]
 [260.10504]
 [278.01877]
 [282.49423]
 [250.2204 ]
 [235.98213]
 [220.72046]
 [229.09218]
 [236.2966 ]
 [230.6531 ]
 [250.85667]
 [247.67773]
 [255.31671]
 [271.93967]
 [296.19495]
 [299.68356]
 [269.05954]
 [251.1749 ]
 [236.92581]
 [250.5385 ]
 [254.36015]
 [246.09076]
 [254.9978 ]
 [268.4196 ]
 [271.93967]
 [292.0623 ]
 [319.47293]
 [314.48264]
 [283.4526 ]
 [268.4196 ]] [[301.00001152 355.999993   348.00001085 355.00000681 421.9999924
  465.0000161  466.99998848 403.99999397 346.99999379 304.99998716
  335.99999132 339.99999783 317.99999288 362.00000277 348.00001085
  362.99998896 434.99999813 491.00002756 505.00001948 403.99999397
  359.00001333 310.00001073 337.00000838 359.99999952 342.00000109
  406.0000281  395.99998094 420.00002002 471.99998119 548.00004318


Calculate RMSE

In [34]:
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
print('Train: %.2f RMSE' % (trainScore))

Train: 56.01 RMSE


In [35]:
testScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
print('Train: %.2f RMSE' % (testScore))

Train: 56.01 RMSE


End of Notebook!!!