In [24]:
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
from matplotlib import pyplot
import numpy

In [25]:
# date-time parsing function for loading the dataset
def parser(x):
    return datetime.strptime('190'+x, '%Y-%m')

In [26]:
# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
    df = DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag+1)]
    columns.append(df)
    df = concat(columns, axis=1)
    df.fillna(0, inplace=True)
    return df

In [27]:
# create a differenced series
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return Series(diff)

In [28]:
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]

In [29]:
# scale train and test data to [-1, 1]
def scale(train, test):
    # fit scaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    # transform train
    train = train.reshape(train.shape[0], train.shape[1])
    train_scaled = scaler.transform(train)
    # transform test
    test = test.reshape(test.shape[0], test.shape[1])
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled

In [30]:
# inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
    new_row = [x for x in X] + [value]
    array = numpy.array(new_row)
    array = array.reshape(1, len(array))
    inverted = scaler.inverse_transform(array)
    return inverted[0, -1]

In [31]:
# fit an LSTM network to training data
def fit_lstm(train, batch_size, nb_epoch, neurons):
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model

In [32]:
# make a one-step forecast
def forecast_lstm(model, batch_size, X):
    X = X.reshape(1, 1, len(X))
    yhat = model.predict(X, batch_size=batch_size)
    return yhat[0,0]

In [33]:
# load dataset
series = read_csv('bajri1.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)

In [34]:
# transform data to be stationary
raw_values = series.values
diff_values = difference(raw_values, 1)

In [35]:
# transform data to be supervised learning
supervised = timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values

In [36]:
# split data into train and test-sets
train, test = supervised_values[0:-366], supervised_values[-366:]

In [37]:
# transform the scale of the data
scaler, train_scaled, test_scaled = scale(train, test)

In [38]:
# fit the model
lstm_model = fit_lstm(train_scaled, 1, 3000, 4)
# forecast the entire training dataset to build up state for forecasting
train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
lstm_model.predict(train_reshaped, batch_size=1)

KeyboardInterrupt: 

In [16]:
# walk-forward validation on the test data
predictions = list()
for i in range(len(test_scaled)):
    # make one-step forecast
    X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
    yhat = forecast_lstm(lstm_model, 1, X)
    # invert scaling
    yhat = invert_scale(scaler, X, yhat)
    # invert differencing
    yhat = inverse_difference(raw_values, yhat, len(test_scaled)+1-i)
    # store forecast
    predictions.append(yhat)
    expected = raw_values[len(train) + i + 1]
    print('Month=%d, Predicted=%f, Expected=%f' % (i+1, yhat, expected))

Month=1, Predicted=1536.240755, Expected=1392.000000
Month=2, Predicted=1556.699048, Expected=1334.000000
Month=3, Predicted=1627.339965, Expected=1263.000000
Month=4, Predicted=1754.913826, Expected=1251.000000
Month=5, Predicted=1738.221472, Expected=1331.000000
Month=6, Predicted=1769.605316, Expected=1438.000000
Month=7, Predicted=1808.134609, Expected=1304.000000
Month=8, Predicted=1832.959194, Expected=1345.000000
Month=9, Predicted=1519.959194, Expected=1463.000000
Month=10, Predicted=1453.446001, Expected=1801.000000
Month=11, Predicted=1520.206711, Expected=1544.000000
Month=12, Predicted=1591.338646, Expected=1899.000000
Month=13, Predicted=1558.959194, Expected=1846.000000
Month=14, Predicted=1656.148126, Expected=1272.000000
Month=15, Predicted=1544.592498, Expected=1354.000000
Month=16, Predicted=1509.589105, Expected=1290.000000
Month=17, Predicted=1545.031596, Expected=1304.000000
Month=18, Predicted=1535.364016, Expected=1285.000000
Month=19, Predicted=1529.994581, Expe

Month=276, Predicted=1599.394004, Expected=1230.000000
Month=277, Predicted=1662.627294, Expected=1278.000000
Month=278, Predicted=1771.190751, Expected=1272.000000
Month=279, Predicted=1714.662400, Expected=1312.000000
Month=280, Predicted=1742.186350, Expected=1326.000000
Month=281, Predicted=1740.829862, Expected=1324.000000
Month=282, Predicted=1654.933390, Expected=1511.000000
Month=283, Predicted=1402.375339, Expected=1524.000000
Month=284, Predicted=1394.820438, Expected=1566.000000
Month=285, Predicted=1471.970999, Expected=1976.000000
Month=286, Predicted=1425.012557, Expected=1963.000000
Month=287, Predicted=1399.924603, Expected=1625.000000
Month=288, Predicted=1409.271632, Expected=1676.000000
Month=289, Predicted=1347.496877, Expected=1742.000000
Month=290, Predicted=1406.574873, Expected=1753.000000
Month=291, Predicted=1401.476017, Expected=1719.000000
Month=292, Predicted=1390.246732, Expected=1864.000000
Month=293, Predicted=1389.603020, Expected=1797.000000
Month=294,

IndexError: index 1461 is out of bounds for axis 0 with size 1461

In [23]:
# report performance
rmse = sqrt(mean_squared_error(raw_values[-366:], predictions))
print('Test RMSE: %.3f' % rmse)
# line plot of observed vs predicted
pyplot.plot(raw_values[-366:])
pyplot.plot(predictions)
pyplot.show()

ValueError: Found input variables with inconsistent numbers of samples: [366, 367]