<a href="https://colab.research.google.com/github/sagar3122/Time-Series/blob/master/LSTM_Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import matplotlib as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
import numpy as np




# Frame a sequence as a supervised learning problem.

def timeseries_to_supervised(data, lag = 1):
    """Build a table of lagged inputs followed by the current values.

    Args:
        data: Anything ``pd.DataFrame`` accepts (list, Series, DataFrame...).
        lag: Number of lagged copies to place in front of the data.

    Returns:
        A DataFrame whose first columns are ``data`` shifted down by
        1..``lag`` rows and whose last column(s) are ``data`` itself.
        Cells with no history (introduced by the shift) are filled with 0.
    """
    frame = pd.DataFrame(data)
    # Lagged copies first (lag 1, lag 2, ...), the unshifted data last.
    pieces = [frame.shift(step) for step in range(1, lag + 1)] + [frame]
    # shift() leaves NaN where there is no earlier row; replace those with 0.
    return pd.concat(pieces, axis = 1).fillna(0)




# Transform to be stationary:
# create a differenced series.
def difference(dataset, interval = 1):
    """Return the ``interval``-step differences of ``dataset``.

    Args:
        dataset: A pandas object supporting ``.iloc`` and ``len``.
        interval: Distance, in rows, between the two values subtracted.

    Returns:
        A DataFrame with one row per position ``i`` in
        ``range(interval, len(dataset))`` holding
        ``dataset.iloc[i] - dataset.iloc[i - interval]``.
    """
    deltas = [
        dataset.iloc[pos] - dataset.iloc[pos - interval]
        for pos in range(interval, len(dataset))
    ]
    return pd.DataFrame(deltas)




# Invert the transform:
# invert a differenced value.
def inverse_difference(original_dataset, yhat, interval = 1):
    """Undo a differencing step for a single value.

    Args:
        original_dataset: A pandas object supporting ``.iloc``.
        yhat: The differenced value to restore.
        interval: How many rows back from the end of ``original_dataset``
            the reference observation sits.

    Returns:
        ``yhat`` plus ``original_dataset.iloc[-interval]``.
    """
    reference = original_dataset.iloc[-interval]
    return yhat + reference

# inverted = list()
# for i in range(len(differenced)):
#     value = inverse_difference(df_train, differenced.iloc[i], len(df_train)-i)
#     inverted.append(value)
# inverted = pd.DataFrame(inverted)
# print(inverted.head())
# print(inverted.shape)


# Scale train to [-1, 1].
def scale(train):
    """Fit a MinMaxScaler with range (-1, 1) on ``train`` and apply it.

    Only the training data is handled here; nothing is transformed for a
    test set.

    Args:
        train: The data the scaler is fitted on and then transformed.

    Returns:
        A ``(scaler, train_scaled)`` tuple: the fitted scaler and the
        transformed training data.
    """
    fitted = MinMaxScaler(feature_range = (-1, 1)).fit(train)
    return fitted, fitted.transform(train)

# Inverse scaling for a forecasted value.
def invert_scale(scaler, X, value):
    """Map one scaled forecast back to the scaler's original units.

    The scaler's ``inverse_transform`` is given a single row made of the
    input features ``X`` followed by ``value``; only the last entry of the
    inverted row is returned.

    Args:
        scaler: Object exposing ``inverse_transform`` on a 2-D array.
        X: Iterable of the scaled input features for this step.
        value: The scaled value to invert.

    Returns:
        The last element of the inverse-transformed row.
    """
    row = np.array(list(X) + [value])
    restored = scaler.inverse_transform(row.reshape(1, len(row)))
    return restored[0, -1]

# Fit an LSTM network to training data.
def fit_LSTM(train, batch_size, nb_epoch, neurons):
    """Train a stateful single-layer LSTM followed by a Dense(1) output.

    Args:
        train: 2-D array; every column but the last is input, the last
            column is the target.
        batch_size: Batch size used for the fixed batch input shape and for
            fitting.
        nb_epoch: Number of passes over the training data.
        neurons: Number of units in the LSTM layer.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    inputs, targets = train[:, :-1], train[:, -1]
    # The LSTM takes 3-D input: (samples, timesteps, features), one timestep.
    inputs = inputs.reshape(inputs.shape[0], 1, inputs.shape[1])
    timesteps, features = inputs.shape[1], inputs.shape[2]

    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape = (batch_size, timesteps, features), stateful = True))
    model.add(Dense(1))
    model.compile(optimizer = 'adam', loss = 'mean_squared_error')

    # Epochs are run one at a time so the carried-over state can be reset
    # after each pass; shuffle stays off to keep the samples in order.
    for epoch in range(nb_epoch):
        print("epoch", epoch)
        model.fit(inputs, targets, epochs = 1, batch_size = batch_size, verbose = 0, shuffle = False)
        model.reset_states()
    return model

# Make a one-step forecast.
def forecast_LSTM(model,batch_size, X):
    """Predict a single value from one input vector.

    Args:
        model: Object exposing ``predict(array, batch_size=...)``.
        batch_size: Batch size forwarded to ``predict``.
        X: 1-D array of input features for this step.

    Returns:
        Element ``[0, 0]`` of the model's prediction.
    """
    # One sample, one timestep, len(X) features.
    sample = X.reshape(1, 1, len(X))
    prediction = model.predict(sample, batch_size = batch_size)
    return prediction[0, 0]


# Load the data sets.
# Training file: the second column becomes the index, 'ID' is dropped and
# the remaining values are cast to float64.
df_train = (
    pd.read_csv('Train_SU63ISt.csv', index_col = 1)
    .drop(columns = ['ID'])
    .astype('float64')
)
print(type(df_train))
print(df_train.head())
print(df_train.shape)
# Test file: the first column becomes the index and 'Datetime' is dropped.
df_test = pd.read_csv('Test_0qrQsBZ.csv', index_col = 0).drop(columns = ['Datetime'])
print(type(df_test))
print(df_test.head())
print(df_test.shape)

# Transform the data to be stationary (first-order differences).
diff_values = difference(df_train, interval = 1)
print(type(diff_values))
print(diff_values.head())
print(diff_values.shape)

# Transform the data into a supervised learning table (lag-1 input, target).
supervised = timeseries_to_supervised(diff_values, lag = 1)
print(supervised.head())
print(supervised.shape)

# The whole supervised table is used for training; no hold-out split is made.
train = supervised[:]
print(train)
print(train.shape)

# Transform the scale of the training data set.
scaler, train_scaled = scale(train)
print(train_scaled)
print(train_scaled.shape)
print(type(train_scaled))

# Fit the model: batch size 1, 10 epochs, 1 LSTM unit.
LSTM_model = fit_LSTM(train_scaled, batch_size = 1, nb_epoch = 10, neurons = 1)
# Forecast the entire training data set to build up state for forecasting,
# i.e. seed the model's internal state before forecasting the test data.
# The input column is reshaped to the 3-D form the LSTM expects:
# (samples, timesteps, features).
train_reshaped = train_scaled[:, 0].reshape(-1, 1, 1)
print(train_reshaped)
print(train_reshaped.shape)
LSTM_model.predict(train_reshaped, batch_size=1)

# Walk-forward forecasting over the test data set.
# Column 0 holds the scaled input of each step, the last column its scaled
# forecast. Each forecast is fed back as the next step's input.
test_auto_scaled = np.zeros((df_test.shape[0],2))
# The first input is the last scaled target value seen in training.
test_auto_scaled[0,0] = train_scaled[-1,1]
print("\n\n\n\n\n\n\n\n\n\n\n\ntest_auto_scaled",test_auto_scaled)
print(type(test_auto_scaled))
print(test_auto_scaled.shape)


predictions = list()
n_steps = len(test_auto_scaled)
# Level that the next forecast difference is added to. The test rows are not
# part of df_train, so the only valid starting point is the last observed
# training value; after that each forecast becomes the base for the next one.
# (Looking up df_train.iloc[-(n_steps + 1 - i)] would add the difference to a
# training value from long before the forecast horizon.)
previous_level = df_train.iloc[-1].values[0]
for i in range(n_steps):
    # Make a one-step forecast from the scaled input of this step.
    X = test_auto_scaled[i, 0:-1]
    print("X",X)
    yhat = forecast_LSTM(LSTM_model, 1, X)
    test_auto_scaled[i, -1] = yhat
    # Feed the forecast forward as the next input; the last step has no
    # successor, which is checked explicitly instead of swallowing errors.
    has_next = i + 1 < n_steps
    if has_next:
        test_auto_scaled[i+1, 0] = yhat
    print("y"+str(i), test_auto_scaled[i, -1])
    if has_next:
        print("x"+str(i+1), test_auto_scaled[i+1, 0])
    # Invert the scaling to get the forecast difference in original units.
    yhat = invert_scale(scaler, X, yhat)
    # Invert the differencing by accumulating onto the previous level.
    yhat = previous_level + yhat
    previous_level = yhat
    # Store the forecast.
    predictions.append(yhat)
    index_value = len(train) + i + 1
    print("index_value", index_value)
    print('Datetime=%d, Predicted=%f' % (df_test.index.values[i], yhat))
# Report the forecasts.
#
# NOTE: no RMSE is computed. The ground-truth list that the RMSE call needs
# is only defined in the disabled hold-out snippet below, so evaluating
# mean_squared_error(groundtruths, predictions) raised NameError and stopped
# the script before the forecasts were attached to df_test. Re-enable the
# two disabled pieces together once a hold-out split with known targets
# exists.
# groundtruths = df_train.iloc[13001:-1,0].values.tolist()
# print(len(groundtruths))
print("\n\n\n\n\n\n\n\n\n\n\npredictions", predictions)
print(len(predictions))
# rmse = sqrt(mean_squared_error(groundtruths, predictions))
# print('Test RMSE: %.3f' % rmse)

# Attach the forecasts to the test frame as the 'Count' column.
df_test['Count'] = predictions
print(df_test)
# df_test.to_csv('Result.csv')


























