# LSTM

In [16]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import pandas as pd
sns.set_style("darkgrid")
pd.set_option('display.max_columns', None)
import warnings
warnings.filterwarnings("ignore")
import scipy.stats as stats
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse
from influxdb import *
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

In [17]:
from math import sqrt
from numpy import array
from numpy import mean
from numpy import std
import keras
import pandas as pd
from pandas import DataFrame
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from matplotlib import pyplot

In [18]:
# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    return data[:-n_test], data[-n_test:]

In [19]:
# transform list into supervised learning format
def series_to_supervised(data, n_in=1, n_out=1):
    df = DataFrame(data)
    cols = list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
    # put it all together
    agg = concat(cols, axis=1)
    # drop rows with NaN values
    agg.dropna(inplace=True)
    return agg.values

In [20]:
# root mean squared error or rmse
def measure_rmse(actual, predicted):
    return sqrt(mean_squared_error(actual, predicted))

In [21]:
# difference dataset
def difference(data, interval):
    return [data[i] - data[i - interval] for i in range(interval, len(data))]

In [22]:
# fit a model
def model_fit(train, config):
    # unpack config
    n_input, n_nodes, n_epochs, n_batch, n_diff = config
    # prepare data
    if n_diff > 0:
        train = difference(train, n_diff)
    data = series_to_supervised(train, n_in=n_input)
    train_x, train_y = data[:, :-1], data[:, -1]
    train_x = train_x.reshape((train_x.shape[0], train_x.shape[1], 1))
    # define model
    model = Sequential()
    model.add(LSTM(n_nodes, activation='relu',input_shape=(n_input, 1)))
#     model.add(LSTM(int(n_nodes/2), activation='relu'))
    model.add(Dense(n_nodes, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(int(n_nodes/2), activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(1))
    
    #define the learning rate
    optimizer = keras.optimizers.Adam(lr=5e-3)
    model.compile(loss='mse', optimizer='adam')
    # fit
    model.fit(train_x, train_y, epochs=n_epochs, batch_size=n_batch, verbose=0)
    return model

In [23]:
# forecast with a pre-fit model
def model_predict(model, history, config):
    # unpack config
    n_input, _, _, _, n_diff = config
    # prepare data
    correction = 0.0
    if n_diff > 0:
        correction = history[-n_diff]
        history = difference(history, n_diff)
    x_input = array(history[-n_input:]).reshape((1, n_input, 1))
    # forecast
    yhat = model.predict(x_input, verbose=0)
    return correction + yhat[0]

In [24]:
# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # fit model
    model = model_fit(train, cfg)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # fit model and make forecast for history
        yhat = model_predict(model, history, cfg)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        history.append(test[i])
    # estimate prediction error
    error = measure_rmse(test, predictions)
    print(' > %.3f' % error)
    return error , predictions, model


In [25]:
# repeat evaluation of a config
def repeat_evaluate(data, config, n_test, n_repeats=1):
    # fit and evaluate the model n times
    scores = [walk_forward_validation(data, n_test, config)[0] for _ in range(n_repeats)]
    pred = walk_forward_validation(data, n_test, config)[1]
    model_sum = walk_forward_validation(data, n_test, config)[2]
    
    return scores, pred, model_sum

In [26]:
# summarize model performance
def summarize_scores(name, scores):
    # print a summary
    scores_m, score_std = mean(scores), std(scores)
    print('%s: %.3f RMSE (+/- %.3f)' % (name, scores_m, score_std))
    # box and whisker plot
    pyplot.boxplot(scores)
    pyplot.show()


In [27]:
INFLUX_DB_IP = '10.12.97.178'
INFLUX_DB_PORT = 8086
INFLUX_DB = 'demoDb'
TARGET_MEASUREMENT = 'Energy24'

con_obj = InfluxDBClient(host=INFLUX_DB_IP, port=INFLUX_DB_PORT, database=INFLUX_DB)
query = 'select * from ' + TARGET_MEASUREMENT + ' where time > now() - 3d'
df = pd.DataFrame(con_obj.query(query).get_points())
df['time'] = df['time'].astype('datetime64[ns]')
df['time'] = df['time'] + datetime.timedelta(hours=5, minutes=30)
df = df.set_index('time')
df.index.freq = 'H' 
df[df['EM4'] < 0] = 0 
df = df[['EM4']]

In [28]:
data = df.values
# data split
n_test = 120
# define config 
'''
    n_input: The number of lag observations to use as input to the model.
    n_nodes: The number of LSTM units to use in the hidden layer.
    n_epochs: The number of times to expose the model to the whole training dataset.
    n_batch: The number of samples within an epoch after which the weights are updated.
    n_diff: The difference order or 0 if not used.
'''


config = [24, 50, 100, 20, 0]
# grid search
scores = repeat_evaluate(data, config, n_test)
# summarize scores
summarize_scores('lstm', scores[0])

Instructions for updating:
If using Keras pass *_constraint arguments to layers.



UnboundLocalError: local variable 'batch_index' referenced before assignment

In [None]:
pred1 = pd.DataFrame(scores[1])
pred1.index = df[-120:].index
pred1= pred1

In [None]:
plt.figure(figsize=(16,8))
df['EM4'][400:].plot(label='True')
df[-120:]['EM4'].plot(label='Test')
pred1[0].plot(label='Prediction')
plt.ylabel("Energy Consumption")
plt.legend()
plt.show()

In [None]:
plt.figure(figsize=(16,8))
df[-120:]['EM4'].plot(label='Test')
pred1[0].plot(label='Prediction')
plt.ylabel("Energy Consumption")
plt.legend()
plt.show()

In [None]:
model=scores[2]

In [None]:
model.save("LSTM_model.h5")

In [29]:
from keras.models import load_model
 
# load model
model = load_model('LSTM_model.h5')

def model_inference(model, past24hr_data):
    # past 24hr data
    n_input = 24
    # prepare data
    x_input = array(past24hr_data[-n_input:]).reshape((1, n_input, 1))
    # forecast
    yhat = model.predict(x_input, verbose=0)
    return yhat[0]   

OSError: Unable to open file (unable to open file: name = 'LSTM_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [30]:
model_inference(model, df.values)

NameError: name 'model_inference' is not defined

In [31]:
model.summary()

NameError: name 'model' is not defined

In [32]:
from keras.models import Model, Sequential
from keras import backend as K

def create_dropout_predict_function(model, dropout):
    """
    Create a keras function to predict with dropout
    model : keras model
    dropout : fraction dropout to apply to all layers
    
    Returns
    predict_with_dropout : keras function for predicting with dropout
    """
    
    # Load the config of the original model
    conf = model.get_config()
    # Add the specified dropout to all layers
    for layer in conf['layers']:
        # Dropout layers
        if layer["class_name"]=="Dropout":
            layer["config"]["rate"] = dropout
        # Recurrent layers with dropout
        elif "dropout" in layer["config"].keys():
            layer["config"]["dropout"] = dropout

    # Create a new model with specified dropout
    if type(model)==Sequential:
        # Sequential
        model_dropout = Sequential.from_config(conf)
    else:
        # Functional
        model_dropout = Model.from_config(conf)
    model_dropout.set_weights(model.get_weights()) 
    
    # Create a function to predict with the dropout on
    predict_with_dropout = K.function(model_dropout.inputs+[K.learning_phase()], model_dropout.outputs)
    
    return predict_with_dropout

In [33]:
import numpy as np
from keras.models import load_model

n_input = 24
x_input = array(df[-n_input:]).reshape((1, n_input, 1))

dropout = 0.7
num_iter = 20
num_samples = x_input.shape[0]

path_to_model = "LSTM_model.h5"
model = load_model(path_to_model)

predict_with_dropout = create_dropout_predict_function(model, dropout)

predictions = np.zeros((num_samples, num_iter))

for i in range(num_iter):
    predictions[:,i] = predict_with_dropout(x_input+[1])[0].reshape(-1)

OSError: Unable to open file (unable to open file: name = 'LSTM_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [34]:
pd.DataFrame(predictions).plot(kind='hist')

NameError: name 'predictions' is not defined

In [35]:
predictions

NameError: name 'predictions' is not defined

In [36]:
ci = 0.8
lower_lim = np.quantile(predictions, 0.5-ci/2, axis=1)
upper_lim = np.quantile(predictions, 0.5+ci/2, axis=1)

NameError: name 'predictions' is not defined