In [220]:
# multivariate multi-step encoder-decoder lstm
import sys
from math import sqrt
from numpy import split
from numpy import array, array_split
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from keras.callbacks import ModelCheckpoint

# split a multivariate dataset into weekly train/test windows
def split_dataset(data):
	"""Cut `data` into a train span and a test span, then group each into 7-row windows.

	Row 0 is skipped and the final 7 rows are left out of both spans: rows
	from index 1 up to (not including) -1701 form the training span, and rows
	from -1701 up to (not including) -7 form the test span. The shape of each
	span is printed before it is windowed.

	Returns a (train, test) pair of arrays, each indexed as
	[window, row-in-window, column]. A span whose length is not a multiple
	of 7 is rejected by numpy's `split`.
	"""
	train_rows = data[1:-1701]
	test_rows = data[-1701:-7]
	for span in (train_rows, test_rows):
		print(span.shape)
	# window counts are passed to split as true-division results, one per span
	n_train_windows = len(train_rows) / 7
	train = array(split(train_rows, n_train_windows))
	n_test_windows = len(test_rows) / 7
	test = array(split(test_rows, n_test_windows))
	return train, test

# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
	"""Score forecasts with RMSE, per forecast step and overall.

	Parameters:
		actual: 2-D array-like indexed [sample, step] of observed values.
		predicted: forecasts in the same layout; a trailing axis of length 1
			(i.e. [sample, step, 1]) is also accepted and dropped.

	Returns:
		(score, scores): the overall RMSE as a float, and a list holding one
		RMSE float per step (column).

	Raises:
		ValueError: if there is nothing to score or the shapes do not line up.
	"""
	actual = array(actual, dtype=float)
	predicted = array(predicted, dtype=float)
	# evaluate_model stacks forecast() outputs, whose last axis has length 1;
	# drop it so the arithmetic below yields plain scalars rather than
	# 1-element arrays being handed to math.sqrt
	if predicted.ndim == actual.ndim + 1 and predicted.shape[-1] == 1:
		predicted = predicted.reshape(predicted.shape[:-1])
	if actual.ndim != 2 or actual.shape != predicted.shape:
		raise ValueError('actual %s and predicted %s must share a [sample, step] shape'
			% (actual.shape, predicted.shape))
	if actual.size == 0:
		raise ValueError('cannot score empty forecasts')
	squared_error = (actual - predicted) ** 2
	# calculate an RMSE score for each step: mean over samples, then root
	scores = [sqrt(mse) for mse in squared_error.mean(axis=0)]
	# calculate overall RMSE across every sample and step
	score = sqrt(squared_error.mean())
	return score, scores

# print a one-line summary of the overall and per-step scores
def summarize_scores(name, score, scores):
	"""Print `name`, the overall `score` to 3 decimals, and each of `scores` to 1 decimal."""
	formatted = []
	for value in scores:
		formatted.append('%.1f' % value)
	per_step = ', '.join(formatted)
	print('%s: [%.3f] %s' % (name, score, per_step))

# convert history into inputs and outputs
def to_supervised(train, n_input, n_out=7):
	"""Slide a window over the flattened history to build supervised samples.

	Parameters:
		train: 3-D array indexed [window, row, column]; it is flattened to
			[row, column] before sampling.
		n_input: number of consecutive rows in each input sample.
		n_out: number of rows that follow each input and form its target.

	Returns:
		(X, y) where X has shape [samples, n_input, columns] and y has shape
		[samples, n_out], holding column 0 of the rows after each input.
		Both are empty arrays when the history has fewer than
		n_input + n_out rows.
	"""
	# flatten data
	data = train.reshape((train.shape[0]*train.shape[1], train.shape[2]))
	X, y = list(), list()
	# a window may end exactly on the last row (out_end == len(data)), so the
	# number of usable start positions includes that final one
	n_samples = len(data) - n_input - n_out + 1
	# step over the history one time step at a time
	for in_start in range(max(n_samples, 0)):
		in_end = in_start + n_input
		out_end = in_end + n_out
		X.append(data[in_start:in_end, :])
		y.append(data[in_end:out_end, 0])
	return array(X), array(y)

# train the model
def build_model(train, n_input):
	"""Fit an encoder-decoder LSTM on `train` and return the fitted model.

	Parameters:
		train: 3-D array indexed [window, row, column], passed to
			to_supervised to build the training samples.
		n_input: number of rows in each input sample.

	Returns:
		The Keras model after the final training epoch. The weights with the
		lowest training loss seen during fitting are also written to the
		checkpoint file under models/.
	"""
	import os
	# prepare data
	train_x, train_y = to_supervised(train, n_input)
	# define parameters
	verbose, epochs, batch_size = 2, 100, 16
	n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
	# reshape output into [samples, timesteps, features]
	train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))
	# define model: LSTM encoder -> repeated context vector -> LSTM decoder -> per-step dense head
	model = Sequential()
	model.add(LSTM(200, activation='relu', input_shape=(n_timesteps, n_features)))
	model.add(RepeatVector(n_outputs))
	model.add(LSTM(200, activation='relu', return_sequences=True))
	model.add(TimeDistributed(Dense(100, activation='relu')))
	model.add(TimeDistributed(Dense(1)))
	model.compile(loss='mse', optimizer='adam')
	# checkpoint
	filepath="models/multivariate_multistep_encoderdecoder_lstm_modified.h5"
	# make sure the target directory exists before the callback tries to save into it
	os.makedirs(os.path.dirname(filepath), exist_ok=True)
	# fit() receives no validation data and the model has no accuracy metric,
	# so training loss (lower is better) is the only quantity available to monitor
	checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
	callbacks_list = [checkpoint]
	# fit network, with the checkpoint callback attached so it actually runs
	model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose, callbacks=callbacks_list)
	return model

# forecast the next window from the most recent observations
def forecast(model, history, n_input):
	"""Predict from the last `n_input` rows of `history`.

	`history` is a sequence of equally shaped [row, column] windows. The
	windows are stacked and flattened to [row, column], the final `n_input`
	rows are wrapped as a single-sample batch [1, n_input, columns], and the
	first (only) entry of `model.predict` for that batch is returned.
	"""
	stacked = array(history)
	total_rows = stacked.shape[0] * stacked.shape[1]
	flat = stacked.reshape((total_rows, stacked.shape[2]))
	# most recent observations, with a leading batch axis of size one
	recent = flat[-n_input:, :]
	batch = recent.reshape((1,) + recent.shape)
	return model.predict(batch, verbose=0)[0]

# fit a model on train and score it on test with walk-forward validation
def evaluate_model(train, test, n_input):
	"""Train on `train`, forecast each window of `test` in turn, and score the forecasts.

	Before each forecast the history holds every training window plus the
	test windows already seen; the true test window is appended only after
	its forecast has been made. Returns the (score, scores) pair produced by
	evaluate_forecasts for column 0 of `test`.
	"""
	model = build_model(train, n_input)
	# history starts as the list of training windows
	history = list(train)
	predictions = []
	for week in test:
		# predict this window from what has been observed so far
		predictions.append(forecast(model, history, n_input))
		# then reveal the real observation for the next round
		history.append(week)
	return evaluate_forecasts(test[:, :, 0], array(predictions))

In [221]:
# read qqq.csv, parsing the 'datetime' column and using it as the index
dataset = read_csv(
	'qqq.csv',
	header=0,
	infer_datetime_format=True,
	parse_dates=['datetime'],
	index_col=['datetime'],
)
# report the number of rows, then preview the final rows
print(len(dataset.values))
dataset.tail()


3081


Unnamed: 0_level_0,QQQ.Open,QQQ.High,QQQ.Low,QQQ.Close,QQQ.Volume,QQQ.Adjusted
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-03-25,177.910004,178.839996,176.929993,178.220001,46336500,178.220001
2019-03-26,179.600006,180.690002,178.139999,179.050003,33248100,179.050003
2019-03-27,179.320007,179.720001,176.600006,177.899994,37096700,177.899994
2019-03-28,178.360001,178.979996,177.240005,178.309998,30368200,178.309998
2019-03-29,179.630005,179.830002,178.589996,179.660004,35177400,179.660004


In [None]:
# number of past rows fed to the model for each forecast
n_input = 7
# cut the loaded values into 7-row train/test windows
train, test = split_dataset(dataset.values)
# fit the model and score its walk-forward forecasts
score, scores = evaluate_model(train, test, n_input)
# print the overall RMSE followed by the per-step RMSEs
summarize_scores('lstm', score, scores)
# plot the per-step RMSE against the seven step labels
days = ['sun', 'mon', 'tue', 'wed', 'thr', 'fri', 'sat']
pyplot.plot(days, scores, marker='o', label='lstm')
pyplot.show()

(1379, 6)
(1694, 6)
Epoch 1/100
 - 7s - loss: 6271076250827.2988
Epoch 2/100
 - 2s - loss: 276022294829.5736
Epoch 3/100
 - 2s - loss: 102372689915.1238
Epoch 4/100
 - 2s - loss: 48146131893.7319
Epoch 5/100
 - 2s - loss: 36840948808.7678
Epoch 6/100
 - 2s - loss: 29621056296.6974
Epoch 7/100
 - 2s - loss: 90143318634.1509
Epoch 8/100
 - 2s - loss: 26155621385.7524
Epoch 9/100
 - 2s - loss: 11747381463.3026
Epoch 10/100
 - 2s - loss: 6713794407.7128
Epoch 11/100
 - 3s - loss: 3911431535.0037
Epoch 12/100
 - 2s - loss: 1102841897.1897
Epoch 13/100
 - 2s - loss: 825076140.0264
Epoch 14/100
 - 2s - loss: 1069317061.3802
Epoch 15/100
 - 2s - loss: 706376120.5363
Epoch 16/100
 - 2s - loss: 225259013.2821
Epoch 17/100
 - 2s - loss: 606056596.7945
Epoch 18/100
 - 2s - loss: 4767588946.3795
Epoch 19/100
 - 3s - loss: 19114542296.7560
Epoch 20/100
 - 2s - loss: 2270987874.9538
Epoch 21/100
 - 2s - loss: 1817135322.0220
Epoch 22/100
 - 2s - loss: 1041908963.2234
Epoch 23/100
 - 2s - loss: 171649

In [199]:
# reload a previously saved model from the models/ directory
# NOTE(review): this filename ends in '_modified_.h5', whereas the checkpoint path in
# build_model ends in '_modified.h5' — confirm which saved file is intended
import keras
loaded_model = keras.models.load_model('models/multivariate_multistep_encoderdecoder_lstm_modified_.h5')

In [208]:
# shape of the final 7-row test window produced by split_dataset
print((split_dataset(dataset.values)[1][-1:]).shape)
# the last seven rows of the dataset, shown and then used as the model input
last_window = dataset.values[-7:]
print(last_window)
# wrap the rows as one sample: [1, 7, n_columns]
# `array` is the name this notebook's import cell brings in from numpy; that cell defines no `np` alias
loaded_model.predict(array([last_window]))

(1, 7, 6)
[[2.72600006e+02 2.76450012e+02 2.68450012e+02 2.74019989e+02
  5.94710000e+06 2.74019989e+02]
 [2.72579987e+02 2.72799988e+02 2.64000000e+02 2.64529999e+02
  8.74560000e+06 2.64529999e+02]
 [2.59709991e+02 2.63179993e+02 2.54460007e+02 2.60420013e+02
  1.02150000e+07 2.60420013e+02]
 [2.64440002e+02 2.70260010e+02 2.64429993e+02 2.67769989e+02
  7.35090000e+06 2.67769989e+02]
 [2.68750000e+02 2.75369995e+02 2.68179993e+02 2.74829987e+02
  8.77920000e+06 2.74829987e+02]
 [2.77160004e+02 2.80329987e+02 2.75100006e+02 2.78619995e+02
  6.77410000e+06 2.78619995e+02]
 [2.78700012e+02 2.80160004e+02 2.74500000e+02 2.79859985e+02
  5.98390000e+06 2.79859985e+02]]


array([[[0.07524218],
        [0.07524218],
        [0.07524218],
        [0.07524218],
        [0.07524218],
        [0.07524218],
        [0.07524218]]], dtype=float32)