# LSTM (Long Short Term Memory)
LSTMs can be used to model univariate time series forecasting problems.

These are problems comprising a single series of observations, where a model is required to learn from past observations in order to predict the next value in the sequence.

In [37]:
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Bidirectional

from keras import layers

# Data processing

In [3]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Slice a univariate series into supervised-learning samples.

    Each sample pairs a window of ``n_steps`` consecutive observations with
    the single observation that immediately follows that window.

    Returns a tuple ``(X, y)`` of numpy arrays: the stacked windows and the
    value following each window.
    """
    total = len(sequence)
    # A window starting at `start` is usable only while the value right after
    # it (index start + n_steps) still lies inside the sequence.
    n_windows = min(total, total - n_steps)
    windows = [sequence[start:start + n_steps] for start in range(n_windows)]
    targets = [sequence[start + n_steps] for start in range(n_windows)]
    return array(windows), array(targets)

In [4]:
# define input sequence
raw_seq = [10, 20, 30, 40, 50, 60, 70, 80, 90]

In [5]:
# choose a number of time steps
n_steps = 3
# split into samples
X, y = split_sequence(raw_seq, n_steps)

In [7]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))

In [9]:
X.shape

(6, 3, 1)

In [14]:
y.shape

(6,)

# Vanilla LSTM

A Vanilla LSTM is an LSTM model that has a single hidden layer of LSTM units, and an output layer used to make a prediction.

We can define a Vanilla LSTM for univariate time series forecasting as follows.

In [16]:
# define model
# The input shape is declared with an explicit Input layer: Keras 3 warns
# against passing `input_shape` to a layer inside a Sequential model.
model = Sequential([
    layers.Input(shape=(n_steps, n_features)),  # (timesteps, features) per sample
    LSTM(50, activation='relu'),
    Dense(1),  # single-value forecast
])

  super().__init__(**kwargs)


In [17]:
model.compile(optimizer='adam', loss='mse')

In [18]:
model.summary()

In [19]:
%%time 
epochs=200 # number of times a complete dataset is passed
# batch_size is left at its default (32); epochs and verbose are set explicitly below
history = model.fit(
  X,
  y,
  epochs=epochs,
  verbose=0
)

CPU times: user 4.2 s, sys: 396 ms, total: 4.59 s
Wall time: 4.23 s


Verbose:
Default Value: "auto" (which resolves to 1, the progress bar, in most cases)

Explanation: The verbose argument controls the logging of information during training:

- 0: Silent mode; no output will be printed during training.
- 1: Progress bar mode; a detailed progress bar is shown for each epoch.
- 2: One line per epoch; no progress bar but one line is printed after each epoch showing the epoch number, loss, etc.

In [21]:
# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))

(1, 3, 1)

In [22]:
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[101.81673]]


# Stacked LSTM

Multiple hidden LSTM layers can be stacked one on top of another in what is referred to as a Stacked LSTM model.

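An LSTM layer requires a three-dimensional input, and by default an LSTM produces a two-dimensional output as an interpretation from the end of the sequence. To stack LSTM layers, every LSTM layer except the last must therefore set `return_sequences=True`, so that it emits one output per time step and the next LSTM layer receives the three-dimensional input it needs.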

In [23]:
# define model
# The input shape is declared with an explicit Input layer: Keras 3 warns
# against passing `input_shape` to a layer inside a Sequential model.
model = Sequential([
    layers.Input(shape=(n_steps, n_features)),
    # return_sequences=True emits one output per time step, giving the next
    # LSTM the 3D [samples, timesteps, features] input it requires
    LSTM(50, activation='relu', return_sequences=True),
    LSTM(50, activation='relu'),  # return_sequences defaults to False
    Dense(1),
])

  super().__init__(**kwargs)


In [24]:
model.compile(optimizer='adam', loss='mse')

In [25]:
model.summary()

In [26]:
%%time 
epochs=200 # number of times a complete dataset is passed
# batch_size is left at its default (32); epochs and verbose are set explicitly below
history = model.fit(
  X,
  y,
  epochs=epochs,
  verbose=0
)

CPU times: user 4.91 s, sys: 637 ms, total: 5.55 s
Wall time: 5.05 s


In [27]:
# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))

In [28]:
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[105.1186]]


# Bidirectional LSTM

On some sequence prediction problems, it can be beneficial to allow the LSTM model to learn the input sequence both forward and backwards and concatenate both interpretations.

This is called a Bidirectional LSTM.

We can implement a Bidirectional LSTM for univariate time series forecasting by wrapping the first hidden layer in a wrapper layer called Bidirectional.

In [30]:
# define model
# The input shape is declared with an explicit Input layer: Keras 3 warns
# against passing `input_shape` to a layer inside a Sequential model.
model = Sequential([
    layers.Input(shape=(n_steps, n_features)),
    # the wrapper runs the LSTM over the sequence in both directions
    Bidirectional(LSTM(50, activation='relu')),
    Dense(1),
])

  super().__init__(**kwargs)


In [31]:
model.compile(optimizer='adam', loss='mse')

In [32]:
model.summary()

In [33]:
%%time 
epochs=200 # number of times a complete dataset is passed
# batch_size is left at its default (32); epochs and verbose are set explicitly below
history = model.fit(
  X,
  y,
  epochs=epochs,
  verbose=0
)

CPU times: user 5 s, sys: 551 ms, total: 5.55 s
Wall time: 5.05 s


In [34]:
# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))

In [35]:
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[101.00175]]


# CNN LSTM

A convolutional neural network, or CNN for short, is a type of neural network developed for working with two-dimensional image data.

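The CNN can also be very effective at automatically extracting and learning features from one-dimensional sequence data such as univariate time series data. In a CNN-LSTM, each input window is split into subsequences; the CNN interprets each subsequence, and the resulting sequence of interpretations is passed to an LSTM.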

In [38]:
# define input sequence
raw_seq = [10, 20, 30, 40, 50, 60, 70, 80, 90]
# choose a number of time steps
n_steps = 4
# split into samples
X, y = split_sequence(raw_seq, n_steps)
print(X.shape, y.shape)

(5, 4) (5,)


In [40]:
# reshape from [samples, timesteps] into [samples, subsequences, timesteps, features]
n_features = 1
# each window is cut into n_seq subsequences of n_steps values, so
# n_seq * n_steps must equal the window length used for the split (4)
n_seq = 2
# NOTE: n_steps is redefined here; from now on it is the length of one
# subsequence, not the length of the whole input window
n_steps = 2
X = X.reshape((X.shape[0], n_seq, n_steps, n_features))
X.shape

(5, 2, 2, 1)

In [41]:
# define model
# The input shape is declared with an explicit Input layer: Keras 3 warns
# against passing `input_shape` to a layer inside a Sequential model.
model = Sequential([
    # None leaves the number of subsequences per sample unspecified
    layers.Input(shape=(None, n_steps, n_features)),
    # TimeDistributed applies the same CNN stack to every subsequence
    layers.TimeDistributed(layers.Conv1D(filters=64, kernel_size=1, activation='relu')),
    layers.TimeDistributed(layers.MaxPooling1D(pool_size=2)),
    layers.TimeDistributed(layers.Flatten()),
    # the LSTM reads the sequence of per-subsequence feature vectors
    LSTM(50, activation='relu'),
    Dense(1),
])

  super().__init__(**kwargs)


In [42]:
model.compile(optimizer='adam', loss='mse')

In [43]:
model.summary()

In [44]:
%%time 
epochs=200 # number of times a complete dataset is passed
# batch_size is left at its default (32); epochs and verbose are set explicitly below
history = model.fit(
  X,
  y,
  epochs=epochs,
  verbose=0
)

CPU times: user 4.5 s, sys: 424 ms, total: 4.93 s
Wall time: 4.57 s


In [46]:
# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, n_steps, n_features))

In [47]:
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[100.05422]]


# ConvLSTM

A type of LSTM related to the CNN-LSTM is the ConvLSTM, where the convolutional reading of input is built directly into each LSTM unit.

The ConvLSTM was developed for reading two-dimensional spatial-temporal data, but can be adapted for use with univariate time series forecasting.

In [48]:
# define input sequence
raw_seq = [10, 20, 30, 40, 50, 60, 70, 80, 90]
# choose a number of time steps
n_steps = 4
# split into samples
X, y = split_sequence(raw_seq, n_steps)
# reshape from [samples, timesteps] into [samples, timesteps, rows, columns, features]
n_features = 1
# each 4-step window becomes n_seq "frames" of 1 row x n_steps columns,
# so n_seq * n_steps must equal the window length used for the split
n_seq = 2
# NOTE: n_steps is redefined here; from now on it is the number of columns
# in one frame, not the length of the whole input window
n_steps = 2
X = X.reshape((X.shape[0], n_seq, 1, n_steps, n_features))

In [49]:
# define model
# The input shape is declared with an explicit Input layer: Keras 3 warns
# against passing `input_shape` to a layer inside a Sequential model.
model = Sequential([
    # (timesteps, rows, columns, features) per sample
    layers.Input(shape=(n_seq, 1, n_steps, n_features)),
    layers.ConvLSTM2D(filters=64, kernel_size=(1,2), activation='relu'),
    layers.Flatten(),
    Dense(1),
])

  super().__init__(**kwargs)


In [50]:
model.compile(optimizer='adam', loss='mse')

In [51]:
model.summary()

In [52]:
%%time 
epochs=200 # number of times a complete dataset is passed
# batch_size is left at its default (32); epochs and verbose are set explicitly below
history = model.fit(
  X,
  y,
  epochs=epochs,
  verbose=0
)

CPU times: user 4.64 s, sys: 569 ms, total: 5.21 s
Wall time: 4.81 s


In [55]:
# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, 1, n_steps, n_features))

In [56]:
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[103.30241]]


# Multivariate LSTM Models !!!

Multivariate time series data means data where there is more than one observation for each time step.

In [66]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps):
    """Slice a multivariate 2D array into supervised-learning samples.

    Every column except the last is treated as an input series and the last
    column as the output series. Each sample pairs ``n_steps`` consecutive
    rows of the input columns with the output value on the window's final row.

    Returns a tuple ``(X, y)`` of numpy arrays.
    """
    n_rows = len(sequences)
    # The last usable window is the one whose final row is the last row.
    n_windows = min(n_rows, n_rows - n_steps + 1)
    inputs, targets = [], []
    for start in range(n_windows):
        stop = start + n_steps
        inputs.append(sequences[start:stop, :-1])
        targets.append(sequences[stop - 1, -1])
    return array(inputs), array(targets)

In [58]:
# two parallel input series and an output series holding their per-step sum
in_seq1 = array([10, 20, 30, 40, 50, 60, 70, 80, 90])
in_seq2 = array([15, 25, 35, 45, 55, 65, 75, 85, 95])
out_seq = array([first + second for first, second in zip(in_seq1, in_seq2)])

In [59]:
out_seq

array([ 25,  45,  65,  85, 105, 125, 145, 165, 185])

In [60]:
# convert to [rows, columns] structure
in_seq1 = in_seq1.reshape((len(in_seq1), 1))
in_seq2 = in_seq2.reshape((len(in_seq2), 1))
out_seq = out_seq.reshape((len(out_seq), 1))

In [61]:
out_seq

array([[ 25],
       [ 45],
       [ 65],
       [ 85],
       [105],
       [125],
       [145],
       [165],
       [185]])

In [64]:
from numpy import hstack

# horizontally stack columns
dataset = hstack((in_seq1, in_seq2, out_seq))
dataset

array([[ 10,  15,  25],
       [ 20,  25,  45],
       [ 30,  35,  65],
       [ 40,  45,  85],
       [ 50,  55, 105],
       [ 60,  65, 125],
       [ 70,  75, 145],
       [ 80,  85, 165],
       [ 90,  95, 185]])

In [67]:
# choose a number of time steps
n_steps = 3
# convert into input/output
X, y = split_sequences(dataset, n_steps)
# the dataset knows the number of features, e.g. 2
n_features = X.shape[2]

In [68]:
X.shape

(7, 3, 2)

In [69]:
# define model
# The input shape is declared with an explicit Input layer: Keras 3 warns
# against passing `input_shape` to a layer inside a Sequential model.
model = Sequential([
    # n_features is taken from X.shape[2], i.e. the number of input series
    layers.Input(shape=(n_steps, n_features)),
    LSTM(50, activation='relu'),
    Dense(1),
])

  super().__init__(**kwargs)


In [70]:
model.compile(optimizer='adam', loss='mse')

In [71]:
model.summary()

In [72]:
%%time 
epochs=200 # number of times a complete dataset is passed
# batch_size is left at its default (32); epochs and verbose are set explicitly below
history = model.fit(
  X,
  y,
  epochs=epochs,
  verbose=0
)

CPU times: user 4.27 s, sys: 426 ms, total: 4.69 s
Wall time: 4.55 s


In [73]:
# demonstrate prediction
x_input = array([[80, 85], [90, 95], [100, 105]])
x_input = x_input.reshape((1, n_steps, n_features))

In [74]:
yhat = model.predict(x_input, verbose=0)
print(yhat) #should be 205

[[206.14046]]


# Multi-Step LSTM Models
A time series forecasting problem that requires a prediction of multiple time steps into the future can be referred to as multi-step time series forecasting.

In [75]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps_in, n_steps_out):
    """Slice a univariate series into multi-step supervised-learning samples.

    Each sample pairs a window of ``n_steps_in`` consecutive observations with
    the ``n_steps_out`` observations that immediately follow it.

    Returns a tuple ``(X, y)`` of numpy arrays: the input windows and the
    matching output windows.
    """
    total = len(sequence)
    # A sample needs n_steps_in + n_steps_out values starting at `start`.
    n_windows = min(total, total - n_steps_in - n_steps_out + 1)
    starts = range(n_windows)
    inputs = [sequence[s:s + n_steps_in] for s in starts]
    outputs = [sequence[s + n_steps_in:s + n_steps_in + n_steps_out] for s in starts]
    return array(inputs), array(outputs)

In [81]:
# define input sequence
raw_seq = [10, 20, 30, 40, 50, 60, 70, 80, 90]
# choose a number of time steps
n_steps_in, n_steps_out = 3, 2
# split into samples
X, y = split_sequence(raw_seq, n_steps_in, n_steps_out)
print(X.shape, y.shape)
print(X)
print(y)

(5, 3) (5, 2)
[[10 20 30]
 [20 30 40]
 [30 40 50]
 [40 50 60]
 [50 60 70]]
[[40 50]
 [50 60]
 [60 70]
 [70 80]
 [80 90]]


In [82]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# NOTE(review): this gives y a trailing feature axis, (samples, n_steps_out, 1),
# while the model defined below ends in Dense(n_steps_out) and so outputs
# (samples, n_steps_out). Presumably the loss reconciles the extra axis, but
# the reshape looks unnecessary for a vector-output model — confirm.
y = y.reshape((y.shape[0], y.shape[1], n_features))

In [84]:
print(X.shape, y.shape)

(5, 3, 1) (5, 2, 1)


In [96]:
# define model
# The input shape is declared with an explicit Input layer: Keras 3 warns
# against passing `input_shape` to a layer inside a Sequential model.
model = Sequential([
    layers.Input(shape=(n_steps_in, n_features)),
    # return_sequences=True hands the full 3D sequence to the second LSTM
    LSTM(100, activation='relu', return_sequences=True),
    LSTM(100, activation='relu'),
    Dense(n_steps_out),  # one output unit per forecast step
])

In [97]:
model.compile(optimizer='adam', loss='mse')

In [98]:
model.summary()

In [99]:
%%time 
epochs=200 # number of times a complete dataset is passed
# batch_size is left at its default (32); epochs and verbose are set explicitly below
history = model.fit(
  X,
  y,
  epochs=epochs,
  verbose=0
)

CPU times: user 5.38 s, sys: 533 ms, total: 5.91 s
Wall time: 5.32 s


In [104]:
# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps_in, n_features))

In [105]:
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[106.375244 118.58283 ]]


# Multivariate Multi-Step LSTM Models

In [106]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
    """Slice a multivariate 2D array into multi-step samples.

    Every column except the last is treated as an input series and the last
    column as the output series. Each sample pairs ``n_steps_in`` consecutive
    rows of the input columns with ``n_steps_out`` values of the output
    column, starting on the input window's final row.

    Returns a tuple ``(X, y)`` of numpy arrays.
    """
    n_rows = len(sequences)
    # The output window overlaps the last input row, so one sample spans
    # n_steps_in + n_steps_out - 1 rows in total.
    n_windows = min(n_rows, n_rows - n_steps_in - n_steps_out + 2)
    inputs, outputs = [], []
    for start in range(n_windows):
        in_stop = start + n_steps_in
        out_stop = in_stop + n_steps_out - 1
        inputs.append(sequences[start:in_stop, :-1])
        outputs.append(sequences[in_stop - 1:out_stop, -1])
    return array(inputs), array(outputs)

In [107]:
# define input sequence
in_seq1 = array([10, 20, 30, 40, 50, 60, 70, 80, 90])
in_seq2 = array([15, 25, 35, 45, 55, 65, 75, 85, 95])
# the output series is the element-wise sum of the two input series
out_seq = array([in_seq1[i]+in_seq2[i] for i in range(len(in_seq1))])
# convert to [rows, columns] structure
in_seq1 = in_seq1.reshape((len(in_seq1), 1))
in_seq2 = in_seq2.reshape((len(in_seq2), 1))
out_seq = out_seq.reshape((len(out_seq), 1))
# horizontally stack columns
dataset = hstack((in_seq1, in_seq2, out_seq))
# choose a number of time steps
n_steps_in, n_steps_out = 3, 2
# convert into input/output; each y starts on the last row of its input window
X, y = split_sequences(dataset, n_steps_in, n_steps_out)
# the dataset knows the number of features, e.g. 2
n_features = X.shape[2]

In [112]:
# define model
# The input shape is declared with an explicit Input layer: Keras 3 warns
# against passing `input_shape` to a layer inside a Sequential model.
model = Sequential([
    layers.Input(shape=(n_steps_in, n_features)),
    # return_sequences=True hands the full 3D sequence to the second LSTM
    LSTM(100, activation='relu', return_sequences=True),
    LSTM(100, activation='relu'),
    Dense(n_steps_out),  # one output unit per forecast step
])

In [113]:
model.compile(optimizer='adam', loss='mse')

In [114]:
model.summary()

In [115]:
%%time 
epochs=200 # number of times a complete dataset is passed
# batch_size is left at its default (32); epochs and verbose are set explicitly below
history = model.fit(
  X,
  y,
  epochs=epochs,
  verbose=0
)

CPU times: user 5.2 s, sys: 587 ms, total: 5.79 s
Wall time: 5.22 s


In [116]:
# demonstrate prediction
x_input = array([[70, 75], [80, 85], [90, 95]])
x_input = x_input.reshape((1, n_steps_in, n_features))

In [117]:
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[187.42732 207.27602]]
