# CS395 - Assignment 4
### Univarite LSTMs

Date: Feb 6, 2019  
By: Joshua Eli Swick

## Data Preparation

In [56]:
# import libs for all code below
from numpy import array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import ConvLSTM2D

In [3]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

In [4]:
# define input sequence
raw_seq = [10, 20, 30, 40, 50, 60, 70, 80, 90]
# choose a number of time steps
n_steps = 3
# split into samples
X, y = split_sequence(raw_seq, n_steps)
# summarize the data
for i in range(len(X)):
    print(X[i], y[i])

[10 20 30] 40
[20 30 40] 50
[30 40 50] 60
[40 50 60] 70
[50 60 70] 80
[60 70 80] 90


## Vanilla LSTM

### 1.
Run the given code 3 times. Show all outputs.

In [38]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))

In [37]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Vanilla LSTM output 1: {yhat}")

Vanilla LSTM output 1: [[102.019745]]


In [7]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Vanilla LSTM output 2: {yhat}")

Vanilla LSTM output 2: [[103.71929]]


In [16]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Vanilla LSTM output 3: {yhat}")

Vanilla LSTM output 3: [[101.76787]]


### 2.
Change the activation function from 'relu' to 2 others. Run 3 times, show all outputs.

In [8]:
# define model
model = Sequential()
# tanh activation
model.add(LSTM(50, activation='tanh', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Vanilla LSTM, TanH activation: {yhat}")

Vanilla LSTM, TanH activation: [[15.0022955]]


In [20]:
# define model
model = Sequential()
# tanh activation
model.add(LSTM(50, activation='tanh', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Vanilla LSTM, TanH activation: {yhat}")

Vanilla LSTM, TanH activation: [[13.831856]]


In [21]:
# define model
model = Sequential()
# tanh activation
model.add(LSTM(50, activation='tanh', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Vanilla LSTM, TanH activation: {yhat}")

Vanilla LSTM, TanH activation: [[10.571371]]


## Stacked LSTM

### 3.
Run give code 3 times. Show all outputs.

In [9]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True,
input_shape=(n_steps, n_features)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Stacked LSTM, output 1: {yhat}")

Stacked LSTM, output 1: [[102.285774]]


In [23]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True,
input_shape=(n_steps, n_features)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Stacked LSTM, output 2: {yhat}")

Stacked LSTM, output 2: [[103.066444]]


In [24]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True,
input_shape=(n_steps, n_features)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Stacked LSTM, output 3: {yhat}")

Stacked LSTM, output 3: [[105.43467]]


### 4.
Change the activation function from 'relu' to 2 others. Run 3 times, show all outputs.

In [13]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True,
input_shape=(n_steps, n_features)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Stacked LSTM, TanH activation, output 1: {yhat}")

Stacked LSTM, TanH activation, output 1: [[100.05167]]


In [14]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True,
input_shape=(n_steps, n_features)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Stacked LSTM, TanH activation, output 2: {yhat}")

Stacked LSTM, TanH activation, output 2: [[101.3217]]


In [15]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True,
input_shape=(n_steps, n_features)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Stacked LSTM, TanH activation, output 3: {yhat}")

Stacked LSTM, TanH activation, output 3: [[102.59919]]


### 5.
What are the primary differences between the Stacked LSTM and the Vanilla LSTM? Describe and explain what is occuring differently. When would you use the Stacked LSTM instead of the Vanilla LSTM and vise versa.

## Bidirectional LSTM

### 6.
Run the given code 3 times. Show all outputs.

In [18]:
# define model
model = Sequential()
model.add(Bidirectional(LSTM(50, activation='relu'), input_shape=(n_steps,
n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Bidirectional LSTM, output 1: {yhat}")

Bidirectional LSTM, output 1: [[103.06263]]


In [19]:
# define model
model = Sequential()
model.add(Bidirectional(LSTM(50, activation='relu'), input_shape=(n_steps,
n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Bidirectional LSTM, output 2: {yhat}")

Bidirectional LSTM, output 2: [[101.09529]]


In [20]:
# define model
model = Sequential()
model.add(Bidirectional(LSTM(50, activation='relu'), input_shape=(n_steps,
n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Bidirectional LSTM, output 3: {yhat}")

Bidirectional LSTM, output 3: [[100.78896]]


### 7.
Change the activation function from 'relu' to 2 others. Run atleast 3 times, show outputs.

In [22]:
# define model
model = Sequential()
model.add(Bidirectional(LSTM(50, activation='tanh'), input_shape=(n_steps,
n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Bidirectional LSTM, TanH activation, output 1: {yhat}")

Bidirectional LSTM, TanH activation, output 1: [[26.190388]]


In [23]:
# define model
model = Sequential()
model.add(Bidirectional(LSTM(50, activation='tanh'), input_shape=(n_steps,
n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Bidirectional LSTM, TanH activation, output 2: {yhat}")

Bidirectional LSTM, TanH activation, output 2: [[26.09733]]


In [24]:
# define model
model = Sequential()
model.add(Bidirectional(LSTM(50, activation='tanh'), input_shape=(n_steps,
n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=0)

# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Bidirectional LSTM, TanH activation, output 3: {yhat}")

Bidirectional LSTM, TanH activation, output 3: [[28.400946]]


### 8.
What are the primary differences between the Bidirectional LSTM and the Vanilla LSTM? Describe and explain what is occurring differently. When would you use the Bidirectional LSTM instead of the Vanilla
LSTM and vice versa?

## CNN LSTM

In [52]:
# choose a number of time steps
n_steps = 4
# split into samples
X, y = split_sequence(raw_seq, n_steps)
# reshape from [samples, timesteps] into [samples, subsequences, timesteps, features]
n_features = 1
n_seq = 2
n_steps = 2
X = X.reshape((X.shape[0], n_seq, n_steps, n_features))

### 9.
Run the given code 3 times. Show all outputs.

In [46]:
# define model
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1,activation='relu'),
                          input_shape=(None, n_steps, n_features)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"CNN LSTM, output 1: {yhat}")

CNN LSTM, output 1: [[102.80783]]


In [47]:
# define model
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1,activation='relu'),
                          input_shape=(None, n_steps, n_features)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"CNN LSTM, output 2: {yhat}")

CNN LSTM, output 2: [[102.35682]]


In [48]:
# define model
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1,activation='relu'),
                          input_shape=(None, n_steps, n_features)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"CNN LSTM, output 3: {yhat}")

CNN LSTM, output 3: [[101.304375]]


### 10.
Change the activation function from 'relu' to 2 others. Run 3 times, show all outputs.

In [49]:
# define model
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1,activation='tanh'),
                          input_shape=(None, n_steps, n_features)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"CNN LSTM, TanH activation, output 1: {yhat}")

CNN LSTM, TanH activation, output 1: [[94.09761]]


In [50]:
# define model
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1,activation='tanh'),
                          input_shape=(None, n_steps, n_features)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"CNN LSTM, TanH activation, output 2: {yhat}")

CNN LSTM, TanH activation, output 2: [[93.87924]]


In [51]:
# define model
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1,activation='tanh'),
                          input_shape=(None, n_steps, n_features)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"CNN LSTM, TanH activation, output 3: {yhat}")

CNN LSTM, TanH activation, output 3: [[96.94689]]


### 11.
What are the primary differences between the CNN LSTM and the Vanilla LSTM? Describe and explain what is occurring differently. When would you use the CNN LSTM instead of the Vanilla LSTM and vice versa?

## ConvLSTM

In [55]:
# choose a number of time steps
n_steps = 4
# split into samples
X, y = split_sequence(raw_seq, n_steps)
# reshape from [samples, timesteps] into [samples, timesteps, rows, columns, features]
n_features = 1
n_seq = 2
n_steps = 2
X = X.reshape((X.shape[0], n_seq, 1, n_steps, n_features))

### 12.
Run the given code 3 times. Show all outputs.

In [57]:
# define model
model = Sequential()
model.add(ConvLSTM2D(filters=64, kernel_size=(1,2), activation='relu',
                     input_shape=(n_seq, 1, n_steps, n_features)))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, 1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Conv-LSTM, output 1: {yhat}")

Conv-LSTM, output 1: [[103.80588]]


In [58]:
# define model
model = Sequential()
model.add(ConvLSTM2D(filters=64, kernel_size=(1,2), activation='relu',
                     input_shape=(n_seq, 1, n_steps, n_features)))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, 1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Conv-LSTM, output 2: {yhat}")

Conv-LSTM, output 2: [[103.72124]]


In [59]:
# define model
model = Sequential()
model.add(ConvLSTM2D(filters=64, kernel_size=(1,2), activation='relu',
                     input_shape=(n_seq, 1, n_steps, n_features)))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, 1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Conv-LSTM, output 3: {yhat}")

Conv-LSTM, output 3: [[103.564896]]


### 13. 
Change the activation function from 'relu' to 2 others. Run 3 times, show all outputs.

In [60]:
# define model
model = Sequential()
model.add(ConvLSTM2D(filters=64, kernel_size=(1,2), activation='tanh',
                     input_shape=(n_seq, 1, n_steps, n_features)))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, 1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Conv-LSTM, TanH activation, output 1: {yhat}")

Conv-LSTM, TanH activation, output 1: [[26.810284]]


In [61]:
# define model
model = Sequential()
model.add(ConvLSTM2D(filters=64, kernel_size=(1,2), activation='tanh',
                     input_shape=(n_seq, 1, n_steps, n_features)))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, 1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Conv-LSTM, TanH activation, output 2: {yhat}")

Conv-LSTM, TanH activation, output 2: [[27.475664]]


In [62]:
# define model
model = Sequential()
model.add(ConvLSTM2D(filters=64, kernel_size=(1,2), activation='tanh',
                     input_shape=(n_seq, 1, n_steps, n_features)))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=500, verbose=0)

# demonstrate prediction
x_input = array([60, 70, 80, 90])
x_input = x_input.reshape((1, n_seq, 1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(f"Conv-LSTM, TanH activation, output 3: {yhat}")

Conv-LSTM, TanH activation, output 3: [[27.263178]]


### 14.
What are the primary differences between the Conv‐LSTM and the CNN LSTM? Describe and explain what is occurring differently. When would you use the Conv‐LSTM instead of the CNN LSTM and vice versa?

### 15.
Provide a single table that illustrates the different LSTM models, the activation functions used, and the average of the 3 results (predictions) you received to 3 decimal places. What are your general findings?

| LSTM Model | Activation Function Used | Avg. of 3 Results | General Findings |
| ---------- | ------------------------ | ----------------- | ---------------- |
| Vanilla    | relu                     | 100.000 ||
| Vanilla    | tanh                     | 100.000 ||
| Vanilla    | other                    | 100.000 ||
| Stacked    | relu                     | 100.000 ||
| Stacked    | tanh                     | 100.000 ||
| Stacked    | other                    | 100.000 ||
| Bidirectional | relu                  | 100.000 ||
| Bidirectional | tanh                  | 100.000 ||
| Bidirectional |other                  | 100.000 ||
| CNN        | relu                     | 100.000 ||
| CNN        | tanh                     | 100.000 ||
| CNN        | other                    | 100.000 ||
| Conv       | relu                     | 100.000 ||
| Conv       | tanh                     | 100.000 ||
| Conv       | other                    | 100.000 ||