In [None]:
# Setup, version checks and common imports

# Python ≥3.8 is required
import sys
assert sys.version_info >= (3, 8)


# TensorFlow ≥2.0 is required
import tensorflow as tf
# Compare the numeric major version: comparing the raw version strings is
# lexicographic, so e.g. "10.0" would sort before "2.0" and fail the check.
assert int(tf.__version__.split(".")[0]) >= 2

# Common imports
import numpy as np
import os

from tensorflow import keras
from tensorflow.keras import layers

# Seed NumPy's global random generator to make this notebook's output stable across runs
np.random.seed(42)

import matplotlib.pyplot as plt

# Default font sizes applied to every figure drawn below
plt.rc('font', size=14)
plt.rc('axes', labelsize=14, titlesize=14)
plt.rc('legend', fontsize=14)
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)

In [None]:
def generate_time_series(nr_series, n_steps, noise_factor=0.1):
    """Generate a batch of synthetic univariate time series.

    Each series is the sum of two sine waves with random frequency and phase
    offset, plus uniform noise centred on zero. Being univariate, every
    timestep holds a single value. All random values are drawn from NumPy's
    global generator, so call ``np.random.seed`` first for reproducible data.

    Args:
        nr_series: Number of series to generate (the batch size).
        n_steps: Number of values (timesteps) in each series.
        noise_factor: Width of the uniform noise band; the noise added to each
            value lies in [-noise_factor / 2, noise_factor / 2).
            Defaults to 0.1.

    Returns:
        A float32 NumPy array of shape [nr_series, n_steps, 1].
    """
    # One random frequency and offset per series for each of the two waves;
    # the trailing axis of size 1 broadcasts against the time axis.
    freq1, freq2, offsets1, offsets2 = np.random.rand(4, nr_series, 1)
    time = np.linspace(0, 1, n_steps)
    series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10))  #   wave 1
    series += 0.2 * np.sin((time - offsets2) * (freq2 * 20 + 20)) # + wave 2
    # rand() is uniform in [0, 1); subtracting 0.5 centres the noise on zero
    series += noise_factor * (np.random.rand(nr_series, n_steps) - 0.5)   # + noise
    return series[..., np.newaxis].astype(np.float32)

In [None]:
# Build the training, validation and test sets from a single batch of series

np.random.seed(42)

n_steps = 50
noise_factor = 0.1

# Each series holds n_steps + 1 values: the first n_steps are the model input
# and the last one is the target to predict.
series = generate_time_series(10000, n_steps + 1, noise_factor)

# Rows 0-6999 are used for training, 7000-8999 for validation, 9000+ for testing
(X_train, y_train), (X_valid, y_valid), (X_test, y_test) = (
    (series[rows, :n_steps], series[rows, -1])
    for rows in (slice(None, 7000), slice(7000, 9000), slice(9000, None))
)

In [None]:
# Report the shape of the inputs of each split, one line per split
print(
    *(
        f"{label} {inputs.shape}"
        for label, inputs in (
            ('Training: ', X_train),
            ('Validation: ', X_valid),
            ('Testing: ', X_test),
        )
    ),
    sep="\n",
)


In [None]:
def plot_series(series, y=None, y_pred=None, x_label="t", y_label="x(t)", n_steps=None):
    """Plot one time series on the current Matplotlib axes.

    Optionally marks the true next value and/or a predicted next value at the
    timestep that follows the end of the series.

    Args:
        series: 1-D sequence with the observed values of the series.
        y: True value of the next step, drawn as a blue "x"; skipped when None.
        y_pred: Predicted value of the next step, drawn as a red circle;
            skipped when None.
        x_label: Label of the horizontal axis; no label is set when falsy.
        y_label: Label of the vertical axis; no label is set when falsy.
        n_steps: Horizontal position of the y / y_pred markers. Defaults to
            ``len(series)``, i.e. the step right after the last observed value.
    """
    # Derive the horizon from the data instead of reading a notebook-level
    # global, so the function also works for series of any length.
    if n_steps is None:
        n_steps = len(series)
    plt.plot(series, ".-")
    if y is not None:
        plt.plot(n_steps, y, "bx", markersize=10)
    if y_pred is not None:
        plt.plot(n_steps, y_pred, "ro")
    plt.grid(True)
    if x_label:
        plt.xlabel(x_label, fontsize=16)
    if y_label:
        plt.ylabel(y_label, fontsize=16, rotation=0)
    # Zero reference line spanning exactly the visible horizontal range
    plt.hlines(0, 0, n_steps + 1, linewidth=1)
    plt.axis([0, n_steps + 1, -1, 1])

In [None]:
# Draw the first 3 validation series side by side
# The X mark highlights the value to predict in the next step

fig, axes = plt.subplots(1, 3, figsize=(12, 4), sharey=True)
for col in range(len(axes)):
    plt.sca(axes[col])
    # Only the leftmost panel carries the y-axis label
    plot_series(
        X_valid[col, :, 0],
        y=y_valid[col, 0],
        y_label=None if col else "$x(t)$",
    )
plt.show()

**Baseline Approaches**

In [None]:
# Baseline 1: naive forecast
#
# The prediction for each series is simply its last observed value.
# No model has to be trained for this baseline; it is scored directly
# on the test dataset.

y_pred = X_test[:, -1, :]

# Smallest and largest absolute error over the test series
difference = np.abs(y_pred - y_test)
print('Min: ', difference.min())
print('Max: ', difference.max())

# Test-set MSE

print('MSE: ', np.mean(keras.losses.mean_squared_error(y_test, y_pred)))

In [None]:
# Show the Baseline 1 prediction for one test series
# The red circle is the prediction; the X mark is the correct value

# Change this index to inspect other timeseries
serie = 0

plot_series(
    X_test[serie, :, 0],
    y=y_test[serie, 0],
    y_pred=y_pred[serie, 0],
)
plt.show()

In [None]:
# Baseline 2

# Rely on a simple fully connected network that linearly combines the inputs to predict the next value
# Training finds the best weights of the linear combination

keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

# The Flatten layer linearizes the inputs (the n_steps values of the timeseries)
# The output is the next predicted value

# The input length comes from n_steps rather than a hard-coded 50, so the
# model keeps matching the data if the series length is changed
model = keras.models.Sequential([
    layers.Flatten(input_shape=[n_steps, 1]),
    layers.Dense(1)
])

In [None]:
# Print a summary of the Baseline 2 model (its layers and parameter counts)
model.summary()

In [None]:
# Train Baseline 2 with the Adam optimizer on the MSE loss,
# monitoring the validation set after every epoch
model.compile(optimizer="adam", loss="mse")
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_valid, y_valid),
)

In [None]:
# MSE test error: the model was compiled with loss="mse",
# so evaluate() reports the MSE on the test set

model.evaluate(X_test, y_test)

In [None]:
# Plot the prediction error of Baseline 2 on the first test timeseries
# The red circle identifies the prediction and the X mark the correct value

# Try other timeseries by changing this index

serie = 0

# Predict on the test set: the same split that is used for the test MSE and
# for the Baseline 1 plot, so both baselines are shown on the same series
y_pred = model.predict(X_test)
plot_series(X_test[serie, :, 0], y_test[serie, 0], y_pred[serie, 0])
plt.show()

In [None]:
# Collect the absolute values of the weights of the neural network (at the end of training)
# model.layers[0] is the Flatten layer; model.layers[1] is the Dense layer that holds the weights

weights, bias = model.layers[1].get_weights()
weights = np.absolute(weights)

# Plot the weight magnitudes against the index of the input timestep they multiply

plt.plot(weights)
plt.xlabel("t", fontsize=16)
plt.ylabel("|weight|", fontsize=16)
plt.grid(True)
# Render the figure explicitly, like the other plotting cells, instead of
# leaving the Line2D repr as the cell output
plt.show()

Quiz:

1. Compare the performance of the 2 baselines.

2. Analyze the weight values that appear in the plot. How do you interpret the pattern of the line?

**RNN Approaches**

In [None]:
# The two baseline approaches above give us reference MSE values
#
# Now we can move to recurrent networks and check how they can improve performance
#
# https://www.tensorflow.org/guide/keras/rnn
# https://www.tensorflow.org/api_docs/python/tf/keras/layers/SimpleRNN

# RNN 1
#
# A single RNN cell processes the input sequentially and outputs a prediction

keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)


# The first input dimension is None: the length of the input does not have to be specified

modelR1 = keras.Sequential()
modelR1.add(layers.SimpleRNN(units=1, input_shape=[None, 1]))

In [None]:

# Print a summary of the RNN 1 model (its layers and parameter counts)
modelR1.summary()

In [None]:

# Train RNN 1 with the Adam optimizer on the MSE loss,
# monitoring the validation set after every epoch
modelR1.compile(optimizer="adam", loss="mse")

history = modelR1.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_valid, y_valid),
)

In [None]:
# Evaluate MSE on the test set (modelR1 was compiled with loss="mse")

modelR1.evaluate(X_test, y_test)

Quiz:

1. How many parameters does this network have? What are they used for?

2. Compare the performance with the baseline approaches. How do you analyze results?

In [None]:
# RNN 2

# Add several layers and several RNN cells per layer, aiming at enhancing performance (Deep RNN)

# Two recurrent layers, each with 3 RNN cells
# A final Dense layer with just one cell to output the prediction (a single value, so we don't need recurrent cells here)

keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

# Important detail
# Parameter return_sequences specifies whether the whole sequence or just the final value is passed to the next layer
# By default it has value False

modelR2 = keras.models.Sequential([

     #### Complete the model #####
     # TODO (exercise): add the two recurrent layers described above, before the Dense layer

    layers.Dense(1)
])


In [None]:
# Print a summary of the RNN 2 model (its layers and parameter counts)
modelR2.summary()

In [None]:
# Train RNN 2 with the Adam optimizer on the MSE loss,
# monitoring the validation set after every epoch
modelR2.compile(optimizer="adam", loss="mse")

history = modelR2.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_valid, y_valid),
)

In [None]:
# Evaluate MSE on the test set (modelR2 was compiled with loss="mse")

modelR2.evaluate(X_test, y_test)

Quiz:

1. How many parameters does this network have? What are they used for?

2. What is the activation function of the Dense layer? Why?

3. Compare the performance with the previous approaches. How do you analyze results?

4. Create a new variant of modelR2. It should keep the number of layers, but with a higher number of RNN cells in each one (e.g., 10). Train this variant and analyze its performance on the test set.

In [None]:
# RNN 3

# Replace simple RNN cells by LSTM and/or GRU cells

# https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM
# https://www.tensorflow.org/api_docs/python/tf/keras/layers/GRU

# Look for a good configuration for a Deep RNN containing LSTM and/or GRU cells. The model should comply with the following constraints:
#  1. Two hidden layers with LSTM / GRU cells
#  2. Maximum of 5000 trainable parameters

keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

modelR3 = keras.models.Sequential([

    ### Complete the Model ###
    # TODO (exercise): add the two LSTM / GRU hidden layers here, before the Dense layer

    layers.Dense(1)

])

In [None]:
# Print a summary of the RNN 3 model (its layers and parameter counts)
modelR3.summary()

In [None]:
# Train RNN 3 with the Adam optimizer on the MSE loss,
# monitoring the validation set after every epoch
modelR3.compile(optimizer="adam", loss="mse")

history = modelR3.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_valid, y_valid),
)

In [None]:
# Evaluate MSE on the test set (modelR3 was compiled with loss="mse")

modelR3.evaluate(X_test, y_test)

Quiz:

1. How many parameters does this network have? What are they used for?

2. Compare the performance with the previous approaches. How do you analyze results?