In [1]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [3]:
# Show which TensorFlow version the runtime loaded.
tf.__version__

'2.1.0-rc1'

In [0]:
# Recurrent Neurons and Layers

In [0]:
# Output types
#(seq to seq: gives a sequence of outputs)
#(seq to vec: gives 1 output from the last cell, ignores the rest)
#(vec to seq: a sequence from 1 input vector)
#(encoder to decoder: sequence to vector to sequence)

In [0]:
# Training RNN
#(BPTT: backpropagation through time: unroll the network through time and backprop like normal)
#(the gradients flow through all outputs, though some outputs might be ignored by the loss)

In [0]:
# Time Series Forecasting

In [0]:
# Example time series
def generate_time_series(batch_size, n_steps):
  """Generate a batch of synthetic univariate time series.

  Each series is the sum of two sine waves with random frequencies and
  offsets, plus a small amount of uniform noise.

  Args:
    batch_size: number of series to generate.
    n_steps: number of time steps per series.

  Returns:
    A float32 array of shape (batch_size, n_steps, 1).
  """
  # One random draw per series for each of the four wave parameters.
  wave_params = np.random.rand(4, batch_size, 1)
  freq1, freq2, offset1, offset2 = wave_params
  t = np.linspace(0, 1, n_steps)
  wave1 = 0.5 * np.sin((t - offset1) * (freq1 * 10 + 10))
  wave2 = 0.2 * np.sin((t - offset2) * (freq2 * 20 + 20))
  # Uniform noise in [-0.05, 0.05).
  noise = 0.1 * (np.random.rand(batch_size, n_steps) - 0.5)
  series = wave1 + wave2 + noise
  # Append a trailing feature axis of size 1.
  return series[..., np.newaxis].astype(np.float32)

In [0]:
n_steps = 50
series = generate_time_series(10000, n_steps + 1)
# Inputs are the first n_steps values of each series; the target is its final value.
inputs, targets = series[:, :n_steps], series[:, -1]
# 7000 series for training, the next 2000 for validation, the rest for testing.
X_train, X_valid, X_test = inputs[:7000], inputs[7000:9000], inputs[9000:]
y_train, y_valid, y_test = targets[:7000], targets[7000:9000], targets[9000:]

In [0]:
# Benchmark
#(using naive forecasting)
# Predict that the target equals the last observed value of each input window.
y_pred = X_valid[:, -1]
# Mean squared error of the naive forecast over the validation split.
np.mean(keras.losses.mean_squared_error(y_valid, y_pred))

0.020888668

In [0]:
#(using a simple linear regression net)
# The input shape is derived from n_steps (instead of a hard-coded 50) so the
# model stays in sync with the window length used to build X_train.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[n_steps, 1]),
    keras.layers.Dense(1)   # one linear unit: a weighted sum of the flattened time steps
])

model.compile(loss=keras.losses.mean_squared_error, optimizer="adam")
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_valid, y_valid))

Train on 7000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7ff0923755f8>

In [0]:
# Loss (MSE) of the linear model on the test split.
# NOTE: the RNN models further down are evaluated on the validation split instead.
model.evaluate(X_test, y_test)



0.0035688097048550845

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

In [0]:
# Simple RNN
# A single recurrent layer with one unit; it returns only its last output,
# so the model maps a whole sequence to one value.
model = Sequential()
model.add(SimpleRNN(1, input_shape=[None, 1]))
model.compile(loss="mse", optimizer="adam")
model.fit(X_train, y_train,
          validation_data=(X_valid, y_valid),
          epochs=20, batch_size=32, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x7ff0921b76a0>

In [0]:
# Validation loss (MSE) of the single-unit SimpleRNN model.
model.evaluate(X_valid, y_valid)



0.14203766334056855

In [0]:
# Deep RNN

In [0]:
# Two stacked recurrent layers followed by a dense output layer.
model = Sequential()
# The first layer returns the full sequence so the second layer receives every time step.
model.add(SimpleRNN(20, return_sequences=True, input_shape=[None, 1]))
# The second layer returns only its last output.
model.add(SimpleRNN(20))
model.add(Dense(1))

In [0]:
# Train the deep RNN with MSE loss and the Adam optimizer for 20 epochs.
model.compile(optimizer="adam", loss="mse")
model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

Train on 7000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7ff07893b588>

In [0]:
# Validation loss (MSE) of the deep RNN.
model.evaluate(X_valid, y_valid)



0.002605174858123064

In [0]:
# Forecasting several steps ahead
#(use the old model, keeps predicting with the predicted values)
# Generate one new series: n_steps input values followed by the 10 values to forecast.
series = generate_time_series(1, n_steps+10)
X_new, y_new = series[:, :n_steps], series[:, n_steps:]
X = X_new
for step_ahead in range(10):
  # Predict from the window starting at step_ahead, then append the prediction
  # as a new time step so the next iteration can use it as input.
  y_pred_one = model.predict(X[:, step_ahead:])[:, np.newaxis, :]
  X = np.concatenate([X, y_pred_one], axis=1)
# Keep only the 10 appended predictions.
y_pred = X[:, n_steps:]

In [0]:
#(predicting a vector of 10 all at once)
#(requires transforming the targets to be vectors of 10)
series = generate_time_series(10000, n_steps+10)
# Targets are the last 10 values of each series, with the feature axis dropped.
X_train, y_train = series[:7000, :n_steps], series[:7000, -10:, 0]
X_valid, y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:, 0]
X_test, y_test = series[9000:, :n_steps], series[9000:, -10:, 0]

# Same deep RNN as before, but the dense layer now outputs 10 values.
model = Sequential([ 
                    SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),
                    SimpleRNN(20),
                    Dense(10)
])

model.compile(optimizer="adam", loss="mse")
model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

# X_new is the single series generated in the iterative-forecasting cell above.
y_pred = model.predict(X_new)

Train on 7000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
#(seq to seq approach: forecasts next 10 steps at every step)
#(more error gradients to train)
n_steps=50
series=generate_time_series(10000, n_steps+10)
X_train = series[:7000, :n_steps]
X_valid = series[7000:9000, :n_steps]
X_test = series[9000:, :n_steps]
y = np.empty((10000, n_steps, 10))   # each target is a 10-dimensional vector per time step
for step_ahead in range(1, 10+1):
  # Channel step_ahead-1 holds the series shifted step_ahead steps into the future.
  y[:, :, step_ahead-1] = series[:, step_ahead:step_ahead+n_steps, 0]
# Split the targets with the same boundaries as the inputs.
y_train = y[:7000]
y_valid = y[7000:9000]
y_test = y[9000:]


from tensorflow.keras.layers import TimeDistributed

# Sequence-to-sequence model: both recurrent layers return full sequences,
# and the dense layer emits 10 values at every time step.
model = Sequential([ 
                    SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),
                    SimpleRNN(20, return_sequences=True),
                    TimeDistributed(Dense(10))   #TimeDistributed is a wrapper that applies the wrapped layer at every step of the seq
])

#(the model outputs a forecast at every time step, but only the last one matters for evaluation, hence this metric)
def last_time_step_mse(y_true, y_pred):
  """Return the mean squared error computed on the final time step only."""
  final_true = y_true[:, -1]
  final_pred = y_pred[:, -1]
  return keras.metrics.mean_squared_error(final_true, final_pred)

# The loss is the MSE over every time step; the extra metric tracks the MSE of
# the last time step only.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras versions reject.
model.compile(loss="mse",
              optimizer=keras.optimizers.Adam(learning_rate=0.01),
              metrics=[last_time_step_mse])

history = model.fit(X_train, y_train, epochs=20,
                    validation_data=(X_valid, y_valid))

Train on 7000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
#(for error bars seen in time series forecasting, use MC Dropout to get mean and std)

In [0]:
# Handling Long Sequences

In [0]:
# Unstable gradients
#(techniques like good initializers, faster optimizers, and dropout still work; non-saturating activation functions don't)
#(non-saturating activations are bad because gradients get multiplied further down the time steps, which leads to exploding gradients)
#(as such, saturating functions like tanh are better, or maybe use gradient clipping)
#(batch norm also doesn't work, since it would use the same scaling and offset on both hidden states and inputs; using it on the inputs alone provides minimal improvement)
#(instead, use layer normalization, which also normalizes, but the mean and std are calculated across the feature dimension (instead of across the batch dimension))

In [0]:
# Add layer normalization to a RNN cell
class LNSimpleRNNCell(keras.layers.Layer):
  """Simple RNN cell that applies layer normalization before its activation.

  The wrapped SimpleRNNCell is built without an activation so that the
  normalization is applied to the linear combination of inputs and state;
  the activation is applied afterwards.

  Args:
    units: size of the cell's state and output.
    activation: activation applied after layer normalization (name or callable).
    **kwargs: forwarded to keras.layers.Layer.
  """
  def __init__(self, units, activation="tanh", **kwargs):
    super().__init__(**kwargs)
    self.units = units   # kept so get_config can report it
    # state_size / output_size are the attributes an RNN layer reads from its cell.
    self.state_size = units
    self.output_size = units
    self.simple_rnn_cell = keras.layers.SimpleRNNCell(units, activation=None)
    self.layer_norm = keras.layers.LayerNormalization()
    self.activation = keras.activations.get(activation)

  def call(self, inputs, states):
    """Run one time step; returns (outputs, [new_state])."""
    # The inner cell's own new state is discarded: it equals its un-normalized output.
    outputs, _ = self.simple_rnn_cell(inputs, states)
    norm_outputs = self.activation(self.layer_norm(outputs))
    # The normalized, activated output is both the step output and the next state.
    return norm_outputs, [norm_outputs]

  def get_config(self):
    """Return the constructor arguments so the cell can be serialized and re-created."""
    config = super().get_config()
    config.update({
        "units": self.units,
        "activation": keras.activations.serialize(self.activation),
    })
    return config

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import RNN, TimeDistributed, Dense

# Sequence-to-sequence model built from the custom layer-normalized cells,
# each wrapped in the generic RNN layer.
model = Sequential()
model.add(RNN(LNSimpleRNNCell(20), return_sequences=True, input_shape=[None, 1]))
model.add(RNN(LNSimpleRNNCell(20), return_sequences=True))
model.add(TimeDistributed(Dense(10)))

In [7]:
# Print the layers, output shapes and parameter counts of the layer-normalized model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rnn_2 (RNN)                  (None, None, 20)          480       
_________________________________________________________________
rnn_3 (RNN)                  (None, None, 20)          860       
_________________________________________________________________
time_distributed (TimeDistri (None, None, 10)          210       
Total params: 1,550
Trainable params: 1,550
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Short term memory problem

In [0]:
# LSTM cell
#(the state is split into 2 vectors, short-term and long-term, short term also acts as the output)
#(a main layer that takes the input and previous state)
#(a forget gate layer that drops parts of the long-term state (a logistic function outputs 0-1, multiplied element-wise))
#(an input gate layer that adds to the long-term state)
#(an output gate that outputs state and output)

In [0]:
# LSTM in keras
# Using lstm layer (optimized)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, LSTMCell, TimeDistributed, Dense, RNN

# Sequence-to-sequence model using the built-in LSTM layer in place of SimpleRNN.
model = Sequential([ 
                    LSTM(20, return_sequences=True, input_shape=[None, 1]),
                    LSTM(20, return_sequences=True),
                    TimeDistributed(Dense(10))
])

# Alternatively use the generic RNN layer with an LSTMCell argument (for custom cells)
# NOTE: this rebinds `model`, replacing the LSTM-layer model defined just above.
model = Sequential([ 
                    RNN(LSTMCell(20), return_sequences=True, input_shape=[None, 1]),
                    RNN(LSTMCell(20), return_sequences=True),
                    TimeDistributed(Dense(10))
])

In [0]:
# Peephole connections
#(let the gates look at the long term state as well)

In [0]:
# GRU cell
#(a peephole variant)
#(both states are in 1 output vector)
#(1 gate does both forgetting and adding, so it is only forgetting or adding, one at a time)
#(a separate gate controls the previous state)

In [0]:
# 1D conv layers
# Conv1D and GRU are not imported by any earlier cell, so import them here;
# without this the cell fails with a NameError on a fresh kernel.
from tensorflow.keras.layers import Conv1D, GRU

# The conv layer (kernel size 4, stride 2, no padding) shortens the sequence
# before it reaches the recurrent layers.
model = Sequential([
    Conv1D(filters=20, kernel_size=4, strides=2, padding="VALID", input_shape=[None, 1]),
    GRU(20, return_sequences=True),
    GRU(20, return_sequences=True),
    TimeDistributed(Dense(10))
])

model.compile(loss="mse", optimizer="adam", metrics=[last_time_step_mse])
# The first conv output covers input steps 0-3 and the window then moves by 2,
# so the targets are taken at steps 3, 5, 7, ... to line up with the outputs.
history = model.fit(X_train, y_train[:, 3::2], epochs=20,
                    validation_data=(X_valid, y_valid[:, 3::2]))

In [0]:
# WaveNet
#(stacks of 1D conv layers with kernel size of 2)
#(doubling dilation rate at every layer)
# InputLayer and Conv1D are not imported by any earlier cell, so import them
# here; without this the cell fails with a NameError on a fresh kernel.
from tensorflow.keras.layers import Conv1D, InputLayer

model = Sequential([
    InputLayer(input_shape=[None, 1])
])
# Two consecutive groups of layers with dilation rates 1, 2, 4, 8.
for rate in (1, 2, 4, 8)*2:
  model.add(Conv1D(filters=20, kernel_size=2, padding="CAUSAL", activation="relu", dilation_rate=rate))
# Final kernel-size-1 conv layer with 10 filters: 10 output values per time step.
model.add(Conv1D(filters=10, kernel_size=1))
model.compile(loss="mse", optimizer="adam", metrics=[last_time_step_mse])
model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))