#Imports

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

#Utilities

In [None]:
def plot_series(time, series, format="-", start=0, end=None):
    """
    Visualizes time series data

    Args:
      time (array of int) - contains the time steps
      series (array of int or tuple of arrays) - contains the measurements
        for each time step; pass a tuple to overlay several series on the
        same figure
      format - line style when plotting the graph
      start - index of the first time step to plot
      end - slice bound for the last time step to plot (exclusive);
        None plots through the end of the data
    """

    # Setup dimensions of the graph figure
    plt.figure(figsize=(10, 6))

    # Wrap a lone series in a tuple so one and many series share a code path.
    # isinstance (instead of an exact type comparison) also accepts tuple
    # subclasses such as namedtuples.
    all_series = series if isinstance(series, tuple) else (series,)

    # Plot every series against the same slice of the time axis
    for single_series in all_series:
        plt.plot(time[start:end], single_series[start:end], format)

    # Label the x-axis
    plt.xlabel("Time")

    # Label the y-axis
    plt.ylabel("Value")

    # Overlay a grid on the graph
    plt.grid(True)

    # Draw the graph on screen
    plt.show()

def trend(time, slope=0):
    """
    Produces a linear series by scaling every time step by the slope.

    Args:
      time (array of int) - contains the time steps
      slope (float) - multiplier applied to each time step; the default of 0
        yields a flat series of zeros

    Returns:
      (array of float) - the time steps multiplied by the slope
    """

    # A straight line through the origin: value = slope * time step
    return slope * time

def seasonal_pattern(season_time):
    """
    Maps each input value onto an arbitrary two-part pattern
    (feel free to swap in a different one).

    Args:
      season_time (array of float) - values to map; `seasonality` passes the
        fractional position within one period

    Returns:
      (array of float) - cosine values where the input is below 0.4,
        exponentially decaying values everywhere else
    """

    # First part of the pattern: a cosine of the input
    cosine_part = np.cos(season_time * 2 * np.pi)

    # Second part of the pattern: an exponential decay of the input
    decay_part = 1 / np.exp(3 * season_time)

    # Pick, element by element, which part applies
    uses_cosine = season_time < 0.4

    return np.where(uses_cosine, cosine_part, decay_part)

def seasonality(time, period, amplitude=1, phase=0):
    """
    Tiles the pattern from `seasonal_pattern` so it repeats every period.

    Args:
      time (array of int) - contains the time steps
      period (int) - number of time steps before the pattern repeats
      amplitude (int) - factor that scales the pattern values
      phase (int) - number of time steps to shift the measured values

    Returns:
      (array of float) - seasonal data scaled by the defined amplitude
    """

    # Apply the phase shift before wrapping around the period
    shifted_time = time + phase

    # Express each step as a fraction of the way through its period
    position_in_period = (shifted_time % period) / period

    # Look up the pattern value at that position and scale it
    return amplitude * seasonal_pattern(position_in_period)

def noise(time, noise_level=1, seed=None):
    """Draws one standard-normal sample per time step and scales it

    Args:
      time (array of int) - contains the time steps; only its length is used
      noise_level (float) - scaling factor for the generated signal
      seed (int) - number generator seed for repeatability

    Returns:
      (array of float) - the noisy signal, one value per time step
    """

    # A dedicated generator keeps the global NumPy random state untouched
    generator = np.random.RandomState(seed)

    # One sample per time step
    num_steps = len(time)

    return generator.randn(num_steps) * noise_level

#Generate synthetic data

In [None]:
# Time axis: 4 * 365 + 1 = 1461 steps, stored as float32
time = np.arange(4 * 365 + 1, dtype="float32")

# Knobs for the individual components of the signal
baseline = 10
amplitude = 40
slope = 0.05
noise_level = 5

# Build the deterministic components separately
trend_component = trend(time, slope)
seasonal_component = seasonality(time, period=365, amplitude=amplitude)

# Combine them into the noise-free series
series = baseline + trend_component + seasonal_component

# Add reproducible noise in place (fixed seed)
series += noise(time, noise_level, seed=42)

# Show the finished series
plot_series(time, series)

#Split dataset

In [None]:
# Index where the series is cut into training and validation parts
split_time = 1000

# Training set: everything before the split index
time_train, x_train = time[:split_time], series[:split_time]

# Validation set: everything from the split index onwards
time_valid, x_valid = time[split_time:], series[split_time:]

#Prepare features and labels

In [None]:
# Parameters for turning the series into training windows

# Number of time steps used as input features in each window
window_size: int = 20

# Number of windows grouped into one training batch
batch_size: int = 32

# Buffer size used when shuffling the windows
shuffle_buffer_size: int = 1000

In [None]:
#dataset window function
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Generates shuffled batches of (features, label) windows from a series

    Args:
      series (array of float) - contains the values of the time series
      window_size (int) - number of time steps used as features in a window
      batch_size (int) - number of windows per batch
      shuffle_buffer (int) - buffer size to use when shuffling the windows

    Returns:
      dataset (TF Dataset) - batches of (features, label) tuples
    """

    # Generate a TF Dataset from the series values
    dataset = tf.data.Dataset.from_tensor_slices(series)

    # Window the data; each window carries one extra element for the label
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)

    # Flatten the windows by putting their elements in a single batch
    dataset = dataset.flat_map(lambda window: window.batch(window_size + 1))

    # Split each window: all but the last element are features, the last is the label
    dataset = dataset.map(lambda window: (window[:-1], window[-1]))

    # Shuffle the windows
    dataset = dataset.shuffle(shuffle_buffer)

    # Create batches of windows
    dataset = dataset.batch(batch_size).prefetch(1)

    return dataset

In [None]:
# Build the training dataset of shuffled, batched windows
dataset = windowed_dataset(
    series=x_train,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer=shuffle_buffer_size,
)

#Build the Model

We build a model out of `SimpleRNN` layers (each of which simply routes its output back into its input).
- A `SimpleRNN` layer expects a 3-dimensional input tensor with the shape `[batch, timesteps, feature]`. Because of that, you need to reshape each batch of windows from `(32, 20)` to `(32, 20, 1)` using a Lambda layer.
- SimpleRNN uses tanh by default (outputs between -1 and 1), so we use a second Lambda layer to scale the output by 100.

In [None]:
# Assemble the model used for the learning-rate sweep, layer by layer
model_tune = tf.keras.models.Sequential()

# Append a trailing axis so each window value becomes a one-feature time step
model_tune.add(
    tf.keras.layers.Lambda(
        lambda x: tf.expand_dims(x, axis=-1),
        input_shape=[window_size],
    )
)

# First recurrent layer returns the full sequence for the next recurrent layer
model_tune.add(tf.keras.layers.SimpleRNN(40, return_sequences=True))

# Second recurrent layer returns only its final output
model_tune.add(tf.keras.layers.SimpleRNN(40))

# Single-unit output for the forecast value
model_tune.add(tf.keras.layers.Dense(1))

# Scale the output by 100
model_tune.add(tf.keras.layers.Lambda(lambda x: x * 100.0))

#Tune the learning rate

In [None]:
def lr_for_epoch(epoch):
    """Returns the learning rate for an epoch: 1e-8 grown tenfold every 20 epochs"""
    return 1e-8 * 10**(epoch/20)


# Callback that applies the schedule above
lr_schedule = tf.keras.callbacks.LearningRateScheduler(lr_for_epoch)

# SGD with momentum; no learning rate is passed here because the scheduler supplies one
optimizer = tf.keras.optimizers.SGD(momentum=0.9)

# Configure training with the Huber loss
huber_loss = tf.keras.losses.Huber()
model_tune.compile(loss=huber_loss, optimizer=optimizer)

# Run the sweep and keep the per-epoch losses for plotting
history = model_tune.fit(dataset, epochs=100, callbacks=[lr_schedule])

We can visualize the results to pick an optimal learning rate.

In [None]:
# Per-epoch losses recorded during the learning-rate sweep
losses = history.history["loss"]

# Rebuild the learning rate used at each epoch (same formula as the scheduler).
# The length comes from the recorded losses rather than a hard-coded epoch
# count, so the two arrays always line up even if the number of epochs changes.
lrs = 1e-8 * (10 ** (np.arange(len(losses)) / 20))

# Set the figure size
plt.figure(figsize=(10, 6))

# Set the grid
plt.grid(True)

# Plot the loss against the learning rate, with the x-axis in log scale
plt.semilogx(lrs, losses)

# Increase the tickmarks size
plt.tick_params('both', length=10, width=1, which='both')

# Set the plot boundaries: [x_min, x_max, y_min, y_max]
plt.axis([1e-8, 1e-3, 0, 50])

We can tighten the boundaries of the graph to zoom in on the region of interest.

In [None]:
# Losses from the sweep and the zoomed-in plot window
sweep_losses = history.history["loss"]
zoom_bounds = [1e-7, 1e-4, 0, 20]  # [x_min, x_max, y_min, y_max]

# New 10x6 figure with a grid
plt.figure(figsize=(10, 6))
plt.grid(True)

# Loss against learning rate, with the x-axis in log scale
plt.semilogx(lrs, sweep_losses)

# Make major and minor tick marks on both axes longer
plt.tick_params(axis='both', which='both', length=10, width=1)

# Restrict the view to the zoomed-in window
plt.axis(zoom_bounds)

We can choose an optimal learning rate based on the results.

#Train the model

In [None]:
# Assemble a fresh model, layer by layer
model = tf.keras.models.Sequential()

# Append a trailing axis so each window value becomes a one-feature time step
model.add(
    tf.keras.layers.Lambda(
        lambda x: tf.expand_dims(x, axis=-1),
        input_shape=[window_size],
    )
)

# Two stacked recurrent layers; only the first returns the full sequence
model.add(tf.keras.layers.SimpleRNN(40, return_sequences=True))
model.add(tf.keras.layers.SimpleRNN(40))

# Single-unit output, then scaled by 100
model.add(tf.keras.layers.Dense(1))
model.add(tf.keras.layers.Lambda(lambda x: x * 100.0))

# Fixed learning rate for this training run
learning_rate = 1e-6

# SGD with momentum at that learning rate
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)

# Train on the Huber loss and also report the mean absolute error
model.compile(
    loss=tf.keras.losses.Huber(),
    optimizer=optimizer,
    metrics=["mae"],
)

# Fit for 100 epochs and keep the training history
history = model.fit(dataset, epochs=100)

#Model Prediction

In [None]:
# Collect one prediction per window
forecast = []

# Start window_size steps before the split so the first window's prediction
# lines up with the first validation point
forecast_series = series[split_time - window_size:]

# Predict one window at a time.
# The loop variable is deliberately NOT called `time`: that name would
# overwrite the module-level `time` array and break any cell that slices it.
num_windows = len(forecast_series) - window_size
for window_start in range(num_windows):
    window = forecast_series[window_start:window_start + window_size]
    # Add a leading batch axis so the model receives a batch of one window
    forecast.append(model.predict(window[np.newaxis]))

# Convert to a numpy array and drop the single-element axes
results = np.array(forecast).squeeze()

# Plot the predictions over the validation data
plot_series(time_valid, (x_valid, results))

- You can optimize this step by leveraging TensorFlow models' capability to process batches. Instead of running the for-loop above, which **processes a single window at a time**, you can pass in an entire batch of windows and let the model process them in parallel.
- Unlike the training dataset, the function below does not shuffle the windows. That's because we want the output to be in its proper sequence so we can compare it properly to the validation set.

In [None]:
def model_forecast(model, series, window_size, batch_size):
    """Runs a model over every sliding window of a series, in order

    Args:
      model (TF Keras Model) - model that accepts data windows
      series (array of float) - contains the values of the time series
      window_size (int) - the number of time steps to include in the window
      batch_size (int) - the batch size

    Returns:
      (numpy array) - array containing predictions
    """

    # Slide a window over the series one step at a time, keep only full-size
    # windows, and flatten each one into a single tensor
    windows = (
        tf.data.Dataset.from_tensor_slices(series)
        .window(window_size, shift=1, drop_remainder=True)
        .flat_map(lambda w: w.batch(window_size))
    )

    # Group the windows into batches; no shuffling, so the order is preserved
    batched_windows = windows.batch(batch_size).prefetch(1)

    # Predict on all batches at once
    return model.predict(batched_windows)