In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
# Notebook-wide matplotlib defaults: 8x5 figures rendered at 120 dpi
plt.rcParams["figure.figsize"] = (8, 5)
plt.rcParams["figure.dpi"] = 120
import os

In [6]:
# Create the function to label windowed data
def get_labelled_windows(x, horizon=8):
    """
    Splits each row of a 2D windowed array into a (window, label) pair.

    The last `horizon` columns of every row become the labels and the
    remaining leading columns become the window (model input).

    E.g. if horizon=1;
    Input: [[0, 1, 2, 3, 4, 5, 6, 7]] -> Output: ([[0, 1, 2, 3, 4, 5, 6]], [[7]])

    Parameters
    ----------
    x : 2D array supporting `x[:, a:b]` slicing, one full window per row
    horizon : number of trailing columns to use as labels

    Returns
    -------
    windows, labels : two column-slices of `x` with the same number of rows
    """
    if horizon == 0:
        # `-0` is just `0`, so the generic slices below would hand back an
        # empty window and put every column into the labels. With no horizon
        # the whole row is the window and there are no label columns.
        return x[:, :], x[:, :0]
    return x[:, :-horizon], x[:, -horizon:]

def make_windows(x, window_size=8, horizon=8):
    """
    Turns a time series into a 2D array of sequential labelled windows of window_size
    with horizon size labels.

    Consecutive windows are shifted by one timestep. If `x` is shorter than
    window_size + horizon no window fits and both outputs have zero rows.

    Parameters
    ----------
    x : time series indexed along its first axis (ndarray, list, pandas Series, ...)
    window_size : number of timesteps used as input in each window
    horizon : number of timesteps following each window used as its labels

    Returns
    -------
    windows, labels : as produced by get_labelled_windows on the stacked windows
    """
    # The 2D integer indexing in step 3 needs an ndarray; coerce so that lists
    # and pandas Series are accepted as well (ndarrays pass through unchanged).
    x = np.asarray(x)
    total_length = window_size + horizon

    # 1. Offsets inside a single window (the horizon is appended for labelling later);
    #    shape (1, total_length)
    window_step = np.expand_dims(np.arange(total_length), axis=0)

    # 2. Start position of every window as a column, shape (n_windows, 1).
    #    The last valid start is len(x) - total_length, hence the "- 1" in the count.
    n_windows = len(x) - (total_length - 1)
    window_starts = np.expand_dims(np.arange(n_windows), axis=0).T

    # Broadcasting starts against offsets gives one row of indexes per window
    window_indexes = window_step + window_starts

    # 3. Index on the target array (a time series) with the 2D array of window indexes
    windowed_array = x[window_indexes]

    # 4. Split every row into its window and its labels
    windows, labels = get_labelled_windows(windowed_array, horizon=horizon)
    return windows, labels

In [7]:
# Split train set and test set. 
def make_train_test_split(windows, labels, split_size=1):
    """
      Cuts paired windows and labels into a leading train part and a trailing test part.

      The cut is placed `split_size` entries before the end of `windows`, and the
      same cut position is applied to `labels`. For 0 <= split_size <= len(windows)
      the test split therefore holds the final `split_size` entries.

      Returns:
      train_windows, test_windows, train_labels, test_labels
    """
    cut = len(windows) - split_size
    train_part = slice(None, cut)
    test_part = slice(cut, None)
    return windows[train_part], windows[test_part], labels[train_part], labels[test_part]

In [8]:
# Create the function to take in model predictions and truth values and return evaluation metrics
def evaluate_preds(y_true, y_pred):
    """
    Compares predictions against truth values and returns evaluation metrics.

    Parameters
    ----------
    y_true : truth values (anything tf.cast accepts)
    y_pred : model predictions, broadcastable against y_true

    Returns
    -------
    dict with scalar numpy values under the keys "mae", "mse", "rmse" and "mape"
    """
    # Make sure float32 datatype for metric calculations
    y_true = tf.cast(y_true, dtype=tf.float32)
    y_pred = tf.cast(y_pred, dtype=tf.float32)

    # The Keras metric functions average over the last axis only, so they
    # return one value per sample for 2D input and a scalar for 1D input.
    # Reduce with a mean so every sample contributes and both ranks work;
    # indexing with [0] would report only the first sample and fail on scalars.
    mae = tf.reduce_mean(tf.keras.metrics.mean_absolute_error(y_true, y_pred))
    mse = tf.reduce_mean(tf.keras.metrics.mean_squared_error(y_true, y_pred))
    # RMSE is the square root of the overall (already aggregated) MSE
    rmse = tf.sqrt(mse)
    mape = tf.reduce_mean(tf.keras.metrics.mean_absolute_percentage_error(y_true, y_pred))

    return {"mae": mae.numpy(),
            "mse": mse.numpy(),
            "rmse": rmse.numpy(),
            "mape": mape.numpy()}

In [9]:
# Function to generate predictions from a model
def make_preds(model, input_data):
    """
    Runs model.predict on input_data and returns the forecast with all
    size-1 dimensions removed (e.g. a single-sample batch axis).
    """
    return tf.squeeze(model.predict(input_data))

In [10]:
# Create a function to plot time series data
def plot_time_series(cfips, timesteps, values, format="-", start=0, end=None, label=None):
    """
    Draws values against timesteps on the current matplotlib axes.

    Parameters
    -----------
    cfips : identifier shown in the plot title
    timesteps : array of timestep values (x axis)
    values : array of values across time (y axis)
    format : matplotlib format string for the line/markers, default "-"
    start : first index to plot; applied to both timesteps and values
    end : index to stop before (None plots through the last point)
    label : legend entry for this series; the legend is only drawn when a label is given
    """
    # One slice shared by both axes keeps x and y aligned
    window = slice(start, end)
    plt.plot(timesteps[window], values[window], format, label=label)

    # Title and axis annotations
    plt.title(f'Cfips: {cfips}', fontsize=18)
    plt.xlabel("time", fontsize=14)
    plt.ylabel('MicroBusiness Density', fontsize=14)

    if label:
        plt.legend(fontsize=14)  # larger legend text
    plt.grid(True)