## Preliminaries

### Paths

In [None]:
import os
import pathlib
import sys

In [None]:
# Outside Google Colab, make the project's packages importable: `root` is the
# directory two levels above the working directory, i.e. the parent of the
# working directory's parent (held in `notebooks`).
# NOTE(review): `root` is bound on this branch only, yet a later cell passes it
# to DataStreams(root=root); within Colab that would raise a NameError unless
# `root` is defined some other way. Confirm the intended Colab behaviour.
if 'google.colab' not in str(get_ipython()):

    notebooks = os.path.dirname(os.getcwd())
    root = str(pathlib.Path(notebooks).parent)

    # Re-running this cell should not keep appending the same entry
    if root not in sys.path:
        sys.path.append(root)

<br>
<br>

### Libraries

In [None]:
%matplotlib inline

import datetime

import logging
import collections

import numpy as np
import pandas as pd

# Set before TensorFlow is imported on the next line; "-1" is intended to hide
# every CUDA device so that TensorFlow runs on the CPU.
os.environ["CUDA_VISIBLE_DEVICES"]="-1"  
import tensorflow as tf

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

import IPython
import IPython.display


<br>
<br>

### Custom

In [None]:
import src.prototyping.Settings

import src.modelling.DataStreams

import src.modelling.DataSplitting
import src.modelling.DataReconstructions
import src.modelling.Differences
import src.modelling.DataNormalisation

import src.modelling.WindowGenerator
import src.modelling.ModellingSteps

<br>

Settings:

> matplotlib.rcParams.update({'font.size': 13})<br>
> matplotlib.rcParams['text.usetex'] = False

> plt.rcParams['figure.constrained_layout.use'] = False        

> sns.set(font_scale=1)<br>
> sns.axes_style('whitegrid', {"axes.facecolor": ".9"})


In [None]:
# Apply the project's plotting aesthetics; the Markdown cell above lists the
# matplotlib/seaborn settings this call is expected to make.
src.prototyping.Settings.Settings().aesthetics()

<br>

### Logging

In [None]:
# Root logging configuration: INFO and above, each record written as the
# message followed by a millisecond-resolution timestamp on the next line.
logging.basicConfig(
    level=logging.INFO,
    format='\n%(message)s\n%(asctime)s.%(msecs)03d\n',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# The notebook's own logger
logger = logging.getLogger(__name__)

<br>
<br>

## Data: A Collection of Trusts

<br>

### The Splits


The training, validating, and testing data

In [None]:
# The proportion of the data assigned to each of the three splits
Fraction = collections.namedtuple(
    typename='Fraction', field_names=['training', 'validating', 'testing'])

# DataStreams(...).exc() is unpacked into three objects, presumably in the
# same order as the fractions.
training, validating, testing = src.modelling.DataStreams.DataStreams(
    root=root,
    fraction=Fraction(training=0.75, validating=0.15, testing=0.10),
).exc()

<br>

The split shapes

In [None]:
# Log the shape of each split
for message, split in (('training data %s', training),
                       ('validating data %s', validating),
                       ('testing data %s', testing)):
    logger.info(message, split.shape)

In [None]:
# Log the column labels of the training split
logger.info(training.columns)

<br>
<br>

### Variable Settings

<br>


**Reconstructions**

In [None]:
# The instance whose exc(blob=...) method is applied to each split in the next cell
reconstructions = src.modelling.DataReconstructions.DataReconstructions()

In [None]:
# Replace each split with its reconstructed counterpart; the splits are
# processed in the order training, validating, testing.
training, validating, testing = (
    reconstructions.exc(blob=split) for split in (training, validating, testing))

<br>

The shapes

In [None]:
# Log the shape of each split after the reconstruction step
for message, split in (('training data %s', training),
                       ('validating data %s', validating),
                       ('testing data %s', testing)):
    logger.info(message, split.shape)

In [None]:
# Log the column labels of the training split after the reconstruction step
logger.info(training.columns)

<br>
<br>

**Difference**

In [None]:
# The instance whose exc(blob=...) method is applied to each split in the next cell
differences = src.modelling.Differences.Differences()

In [None]:
# Replace each split with the output of the differencing step; the splits are
# processed in the order training, validating, testing.
training, validating, testing = (
    differences.exc(blob=split) for split in (training, validating, testing))

# ... and log the resulting shapes
for message, split in (('training data %s', training),
                       ('validating data %s', validating),
                       ('testing data %s', testing)):
    logger.info(message, split.shape)

<br>
<br>

**Sample Graphs**

In [None]:
# An excerpt of the training split for a visual check. The columns are
# selected first and only that selection is copied, instead of copying the
# whole training frame and then discarding most of it.
excerpt = training[['group', 'estimatedNewAdmissions', 'newDeaths28DaysByDeathDate']].copy()

# Plot the first 10,000 rows of the columns at positions 1 and 2 (that is,
# everything except 'group'), one panel per column. The trailing semicolon
# suppresses the notebook's echo of the returned axes.
excerpt.iloc[:10000, 1:3].plot(subplots=True, ylim=(-100, 100), figsize=(6.5, 5.9));

<br>
<br>

**Normalisation**

* Via the means & deviations of the training data

In [None]:
# The normaliser is built with the training split as its reference, and the
# same instance is then applied to all three splits.
normalisation = src.modelling.DataNormalisation.DataNormalisation(reference=training)

training_, validating_, testing_ = (
    normalisation.normalise(blob=split) for split in (training, validating, testing))

<br>

The shapes of the normalised splits

In [None]:
# Log the shape of each normalised split
for message, split in (('Training: %s', training_),
                       ('Validating: %s', validating_),
                       ('Testing: %s', testing_)):
    logger.info(message, split.shape)

In [None]:
# Remove the 'point' column from each normalised split, in place
for split in (training_, validating_, testing_):
    split.drop(columns='point', inplace=True)

# The columns that remain
logger.info(training_.columns)

<br>
<br>

## Windows


### Window


<br>

**Case**

> * Predict `output_steps` days into the future, based on `input_width` days of history

Therefore

* $sequence\_length = total\_window\_size = input\_width + output\_steps$

Noting that

* $sequence\_stride = 1$
* $batch\_size = 32$

**always**.

<br>

Hence, **define a *modelling arguments* class** that declares the set of model arguments, including the windowing arguments

In [None]:
# A record of the modelling arguments: the three windowing arguments, the
# three normalised data splits, and the label columns.
Arguments = collections.namedtuple(
    'Arguments',
    [
        'input_width', 'label_width', 'shift',
        'training_', 'validating_', 'testing_',
        'label_columns',
    ],
)

<br>

**Initialising a *modelling arguments* class**

In [None]:
# The number of steps predicted ahead
output_steps = 15

# The candidate history lengths; only the first of them is used as the input width here
widths = range(27, 40)
input_width = widths.start

In [None]:
# Labels span `output_steps` steps and are shifted by the same amount; the
# single label column is 'estimatedNewAdmissions'.
arguments = Arguments(
    input_width=input_width,
    label_width=output_steps,
    shift=output_steps,
    training_=training_,
    validating_=validating_,
    testing_=testing_,
    label_columns=['estimatedNewAdmissions'],
)

<br>

**Initialising a modelling window generator**

In [None]:
# Every setting of the window generator is taken from `arguments`
window = src.modelling.WindowGenerator.WindowGenerator(
    input_width=arguments.input_width,
    label_width=arguments.label_width,
    shift=arguments.shift,
    training=arguments.training_,
    validating=arguments.validating_,
    testing=arguments.testing_,
    label_columns=arguments.label_columns,
)

In [None]:
# Log the window generator; logging renders the object via str(), hence the
# output is whatever WindowGenerator's string representation provides.
logger.info(window)

<br>

**Specifications**

In [None]:
# Log the element specification of the training, validating, and testing
# data sets exposed by the window generator, in that order.
for part in ('train', 'validate', 'test'):
    logger.info(getattr(window, part).element_spec)

<br>
<br>

## Modelling

In [None]:
# The number of columns of the normalised training split (after 'point' was
# dropped); the models below emit this many values per predicted step.
n_features = training_.shape[1]

<br>

Instantiate a generic modelling steps instance

In [None]:
# Its modelling(model=..., window=...) method is called once per model below
steps = src.modelling.ModellingSteps.ModellingSteps()

<br>

Diagnostics

In [None]:
# Two separate, empty frames with identical columns; each model appends one
# row of evaluation results to each of them.
validations, tests = (
    pd.DataFrame(columns=['method', 'history', 'ahead', 'loss', 'mae']) for _ in range(2))

<br>
<br>

### Convolution

In [None]:
# The convolution spans the whole input window: this value is used below both
# to slice the last `convolution_width` time steps and as the Conv1D kernel size.
convolution_width = input_width

In [None]:
# Convolutional multi-step model
#   1. keep only the last `convolution_width` time steps of each input window
#   2. one Conv1D layer whose kernel covers all of those time steps
#   3. a zero-initialised Dense layer emitting output_steps * n_features values
#   4. a reshape to [output_steps, n_features] per example
# NOTE(review): the window is built with label_columns=['estimatedNewAdmissions'];
# if WindowGenerator restricts the labels to that column, then this
# n_features-wide output would match the labels by broadcasting only.
# Confirm the intended output width.
convolution = tf.keras.Sequential(layers=[
    tf.keras.layers.Lambda(function=lambda inputs: inputs[:, -convolution_width:, :]),
    tf.keras.layers.Conv1D(filters=256, kernel_size=convolution_width, activation='relu'),
    tf.keras.layers.Dense(
        units=output_steps * n_features, kernel_initializer=tf.initializers.zeros()),
    tf.keras.layers.Reshape(target_shape=[output_steps, n_features]),
])

# The returned object exposes `.model` and `.history`, both used further below
convolution_ = steps.modelling(model=convolution, window=window)

In [None]:
# Append one row to each diagnostics frame: the method, history, and ahead
# values, followed by the values returned by evaluate() (expected to be the
# loss and the mae, as per the frames' columns).
validations.loc[len(validations), :] = (
    ['CNN', input_width, output_steps] + convolution_.model.evaluate(window.validate, verbose=0))
tests.loc[len(tests), :] = (
    ['CNN', input_width, output_steps] + convolution_.model.evaluate(window.test, verbose=0))

<br>
<br>

### RNN (LSTM)

In [None]:
# Recurrent multi-step model
#   1. an LSTM layer of 32 units that returns its final output only
#   2. a zero-initialised Dense layer emitting output_steps * n_features values
#   3. a reshape to [output_steps, n_features] per example
lstm = tf.keras.Sequential(layers=[
    tf.keras.layers.LSTM(units=32, return_sequences=False),
    tf.keras.layers.Dense(
        units=output_steps * n_features, kernel_initializer=tf.initializers.zeros()),
    tf.keras.layers.Reshape(target_shape=[output_steps, n_features]),
])

# The returned object exposes `.model` and `.history`, both used further below
lstm_ = steps.modelling(model=lstm, window=window)

In [None]:
# Append one row to each diagnostics frame: the method, history, and ahead
# values, followed by the values returned by evaluate() (expected to be the
# loss and the mae, as per the frames' columns).
validations.loc[len(validations), :] = (
    ['LSTM', input_width, output_steps] + lstm_.model.evaluate(window.validate, verbose=0))
tests.loc[len(tests), :] = (
    ['LSTM', input_width, output_steps] + lstm_.model.evaluate(window.test, verbose=0))

<br>
<br>

### GRU

```python
# Stacked GRU model: two sequence-returning GRU layers, each followed by
# dropout, then a final GRU layer of output_steps * n_features units whose
# output is reshaped to [output_steps, n_features] per example.
gru = tf.keras.Sequential(layers=[
    tf.keras.layers.GRU(
        units=32,
        input_shape=(input_width, n_features),
        return_sequences=True,
        activation='relu',
        kernel_initializer=tf.initializers.HeUniform()),
    tf.keras.layers.Dropout(rate=0.1),
    tf.keras.layers.GRU(units=16, return_sequences=True, activation='relu'),
    tf.keras.layers.Dropout(rate=0.1),
    tf.keras.layers.GRU(units=output_steps * n_features, activation='relu'),
    tf.keras.layers.Reshape(target_shape=[output_steps, n_features]),
])

gru_ = steps.modelling(model=gru, window=window)
```

```python
# Append one row to each diagnostics frame: the method, history, and ahead
# values, followed by the values returned by evaluate().
validations.loc[len(validations), :] = (
    ['GRU', input_width, output_steps] + gru_.model.evaluate(window.validate, verbose=0))
tests.loc[len(tests), :] = (
    ['GRU', input_width, output_steps] + gru_.model.evaluate(window.test, verbose=0))

<br>
<br>

## Performance

### Endpoints

In [None]:
# Display the rows collected from evaluating each model on the validating data
validations

In [None]:
# Display the rows collected from evaluating each model on the testing data
tests

<br>

### Histories

**Convolution**

In [None]:
# The recorded training history of the convolution model as a frame, tagged
# with the method name, the history length, and the number of steps ahead.
convolution_history = pd.DataFrame(convolution_.history).assign(
    method='CNN', history=input_width, ahead=output_steps)
convolution_history

<br>

**LSTM**

In [None]:
# The recorded training history of the LSTM model as a frame, tagged with the
# method name, the history length, and the number of steps ahead.
lstm_history = pd.DataFrame(data=lstm_.history).assign(
    method='LSTM', history=input_width, ahead=output_steps)
lstm_history

<br>

**GRU**



```python
# The recorded training history of the GRU model as a frame, tagged with the
# method name, the history length, and the number of steps ahead.
gru_history = pd.DataFrame(data=gru_.history).assign(
    method='GRU', history=input_width, ahead=output_steps)
gru_history
```