## Preliminaries

### Paths

In [4]:
import os
import pathlib
import sys

In [5]:
# Resolve the project root (`parent`) and make the project's code importable.
if 'google.colab' not in str(get_ipython()):

    # The root is the deepest directory named 'infections' on the path to the working directory.
    parts = pathlib.Path(os.getcwd()).parts
    indices = [index for index, value in enumerate(parts) if value == 'infections']
    if not indices:
        # Previously this surfaced as an opaque "max() arg is an empty sequence" ValueError.
        raise ValueError(
            "The working directory must be inside a directory named 'infections'; "
            'the working directory is {}'.format(os.getcwd()))
    limit = max(indices)
    parent = os.path.join(*parts[:(limit + 1)])

    # `parent` lets the `src.modelling.…` imports resolve when the notebook runs from
    # a sub-directory; `parent/src` is retained for backward compatibility.  The
    # membership test keeps sys.path free of duplicates when this cell is re-run.
    for location in (parent, os.path.join(parent, 'src')):
        if location not in sys.path:
            sys.path.append(location)

else:

    # NOTE(review): later cells read `parent`, which was previously undefined on Colab.
    # Assumes the Colab working directory is the project root -- TODO confirm.
    parent = os.getcwd()


In [6]:
# Display the project root resolved by the path set-up cell
parent

'J:\\library\\premodelling\\projects\\infections'

<br>
<br>

### Libraries

In [None]:
%matplotlib inline

import datetime

import logging
import collections

import numpy as np
import pandas as pd


<br>

### Custom

In [None]:
import src.modelling.DataStreams
import src.modelling.DataReconstructions
import src.modelling.Differences
import src.modelling.DataNormalisation
import src.modelling.Estimates

<br>
<br>

### Logging

In [None]:
# Logging set-up: INFO level; each record is the message followed, on the next line,
# by a time stamp with millisecond resolution.
logging.basicConfig(
    level=logging.INFO,
    format='\n\n%(message)s\n%(asctime)s.%(msecs)03d',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# The notebook's logger
logger = logging.getLogger(__name__)

<br>
<br>

## Part II

### Setting-Up

A named-tuple type that holds the fractions of the data assigned to training, validating, and testing

In [None]:
# Immutable record of the data splitting fractions; the fields are in split order.
Fraction = collections.namedtuple('Fraction', 'training validating testing')

<br>

**Modelling Arguments**

> * Predict `output_steps` days into the future, based on `input_width` days of history

Herein

* $input\_width \in widths$  $\qquad$  [$widths$ is a range of input window values (days)]
* $output\_steps = 15$ days
  
And

* $label\_width = output\_steps$


In [None]:
# Immutable record of the modelling arguments: the windowing settings
# (input_width, label_width, shift), the three data splits, and the label columns.
Arguments = collections.namedtuple(
    'Arguments',
    (
        'input_width',
        'label_width',
        'shift',
        'training_',
        'validating_',
        'testing_',
        'label_columns',
    ),
)

In [None]:
# The number of days ahead to predict
output_steps = 15

# The candidate input window lengths (days): 18, 19, ..., 39
widths = range(18, 40)

<br>
<br>

### Training, Validating, Testing Data

Foremost: The data sets for training, validating, and testing

In [None]:
# Split the project's data into training (75%), validating (15%), and testing (10%) sets
training, validating, testing = src.modelling.DataStreams.DataStreams(
    root=parent,
    fraction=Fraction(training=0.75, validating=0.15, testing=0.10),
).exc()

In [None]:
# Log the column labels of the training data
logger.info(training.columns)

<br>
<br>

### Reconstruction

Reconstructions: Each data set is a concatenation of records from various NHS Trusts. However, because the aim is a single forecasting model for all trusts, each data set has to be reconstructed before modelling.

In [None]:
# One reconstruction object serves the three data sets; the sets are processed
# in the order training, validating, testing, and each name is re-bound to its
# reconstructed counterpart.
reconstructions = src.modelling.DataReconstructions.DataReconstructions()
training, validating, testing = [
    reconstructions.exc(blob=blob) for blob in (training, validating, testing)
]

<br>
<br>

### Differences

The modelling uses difference values rather than the actual values.

In [None]:
# One differencing object serves the three data sets; the sets are processed
# in the order training, validating, testing, and each name is re-bound to its
# differenced counterpart.
differences = src.modelling.Differences.Differences()
training, validating, testing = [
    differences.exc(blob=blob) for blob in (training, validating, testing)
]

<br>
<br>

### Normalisation

In [None]:
# The training data is the reference handed to the normaliser.
# NOTE(review): presumably the scaling parameters are derived from `reference` only -- confirm in DataNormalisation.
normalisation = src.modelling.DataNormalisation.DataNormalisation(reference=training)

# The 'point' column is excluded from the modelling frames.  `drop` is used without
# `inplace=True` so that it returns a new frame: if `normalise` ever returns (a view of)
# its input, the un-normalised frames stay untouched and re-running this cell cannot
# fail on an already-removed 'point' column.
training_ = normalisation.normalise(blob=training).drop(columns='point')
validating_ = normalisation.normalise(blob=validating).drop(columns='point')
testing_ = normalisation.normalise(blob=testing).drop(columns='point')

<br>
<br>

### Modelling

In [None]:
# The modelling arguments.  `input_width` is left unset here while the candidate
# widths are passed to `exc` separately.
# NOTE(review): presumably Estimates sets input_width per candidate width -- confirm.
arguments = Arguments(
    input_width=None,
    label_width=output_steps,
    shift=output_steps,
    training_=training_,
    validating_=validating_,
    testing_=testing_,
    label_columns=['estimatedNewAdmissions'],
)

# The number of features is the number of columns of the normalised training data
estimates = src.modelling.Estimates.Estimates(
    n_features=training_.shape[1], output_steps=output_steps)
validations, tests = estimates.exc(widths=widths, arguments=arguments)

# Log the validation results, then the test results
logger.info(validations)
logger.info(tests)

<br>
<br>

### Delete DAG Diagrams

In [None]:
def delete_diagrams(directory=None, pattern='*.pdf'):
    """
    Deletes the files of a directory whose names match a pattern.

    This replaces the shell command `rm -rf *.pdf`: it does not need a bash
    interpreter, and it deletes files only -- a directory whose name happens
    to match the pattern is left alone rather than being removed recursively.

    :param directory: The directory to clean; defaults to the current working directory.
    :param pattern: The glob pattern of the file names to delete.
    :return: The sorted names of the deleted files.
    """

    directory = pathlib.Path.cwd() if directory is None else pathlib.Path(directory)

    deleted = []
    for path in sorted(directory.glob(pattern)):
        if path.is_file():
            path.unlink()
            deleted.append(path.name)

    return deleted


# The trailing semicolon suppresses the display of the returned list
delete_diagrams();