# Oze challenge example
## Imports

In [None]:
import os
from pathlib import Path
import numpy as np
import skorch
import torch
from skorch.callbacks import EarlyStopping
from matplotlib import pyplot as plt

from time_series_predictor import TimeSeriesPredictor
from time_series_models import BenchmarkLSTM
from oze_dataset import npz_check, OzeNPZDataset

## Config

In [None]:
# Switches for the individual figures drawn further down.
plot_config = {
    'training progress': True,
    'prediction on training data': True,
    'forecast': True,
}

# Arguments forwarded to the forecast call.
forecast_config = {
    'include history': True,
    'steps ahead': 500,
}

# Hyper-parameters for the network and its training loop.
predictor_config = {
    'epochs': 300,
    'learning rate': 2e-2,
    'hidden dim': 20,
    'layers num': 4,
    'patience': 30,
    'dropout': 0.2,
    'train shuffle': True,
    'weight decay': 1E-5,
    'bidirectional': True,
    'train split': 10,
}

# Single top-level configuration: the three groups above plus the switches
# that enable or disable the prediction and forecast sections.
config = {
    'plot': plot_config,
    'forecast': forecast_config,
    'predictor': predictor_config,
    'predict on training data enabled': True,
    'forecast enabled': True,
}

## Time Series Predictor instantiation

In [None]:
# Look the hyper-parameter group up once so the constructor calls stay short.
predictor_params = config['predictor']

# Network handed to the predictor, sized from the configuration.
benchmark_net = BenchmarkLSTM(
    hidden_dim=predictor_params['hidden dim'],
    num_layers=predictor_params['layers num'],
    dropout=predictor_params['dropout'],
    bidirectional=predictor_params['bidirectional'],
)

tsp = TimeSeriesPredictor(
    benchmark_net,
    # Shuffle training data on each epoch
    iterator_train__shuffle=predictor_params['train shuffle'],
    optimizer__weight_decay=predictor_params['weight decay'],
    # Early stopping with the configured patience
    early_stopping=EarlyStopping(patience=predictor_params['patience']),
    lr=predictor_params['learning rate'],
    max_epochs=predictor_params['epochs'],
    train_split=skorch.dataset.CVSplit(predictor_params['train split']),
    optimizer=torch.optim.Adam,
)

## Training process

In [None]:
# Challenge credentials come from the environment; a missing variable yields
# None for that entry.
credentials = {
    'user_name': os.environ.get('CHALLENGE_USER_NAME'),
    'user_password': os.environ.get('CHALLENGE_USER_PASSWORD'),
}

# The dataset lives in a 'datasets' directory under the current working
# directory; npz_check returns the path that is handed to the dataset class.
datasets_dir = Path(os.path.abspath('')) / 'datasets'
dataset_path = npz_check(datasets_dir, 'dataset', credentials=credentials)
ds = OzeNPZDataset(dataset_path=dataset_path)

# Keep only the last 100 entries along the first axis to speed up training.
last_entries = slice(-100, None)
ds.x = ds.x[last_entries, :, :]
ds.y = ds.y[last_entries, :, :]

tsp.fit(ds)


### Plot training evolution

In [None]:
if config['plot']['training progress']:
    # Per-epoch records stored on the fitted regressor.
    fit_history = tsp.ttr.regressor_['regressor'].history
    n_epochs = len(fit_history)

    # One row per epoch, filled in by epoch number below.
    train_loss = np.zeros((n_epochs, 1))
    valid_loss = np.zeros((n_epochs, 1))
    for record in fit_history:
        # The epoch number is shifted by one to get a 0-based row index.
        row = record['epoch'] - 1
        train_loss[row] = record['train_loss']
        valid_loss[row] = record['valid_loss']

    # Draw both curves on a single set of axes.
    _, axes_one = plt.subplots(figsize=(20, 20))
    axes_one.plot(train_loss, 'o-', label='training')
    axes_one.plot(valid_loss, 'o-', label='validation')
    axes_one.set_xlabel('Epoch')
    axes_one.set_ylabel('MSE')
    axes_one.legend()

## Prediction on training data

In [None]:
if config['predict on training data enabled']:
    # Select a random training example
    idx = np.random.randint(0, len(tsp.dataset))
    dataloader = tsp.ttr.regressor['regressor'].get_iterator(tsp.dataset)
    x, y = dataloader.dataset[idx]

    # Run predictions
    netout = tsp.sample_predict(x)

    d_output = netout.shape[1]
    plot_enabled = config['plot']['prediction on training data']
    if plot_enabled:
        # Create the figure once, before the loop: calling plt.figure() per
        # output variable would put every subplot alone in its own figure
        # instead of stacking them in one d_output x 1 grid.
        plt.figure(figsize=(20, 20))

    for idx_output_var in range(d_output):
        # Ground truth and prediction for this output variable
        y_true = y[:, idx_output_var]
        y_pred = netout[:, idx_output_var]

        if plot_enabled:
            axes_two = plt.subplot(d_output, 1, idx_output_var+1)
            axes_two.plot(y_true, label="Truth")
            axes_two.plot(y_pred, label="Prediction")
            axes_two.set_title(tsp.dataset.labels['X'][idx_output_var])
            axes_two.legend()

    if plot_enabled:
        # Keep the stacked subplot titles from overlapping neighbouring axes.
        plt.tight_layout()

## Future forecast

In [None]:
# Run forecast
if config['forecast enabled']:
    include_history = config['forecast']['include history']
    netout = tsp.forecast(config['forecast']['steps ahead'],
                          include_history=include_history)

    d_output = netout.shape[-1]
    # Select any training example just for comparison. The bound comes from
    # tsp.dataset because that is the object indexed just below.
    idx = np.random.randint(0, len(tsp.dataset))
    dataloader = tsp.ttr.regressor['regressor'].get_iterator(tsp.dataset)
    x, y = dataloader.dataset[idx]

    plot_enabled = config['plot']['forecast']
    if plot_enabled:
        # Create the figure once, before the loop: calling plt.figure() per
        # output variable would put every subplot alone in its own figure
        # instead of stacking them in one d_output x 1 grid.
        plt.figure(figsize=(20, 20))

    for idx_output_var in range(d_output):
        # Ground truth and forecast for this output variable
        y_true = y[:, idx_output_var]
        y_pred = netout[idx, :, idx_output_var]

        if plot_enabled:
            axes_three = plt.subplot(d_output, 1, idx_output_var+1)

            if include_history:
                plot_args = [y_pred]
            else:
                # Shift the x axis so the forecast is drawn after the
                # history. NOTE(review): get_x_shape()[1] is presumably the
                # length of the input sequence - confirm.
                first_step = tsp.dataset.get_x_shape()[1] + 1
                y_pred_index = np.arange(first_step, first_step + len(y_pred))
                plot_args = [y_pred_index, y_pred]
            axes_three.plot(*plot_args, label="Prediction + forecast")
            axes_three.plot(y_true, label="Truth")
            axes_three.set_title(tsp.dataset.labels['X'][idx_output_var])
            axes_three.legend()

    if plot_enabled:
        # Keep the stacked subplot titles from overlapping neighbouring axes.
        plt.tight_layout()