In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os
# When the kernel starts inside the 'notebooks' folder, move up one level so the
# `src` package import and the relative './models/...' paths used in this notebook
# resolve. Compare the directory *name* exactly: a suffix test would also match
# folders such as 'old_notebooks'.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
from src.training import investigate_predictions, calculate_metrics, load_config, DATA_TIME_INTERVAL

Model comparison: LSTM, GRU, RNN

In [6]:
# Experiments compared in this section; edit this list to compare other runs.
model_names = ['000-baseline', '001-baseline', '002-rnn']

# One record per trained repetition: the model name plus every validation metric
# read at that run's best epoch.
records = []
for model_name in model_names:
    config = load_config(model_name)

    # Configs without a 'repetitions' entry are treated as a single run.
    for i in range(config['repetitions'] if 'repetitions' in config else 1):
        # Meta files are numbered from 1. JSON is UTF-8, so do not rely on the
        # platform's default text encoding.
        with open(f'./models/{model_name}_{i + 1}_meta.json', encoding='utf-8') as f:
            data = json.load(f)

        # NOTE(review): assumes data['best_epoch'] is a 0-based index into each
        # val_scores list -- confirm against the training code.
        records.append({
            'model_name': model_name,
            **{metric: scores[data['best_epoch']] for metric, scores in data['val_scores'].items()}
        })

# Built from a separate list so `model_results` is only ever a DataFrame.
model_results = pd.DataFrame(records)

In [7]:
# Mean and standard deviation (pandas default, ddof=1) of every metric across
# each model's repetitions.
grouped = model_results.groupby('model_name')
means = grouped.mean().round(2)
stds = grouped.std().round(2)

# Render both parts with exactly two decimals. Plain str() on a rounded float drops
# trailing zeros, which gives ragged cells such as '14.0 ± 0.09' or '0.05 ± 0.0'.
two_decimals = '{:.2f}'.format
table = (
    means.apply(lambda column: column.map(two_decimals))
    .add(' ± ')
    .add(stds.apply(lambda column: column.map(two_decimals)))
)
table.columns = table.columns.str.upper()
table = table.rename(columns={'GMSE': 'gMSE', 'GRMSE': 'gRMSE'})
table = table[['RMSE', 'MAPE', 'gRMSE']]
table.index.name = None  # drop the 'model_name' label left on the index by groupby
table

Unnamed: 0,RMSE,MAPE,gRMSE
000-baseline,12.33 ± 0.05,0.05 ± 0.0,14.56 ± 0.22
001-baseline,13.88 ± 0.02,0.06 ± 0.0,16.13 ± 0.14
002-rnn,12.11 ± 0.05,0.05 ± 0.0,14.2 ± 0.17


In [8]:
# Print the LaTeX source itself; a bare expression would display the string's repr,
# with newlines and backslashes escaped, which cannot be pasted into a document.
print(table.to_latex())

'\\begin{tabular}{llll}\n\\toprule\n & RMSE & MAPE & gRMSE \\\\\n\\midrule\n000-baseline & 12.33 ± 0.05 & 0.05 ± 0.0 & 14.56 ± 0.22 \\\\\n001-baseline & 13.88 ± 0.02 & 0.06 ± 0.0 & 16.13 ± 0.14 \\\\\n002-rnn & 12.11 ± 0.05 & 0.05 ± 0.0 & 14.2 ± 0.17 \\\\\n\\bottomrule\n\\end{tabular}\n'

Hidden state size comparison

In [14]:
# Experiments compared in this section; edit this list to compare other runs.
model_names = ['002-rnn', '011-rnn_hs512', '012-rnn_hs512_256_128', '013-rnn_hs_1024', '014-rnn_hs_128']

# One record per trained repetition: the model name plus every validation metric
# read at that run's best epoch.
records = []
for model_name in model_names:
    config = load_config(model_name)

    # Configs without a 'repetitions' entry are treated as a single run.
    for i in range(config['repetitions'] if 'repetitions' in config else 1):
        # Meta files are numbered from 1. JSON is UTF-8, so do not rely on the
        # platform's default text encoding.
        with open(f'./models/{model_name}_{i + 1}_meta.json', encoding='utf-8') as f:
            data = json.load(f)

        # NOTE(review): assumes data['best_epoch'] is a 0-based index into each
        # val_scores list -- confirm against the training code.
        records.append({
            'model_name': model_name,
            **{metric: scores[data['best_epoch']] for metric, scores in data['val_scores'].items()}
        })

# Built from a separate list so `model_results` is only ever a DataFrame.
model_results = pd.DataFrame(records)

In [15]:
# Mean and standard deviation (pandas default, ddof=1) of every metric across
# each model's repetitions.
grouped = model_results.groupby('model_name')
means = grouped.mean().round(2)
stds = grouped.std().round(2)

# Render both parts with exactly two decimals. Plain str() on a rounded float drops
# trailing zeros, which gives ragged cells such as '12.1 ± 0.03' or '14.16 ± 0.2'.
two_decimals = '{:.2f}'.format
table = (
    means.apply(lambda column: column.map(two_decimals))
    .add(' ± ')
    .add(stds.apply(lambda column: column.map(two_decimals)))
)
table.columns = table.columns.str.upper()
table = table.rename(columns={'GMSE': 'gMSE', 'GRMSE': 'gRMSE'})
table = table[['RMSE', 'MAPE', 'gRMSE']]
table.index.name = None  # drop the 'model_name' label left on the index by groupby
table

Unnamed: 0,RMSE,MAPE,gRMSE
002-rnn,12.11 ± 0.05,0.05 ± 0.0,14.2 ± 0.17
011-rnn_hs512,12.1 ± 0.03,0.05 ± 0.0,14.16 ± 0.2
012-rnn_hs512_256_128,12.21 ± 0.08,0.05 ± 0.0,14.42 ± 0.12
013-rnn_hs_1024,12.12 ± 0.06,0.05 ± 0.0,14.16 ± 0.1
014-rnn_hs_128,12.37 ± 0.03,0.05 ± 0.0,14.56 ± 0.07


In [16]:
# Print the LaTeX source itself; a bare expression would display the string's repr,
# with newlines and backslashes escaped, which cannot be pasted into a document.
print(table.to_latex())

'\\begin{tabular}{llll}\n\\toprule\n & RMSE & MAPE & gRMSE \\\\\n\\midrule\n002-rnn & 12.11 ± 0.05 & 0.05 ± 0.0 & 14.2 ± 0.17 \\\\\n011-rnn_hs512 & 12.1 ± 0.03 & 0.05 ± 0.0 & 14.16 ± 0.2 \\\\\n012-rnn_hs512_256_128 & 12.21 ± 0.08 & 0.05 ± 0.0 & 14.42 ± 0.12 \\\\\n013-rnn_hs_1024 & 12.12 ± 0.06 & 0.05 ± 0.0 & 14.16 ± 0.1 \\\\\n014-rnn_hs_128 & 12.37 ± 0.03 & 0.05 ± 0.0 & 14.56 ± 0.07 \\\\\n\\bottomrule\n\\end{tabular}\n'

Number of layers comparison

In [27]:
# Experiments compared in this section; edit this list to compare other runs.
model_names = ['011-rnn_hs512', '021-rnn_layer_2', '022-rnn_layer_3']

# One record per trained repetition: the model name plus every validation metric
# read at that run's best epoch.
records = []
for model_name in model_names:
    config = load_config(model_name)

    # Configs without a 'repetitions' entry are treated as a single run.
    for i in range(config['repetitions'] if 'repetitions' in config else 1):
        # Meta files are numbered from 1. JSON is UTF-8, so do not rely on the
        # platform's default text encoding.
        with open(f'./models/{model_name}_{i + 1}_meta.json', encoding='utf-8') as f:
            data = json.load(f)

        # NOTE(review): assumes data['best_epoch'] is a 0-based index into each
        # val_scores list -- confirm against the training code.
        records.append({
            'model_name': model_name,
            **{metric: scores[data['best_epoch']] for metric, scores in data['val_scores'].items()}
        })

# Built from a separate list so `model_results` is only ever a DataFrame.
model_results = pd.DataFrame(records)

In [28]:
# Mean and standard deviation (pandas default, ddof=1) of every metric across
# each model's repetitions.
grouped = model_results.groupby('model_name')
means = grouped.mean().round(2)
stds = grouped.std().round(2)

# Render both parts with exactly two decimals. Plain str() on a rounded float drops
# trailing zeros, which gives ragged cells such as '12.1 ± 0.03' or '14.0 ± 0.09'.
two_decimals = '{:.2f}'.format
table = (
    means.apply(lambda column: column.map(two_decimals))
    .add(' ± ')
    .add(stds.apply(lambda column: column.map(two_decimals)))
)
table.columns = table.columns.str.upper()
table = table.rename(columns={'GMSE': 'gMSE', 'GRMSE': 'gRMSE'})
table = table[['RMSE', 'MAPE', 'gRMSE']]
table.index.name = None  # drop the 'model_name' label left on the index by groupby
table

Unnamed: 0,RMSE,MAPE,gRMSE
011-rnn_hs512,12.1 ± 0.03,0.05 ± 0.0,14.16 ± 0.2
021-rnn_layer_2,12.09 ± 0.04,0.05 ± 0.0,14.08 ± 0.06
022-rnn_layer_3,12.08 ± 0.05,0.05 ± 0.0,14.0 ± 0.09


In [29]:
# Print the LaTeX source itself; a bare expression would display the string's repr,
# with newlines and backslashes escaped, which cannot be pasted into a document.
print(table.to_latex())

'\\begin{tabular}{llll}\n\\toprule\n & RMSE & MAPE & gRMSE \\\\\n\\midrule\n011-rnn_hs512 & 12.1 ± 0.03 & 0.05 ± 0.0 & 14.16 ± 0.2 \\\\\n021-rnn_layer_2 & 12.09 ± 0.04 & 0.05 ± 0.0 & 14.08 ± 0.06 \\\\\n022-rnn_layer_3 & 12.08 ± 0.05 & 0.05 ± 0.0 & 14.0 ± 0.09 \\\\\n\\bottomrule\n\\end{tabular}\n'

RNN dropout

In [34]:
# Experiments compared in this section; edit this list to compare other runs.
model_names = ['022-rnn_layer_3', '031-rnn_dropout_0.1', '032-rnn_dropout_0.2', '033-rnn_dropout_0.3']

# One record per trained repetition: the model name plus every validation metric
# read at that run's best epoch.
records = []
for model_name in model_names:
    config = load_config(model_name)

    # Configs without a 'repetitions' entry are treated as a single run.
    for i in range(config['repetitions'] if 'repetitions' in config else 1):
        # Meta files are numbered from 1. JSON is UTF-8, so do not rely on the
        # platform's default text encoding.
        with open(f'./models/{model_name}_{i + 1}_meta.json', encoding='utf-8') as f:
            data = json.load(f)

        # NOTE(review): assumes data['best_epoch'] is a 0-based index into each
        # val_scores list -- confirm against the training code.
        records.append({
            'model_name': model_name,
            **{metric: scores[data['best_epoch']] for metric, scores in data['val_scores'].items()}
        })

# Built from a separate list so `model_results` is only ever a DataFrame.
model_results = pd.DataFrame(records)

In [35]:
# Mean and standard deviation (pandas default, ddof=1) of every metric across
# each model's repetitions.
grouped = model_results.groupby('model_name')
means = grouped.mean().round(2)
stds = grouped.std().round(2)

# Render both parts with exactly two decimals. Plain str() on a rounded float drops
# trailing zeros, which gives ragged cells such as '14.0 ± 0.09' or '0.05 ± 0.0'.
two_decimals = '{:.2f}'.format
table = (
    means.apply(lambda column: column.map(two_decimals))
    .add(' ± ')
    .add(stds.apply(lambda column: column.map(two_decimals)))
)
table.columns = table.columns.str.upper()
table = table.rename(columns={'GMSE': 'gMSE', 'GRMSE': 'gRMSE'})
table = table[['RMSE', 'MAPE', 'gRMSE']]
table.index.name = None  # drop the 'model_name' label left on the index by groupby
table

Unnamed: 0,RMSE,MAPE,gRMSE
022-rnn_layer_3,12.08 ± 0.05,0.05 ± 0.0,14.0 ± 0.09
031-rnn_dropout_0.1,12.12 ± 0.03,0.05 ± 0.0,14.16 ± 0.02
032-rnn_dropout_0.2,12.22 ± 0.06,0.05 ± 0.0,14.22 ± 0.08
033-rnn_dropout_0.3,12.39 ± 0.09,0.05 ± 0.0,14.3 ± 0.09


In [36]:
# Print the LaTeX source itself; a bare expression would display the string's repr,
# with newlines and backslashes escaped, which cannot be pasted into a document.
print(table.to_latex())

'\\begin{tabular}{llll}\n\\toprule\n & RMSE & MAPE & gRMSE \\\\\n\\midrule\n022-rnn_layer_3 & 12.08 ± 0.05 & 0.05 ± 0.0 & 14.0 ± 0.09 \\\\\n031-rnn_dropout_0.1 & 12.12 ± 0.03 & 0.05 ± 0.0 & 14.16 ± 0.02 \\\\\n032-rnn_dropout_0.2 & 12.22 ± 0.06 & 0.05 ± 0.0 & 14.22 ± 0.08 \\\\\n033-rnn_dropout_0.3 & 12.39 ± 0.09 & 0.05 ± 0.0 & 14.3 ± 0.09 \\\\\n\\bottomrule\n\\end{tabular}\n'

Fully connected layer dropout

In [40]:
# Experiments compared in this section; edit this list to compare other runs.
# NOTE(review): the output recorded for this section reports identical scores for
# all four models (12.08 ± 0.05 / 0.05 ± 0.0 / 14.0 ± 0.09). Confirm that the 04x
# runs really differ from 022-rnn_layer_3 (dropout setting applied, meta files
# not copied) before drawing conclusions from the table.
model_names = ['022-rnn_layer_3', '041-fc_layer_dropout_0.1', '042-fc_layer_dropout_0.3', '043-fc_layer_dropout_0.4']

# One record per trained repetition: the model name plus every validation metric
# read at that run's best epoch.
records = []
for model_name in model_names:
    config = load_config(model_name)

    # Configs without a 'repetitions' entry are treated as a single run.
    for i in range(config['repetitions'] if 'repetitions' in config else 1):
        # Meta files are numbered from 1. JSON is UTF-8, so do not rely on the
        # platform's default text encoding.
        with open(f'./models/{model_name}_{i + 1}_meta.json', encoding='utf-8') as f:
            data = json.load(f)

        # NOTE(review): assumes data['best_epoch'] is a 0-based index into each
        # val_scores list -- confirm against the training code.
        records.append({
            'model_name': model_name,
            **{metric: scores[data['best_epoch']] for metric, scores in data['val_scores'].items()}
        })

# Built from a separate list so `model_results` is only ever a DataFrame.
model_results = pd.DataFrame(records)

In [41]:
# Mean and standard deviation (pandas default, ddof=1) of every metric across
# each model's repetitions.
grouped = model_results.groupby('model_name')
means = grouped.mean().round(2)
stds = grouped.std().round(2)

# Render both parts with exactly two decimals. Plain str() on a rounded float drops
# trailing zeros, which gives ragged cells such as '14.0 ± 0.09' or '0.05 ± 0.0'.
two_decimals = '{:.2f}'.format
table = (
    means.apply(lambda column: column.map(two_decimals))
    .add(' ± ')
    .add(stds.apply(lambda column: column.map(two_decimals)))
)
table.columns = table.columns.str.upper()
table = table.rename(columns={'GMSE': 'gMSE', 'GRMSE': 'gRMSE'})
table = table[['RMSE', 'MAPE', 'gRMSE']]
table.index.name = None  # drop the 'model_name' label left on the index by groupby
table

Unnamed: 0,RMSE,MAPE,gRMSE
022-rnn_layer_3,12.08 ± 0.05,0.05 ± 0.0,14.0 ± 0.09
041-fc_layer_dropout_0.1,12.08 ± 0.05,0.05 ± 0.0,14.0 ± 0.09
042-fc_layer_dropout_0.3,12.08 ± 0.05,0.05 ± 0.0,14.0 ± 0.09
043-fc_layer_dropout_0.4,12.08 ± 0.05,0.05 ± 0.0,14.0 ± 0.09


In [42]:
# Print the LaTeX source itself; a bare expression would display the string's repr,
# with newlines and backslashes escaped, which cannot be pasted into a document.
print(table.to_latex())

'\\begin{tabular}{llll}\n\\toprule\n & RMSE & MAPE & gRMSE \\\\\n\\midrule\n022-rnn_layer_3 & 12.08 ± 0.05 & 0.05 ± 0.0 & 14.0 ± 0.09 \\\\\n041-fc_layer_dropout_0.1 & 12.08 ± 0.05 & 0.05 ± 0.0 & 14.0 ± 0.09 \\\\\n042-fc_layer_dropout_0.3 & 12.08 ± 0.05 & 0.05 ± 0.0 & 14.0 ± 0.09 \\\\\n043-fc_layer_dropout_0.4 & 12.08 ± 0.05 & 0.05 ± 0.0 & 14.0 ± 0.09 \\\\\n\\bottomrule\n\\end{tabular}\n'

Prediction horizon

In [7]:
# Experiments compared in this section; edit this list to compare other runs.
model_names = ['022-rnn_layer_3', '051-pred_horizon_15', '052-pred_horizon_45', '053-pred_horizon_60']

# One record per trained repetition: the model name plus every validation metric
# read at that run's best epoch.
records = []
for model_name in model_names:
    config = load_config(model_name)

    # Configs without a 'repetitions' entry are treated as a single run.
    for i in range(config['repetitions'] if 'repetitions' in config else 1):
        # Meta files are numbered from 1. JSON is UTF-8, so do not rely on the
        # platform's default text encoding.
        with open(f'./models/{model_name}_{i + 1}_meta.json', encoding='utf-8') as f:
            data = json.load(f)

        # NOTE(review): assumes data['best_epoch'] is a 0-based index into each
        # val_scores list -- confirm against the training code.
        records.append({
            'model_name': model_name,
            **{metric: scores[data['best_epoch']] for metric, scores in data['val_scores'].items()}
        })

# Built from a separate list so `model_results` is only ever a DataFrame.
model_results = pd.DataFrame(records)

In [8]:
# Mean and standard deviation (pandas default, ddof=1) of every metric across
# each model's repetitions.
grouped = model_results.groupby('model_name')
means = grouped.mean().round(2)
stds = grouped.std().round(2)

# Render both parts with exactly two decimals. Plain str() on a rounded float drops
# trailing zeros, which gives ragged cells such as '14.0 ± 0.09' or '0.03 ± 0.0'.
two_decimals = '{:.2f}'.format
table = (
    means.apply(lambda column: column.map(two_decimals))
    .add(' ± ')
    .add(stds.apply(lambda column: column.map(two_decimals)))
)
table.columns = table.columns.str.upper()
table = table.rename(columns={'GMSE': 'gMSE', 'GRMSE': 'gRMSE'})
table = table[['RMSE', 'MAPE', 'gRMSE']]
table.index.name = None  # drop the 'model_name' label left on the index by groupby
table

Unnamed: 0,RMSE,MAPE,gRMSE
022-rnn_layer_3,12.08 ± 0.05,0.05 ± 0.0,14.0 ± 0.09
051-pred_horizon_15,7.53 ± 0.02,0.03 ± 0.0,8.67 ± 0.12
052-pred_horizon_45,15.94 ± 0.06,0.07 ± 0.0,18.83 ± 0.24
053-pred_horizon_60,19.27 ± 0.08,0.08 ± 0.0,23.08 ± 0.16


In [9]:
# Print the LaTeX source itself; a bare expression would display the string's repr,
# with newlines and backslashes escaped, which cannot be pasted into a document.
print(table.to_latex())

'\\begin{tabular}{llll}\n\\toprule\n & RMSE & MAPE & gRMSE \\\\\n\\midrule\n022-rnn_layer_3 & 12.08 ± 0.05 & 0.05 ± 0.0 & 14.0 ± 0.09 \\\\\n051-pred_horizon_15 & 7.53 ± 0.02 & 0.03 ± 0.0 & 8.67 ± 0.12 \\\\\n052-pred_horizon_45 & 15.94 ± 0.06 & 0.07 ± 0.0 & 18.83 ± 0.24 \\\\\n053-pred_horizon_60 & 19.27 ± 0.08 & 0.08 ± 0.0 & 23.08 ± 0.16 \\\\\n\\bottomrule\n\\end{tabular}\n'

Train horizon

In [10]:
# Experiments compared in this section; edit this list to compare other runs.
model_names = ['022-rnn_layer_3', '061-train_horizon_45', '062-train_horizon_75']

# One record per trained repetition: the model name plus every validation metric
# read at that run's best epoch.
records = []
for model_name in model_names:
    config = load_config(model_name)

    # Configs without a 'repetitions' entry are treated as a single run.
    for i in range(config['repetitions'] if 'repetitions' in config else 1):
        # Meta files are numbered from 1. JSON is UTF-8, so do not rely on the
        # platform's default text encoding.
        with open(f'./models/{model_name}_{i + 1}_meta.json', encoding='utf-8') as f:
            data = json.load(f)

        # NOTE(review): assumes data['best_epoch'] is a 0-based index into each
        # val_scores list -- confirm against the training code.
        records.append({
            'model_name': model_name,
            **{metric: scores[data['best_epoch']] for metric, scores in data['val_scores'].items()}
        })

# Built from a separate list so `model_results` is only ever a DataFrame.
model_results = pd.DataFrame(records)

In [11]:
# Mean and standard deviation (pandas default, ddof=1) of every metric across
# each model's repetitions.
grouped = model_results.groupby('model_name')
means = grouped.mean().round(2)
stds = grouped.std().round(2)

# Render both parts with exactly two decimals. Plain str() on a rounded float drops
# trailing zeros, which gives ragged cells such as '12.1 ± 0.06' or '14.0 ± 0.09'.
two_decimals = '{:.2f}'.format
table = (
    means.apply(lambda column: column.map(two_decimals))
    .add(' ± ')
    .add(stds.apply(lambda column: column.map(two_decimals)))
)
table.columns = table.columns.str.upper()
table = table.rename(columns={'GMSE': 'gMSE', 'GRMSE': 'gRMSE'})
table = table[['RMSE', 'MAPE', 'gRMSE']]
table.index.name = None  # drop the 'model_name' label left on the index by groupby
table

Unnamed: 0,RMSE,MAPE,gRMSE
022-rnn_layer_3,12.08 ± 0.05,0.05 ± 0.0,14.0 ± 0.09
061-train_horizon_45,12.1 ± 0.06,0.05 ± 0.0,14.13 ± 0.13
062-train_horizon_75,12.09 ± 0.05,0.05 ± 0.0,14.23 ± 0.05


In [12]:
# Print the LaTeX source itself; a bare expression would display the string's repr,
# with newlines and backslashes escaped, which cannot be pasted into a document.
print(table.to_latex())

'\\begin{tabular}{llll}\n\\toprule\n & RMSE & MAPE & gRMSE \\\\\n\\midrule\n022-rnn_layer_3 & 12.08 ± 0.05 & 0.05 ± 0.0 & 14.0 ± 0.09 \\\\\n061-train_horizon_45 & 12.1 ± 0.06 & 0.05 ± 0.0 & 14.13 ± 0.13 \\\\\n062-train_horizon_75 & 12.09 ± 0.05 & 0.05 ± 0.0 & 14.23 ± 0.05 \\\\\n\\bottomrule\n\\end{tabular}\n'