In [None]:
import sys
# Put the parent directory on the import path; the project-local imports further down
# (dataloader, model, ...) presumably live there — verify against the repo layout.
sys.path.append('..')
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


In [None]:
import pickle
from datetime import datetime
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


from dataloader.builder import build_dataset
from model.mlp import MLP
from model.ensemble import MLPEnsemble
from uncertainty_estimator.masks import build_masks, DEFAULT_MASKS
from experiment_setup import build_estimator
from analysis.metrics import get_uq_metrics
from experiments.utils.data import scale, split_ood, multiple_kfold

# Set the default figure background colour to white for every figure created below.
plt.rcParams['figure.facecolor'] = 'white'

In [None]:
import torch

# Preferred GPU for this experiment. The index is only applied when CUDA is
# actually available; on machines with fewer GPUs it is clamped to the last
# existing device, and on CPU-only machines the call is skipped entirely so the
# notebook still survives "Restart & Run All".
CUDA_DEVICE_INDEX = 1
if torch.cuda.is_available():
    torch.cuda.set_device(min(CUDA_DEVICE_INDEX, torch.cuda.device_count() - 1))

In [None]:
# Experiment settings, read by the cells below via config[...].
config = dict(
    # --- uncertainty-estimation / evaluation ---
    nn_runs=100,            # passed to the masked MC-dropout estimator as nn_runs
    runs=2,                 # repeated estimator evaluations per model
    max_runs=20,            # passed to multiple_kfold
    k_folds=10,             # passed to multiple_kfold
    verbose=False,
    # --- network / training ---
    layers=[8, 256, 256, 128, 1],  # layers[0] is overwritten once the data is loaded
    epochs=100,             # NOTE(review): was spelled 10_0 (== 100); confirm 10_000 was not intended
    validation_step=50,
    acc_percentile=0.1,     # passed to get_uq_metrics
    patience=3,
    dropout_rate=0.2,       # forwarded to fit()
    dropout_uq=0.5,         # dropout rate handed to the uncertainty estimator
    batch_size=256,
    dataset='ccpp',
    l2_reg=1e-4,            # used as the optimizer's weight_decay
    ood_percentile=90,      # passed to split_ood
    with_ensemble=True,
)


### Get data

In [None]:
# Build the dataset named in the config and take its 'train' split as the full pool.
dataset = build_dataset(config['dataset'], val_split=0.01) 
x_all, y_all = dataset.dataset('train')
# Split the pool into the working set and a held-out "ood" set using config['ood_percentile'].
# NOTE(review): presumably out-of-distribution samples selected by a percentile cut — confirm in split_ood.
x_set, y_set, x_ood, y_ood = split_ood(x_all, y_all, config['ood_percentile'])
# Make the network's first layer size match the last dimension of the inputs.
config['layers'][0] = x_all.shape[-1]



In [None]:
# Masks evaluated below; evaluate_masks iterates this with .items(), i.e. as name -> mask.
masks = build_masks(DEFAULT_MASKS)


### Defining support functions


In [None]:
# TODO: refactor evaluations to make it more DRY
def evaluate_ensemble(model, x_val, y_val, y_scaler, tag='standard'):
    """Score the 'eue' uncertainty estimator of an ensemble on one evaluation set.

    Args:
        model: callable returning a tensor of predictions for ``x_val``
            (the result is moved to CPU and converted to numpy).
        x_val: inputs to evaluate on.
        y_val: targets in the same space as the model output; they are mapped
            back with ``y_scaler.inverse_transform`` only for the RMSE.
        y_scaler: scaler exposing ``inverse_transform``.
        tag: label stored in every result row (e.g. 'standard' or 'ood').

    Returns:
        list of rows ``[acc, ndcg, ll, rmse, 'eue', tag]``, one row per
        repetition in ``config['runs']``.
    """
    predictions = model(x_val).cpu().numpy()
    # Absolute errors in model-output space are what the UQ metrics are compared to.
    errors = np.abs(predictions - y_val)
    # RMSE is reported after undoing the target scaling.
    scaled_errors = y_scaler.inverse_transform(predictions) - y_scaler.inverse_transform(y_val)
    rmse = np.sqrt(np.mean(np.square(scaled_errors)))
    results = []

    estimator = build_estimator('eue', model)
    for _ in range(config['runs']):
        estimations = estimator.estimate(x_val)
        acc, ndcg, ll = get_uq_metrics(estimations, errors, config['acc_percentile'])
        results.append([acc, ndcg, ll, rmse, 'eue', tag])

    # The rows must be handed back: callers pass the return value to list.extend(),
    # which raises TypeError on None.
    return results
    
def evaluate_masks(model, masks, x_val, y_val, y_scaler, tag='standard'):
    """Score the masked MC-dropout estimator for every mask on one evaluation set.

    Args:
        model: callable returning a tensor of predictions for ``x_val``.
        masks: mapping of mask name -> mask object; masks that expose a
            ``reset`` method are reset after every repetition.
        x_val: inputs to evaluate on.
        y_val: targets in the same space as the model output.
        y_scaler: scaler exposing ``inverse_transform`` (used for the RMSE only).
        tag: label stored in every result row.

    Returns:
        list of rows ``[acc, ndcg, ll, rmse, mask_name, tag]`` with
        ``config['runs']`` rows per mask, in mask iteration order.
    """
    preds = model(x_val).cpu().numpy()
    abs_errors = np.abs(preds - y_val)

    # RMSE is computed once, on targets mapped back through the scaler.
    residuals = y_scaler.inverse_transform(preds) - y_scaler.inverse_transform(y_val)
    mean_squared = np.mean(np.square(residuals))
    rmse = np.sqrt(mean_squared)

    rows = []
    for mask_name, mask in masks.items():
        estimator = build_estimator(
            'mcdue_masked',
            model,
            nn_runs=config['nn_runs'],
            dropout_mask=mask,
            dropout_rate=config['dropout_uq'],
        )

        for _ in range(config['runs']):
            uncertainty = estimator.estimate(x_val)
            acc, ndcg, ll = get_uq_metrics(uncertainty, abs_errors, config['acc_percentile'])
            rows.append([acc, ndcg, ll, rmse, mask_name, tag])

            # Only some masks carry state that needs clearing between repetitions.
            if not hasattr(mask, 'reset'):
                continue
            mask.reset()

    return rows


### Train and generate results

In [None]:
# Result rows collected over all folds: [acc, ndcg, ll, rmse, estimator/mask name, tag].
mask_results = []
kfold_iterator = multiple_kfold(config['k_folds'], len(x_set), config['max_runs'])

# Keyword arguments forwarded to fit(); they do not change between folds, so build them once.
train_opts = ['patience', 'dropout_rate', 'epochs', 'batch_size', 'validation_step']
train_config = {k: config[k] for k in config if k in train_opts}

for m, (train_idx, val_idx) in enumerate(kfold_iterator):
    x_train, y_train = x_set[train_idx], y_set[train_idx]
    x_val, y_val = x_set[val_idx], y_set[val_idx]
    print("Model", m+1)

    # Scalers come from scale() on this fold's train/val pair; the OOD set is
    # transformed with those same scalers so all sets share one space.
    x_train, x_val, x_scaler = scale(x_train, x_val)
    y_train, y_val, y_scaler = scale(y_train, y_val)
    x_ood_scaled = x_scaler.transform(x_ood)
    y_ood_scaled = y_scaler.transform(y_ood)

    # Single MLP: evaluate every dropout mask on the validation fold and on the OOD set.
    # The optimizer dict is rebuilt per fold in case the model consumes/mutates it.
    optimizer = {'type': 'Adadelta', 'weight_decay': config['l2_reg']}
    model = MLP(config['layers'], optimizer=optimizer)
    model.fit((x_train, y_train), (x_val, y_val), **train_config)

    mask_results.extend(evaluate_masks(model, masks, x_val, y_val, y_scaler))
    mask_results.extend(
        evaluate_masks(model, masks, x_ood_scaled, y_ood_scaled, y_scaler, tag='ood'))

    # Ensemble baseline: only trained when requested in the config (the flag was
    # previously ignored). evaluate_ensemble builds its own 'eue' estimator, so
    # none is constructed here.
    if config['with_ensemble']:
        ensemble = MLPEnsemble(config['layers'], n_models=5, reduction='mean')
        ensemble.fit((x_train, y_train), (x_val, y_val), **train_config)

        mask_results.extend(evaluate_ensemble(ensemble, x_val, y_val, y_scaler))
        mask_results.extend(
            evaluate_ensemble(ensemble, x_ood_scaled, y_ood_scaled, y_scaler, tag='ood'))
    
    

### Plot the results 

In [None]:
# One tall canvas for all panels, titled with the dataset name.
fig = plt.figure(figsize=(16, 16))
fig.suptitle(config['dataset'], fontsize=16)
fig.subplots_adjust(top=0.95)

# Tabular view of the collected result rows; column order matches the row layout.
mask_df = pd.DataFrame(
    mask_results,
    columns=['Acc', 'NDCG', 'LL', 'RMSE', 'Mask', 'Tag'],
)


def boxplot(df, x_label, y_label, i, bottom=0, top=1):
    """Draw one seaborn boxplot, split by 'Tag', into slot ``i`` of a 4x2 grid.

    Args:
        df: data passed to seaborn.
        x_label: column for the x axis (may be None).
        y_label: column for the y axis.
        i: 1-based subplot position within the 4x2 grid.
        bottom: lower y limit; pass None to leave the y range automatic.
        top: upper y limit, only applied when ``bottom`` is not None.
    """
    ax = plt.subplot(4, 2, i)
    plt.xticks(rotation=45)

    fixed_range = bottom is not None
    if fixed_range:
        ax.set_ylim(bottom, top)

    sns.boxplot(data=df, x=x_label, y=y_label, hue='Tag', ax=ax)

# Panel specification: (data, x column, y column, lower y limit, upper y limit).
# A lower limit of None leaves the y range automatic (the upper limit is then ignored).
panels = [
    (mask_df, 'Mask', 'Acc', 0, 1),
    (mask_df, 'Mask', 'NDCG', 0, 1),
    (mask_df[mask_df.Tag == 'standard'], 'Mask', 'LL', None, 1),
    (mask_df[mask_df.Tag == 'ood'], 'Mask', 'LL', None, 1),
    (mask_df, None, 'RMSE', 0, 1.2 * mask_df.RMSE.max()),
]

for position, (frame, x_col, y_col, low, high) in enumerate(panels, start=1):
    boxplot(frame, x_col, y_col, position, low, high)


In [None]:
# Persist the raw result rows under data/, stamped with the dataset name and the current time.
timestamp = datetime.now().strftime("%y-%m-%d__%H_%M")
results_dir = Path("data")
# Create the output directory up front: without it open() raises FileNotFoundError
# and the results of the whole training run above would be lost.
results_dir.mkdir(parents=True, exist_ok=True)
results_path = results_dir / f"results_{config['dataset']}_{timestamp}.pkl"
with open(results_path, 'wb') as f:
    pickle.dump(mask_results, f)
    