In [1]:
import sys
sys.path.append('..')
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

from dataloader.boston_housing import BostonHousingData
from dataloader.concrete import ConcreteData
from dataloader.energy_efficiency import EnergyEfficiencyData
from experiment_setup import get_model, build_estimator
from analysis.metrics import uq_accuracy, uq_ndcg, uq_nll
from uncertainty_estimator.masks import BasicMask, LHSMask, MirrorMask, DecorrelationMask

plt.rcParams['figure.facecolor'] = 'white'

In [3]:
# Experiment settings shared by every cell below.
config = dict(
    random_seed=43,        # NOTE(review): not read by any cell visible in this notebook
    nn_runs=100,           # passed to LHSMask and to build_estimator as nn_runs
    runs=3,                # repeated estimate() calls per mask and model
    model_runs=10,         # number of checkpoints trained and evaluated
    verbose=False,         # forwarded to get_model
    use_cache=False,       # NOTE(review): not read by any cell visible in this notebook
    # Forwarded to get_model; the leading 13 presumably matches the feature
    # count of 'boston_housing' -- TODO confirm before switching datasets.
    layers=[13, 512, 512, 256, 1],
    epochs=30_000,         # forwarded to get_model
    acc_percentile=0.1,    # forwarded to uq_accuracy
    patience=3,            # forwarded to get_model
    dropout_train=0.2,     # dropout_rate given to get_model while training
    dropout_uq=0.3,        # dropout_rate given to build_estimator
    batch_size=32,         # forwarded to get_model
    dataset='boston_housing',  # key into the `datasets` loader table
    scale=True,            # standardize x and y with StandardScaler when True
)

In [4]:
# Load data
# Loader classes keyed by the name used in config['dataset'].
datasets = dict(
    boston_housing=BostonHousingData,
    concrete=ConcreteData,
    energy_efficiency=EnergyEfficiencyData,
)

# One checkpoint path per trained model, indexed 0 .. model_runs - 1.
model_paths = [
    f"model/data/{config['dataset']}_{i}.ckpt"
    for i in range(config['model_runs'])
]

dataset = datasets[config['dataset']]()
x_train, y_train = dataset.dataset('train')
x_val, y_val = dataset.dataset('val')

In [5]:
# Normalize dataset
def scale(train, val):
    """Standardize two arrays with statistics taken from the first one.

    A fresh StandardScaler is fitted on `train` only and then applied to both
    `train` and `val`.

    Returns:
        (scaled_train, scaled_val, fitted_scaler)
    """
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train)
    val_scaled = scaler.transform(val)
    return train_scaled, val_scaled, scaler

# y_scaler stays None when scaling is disabled; it is handed to
# model.evaluate in the training cell.
# NOTE: x_*/y_* are overwritten with their scaled versions, so re-running this
# cell without re-running the load cell refits the scalers on already-scaled data.
y_scaler = None
if config['scale']:
    x_train, x_val, _ = scale(x_train, x_val)
    y_train, y_val, y_scaler = scale(y_train, y_val)

In [None]:
# Train models
def plot_evaluations(model, x, y, x_, y_):
    """Scatter predictions against targets for two datasets on a new figure.

    Args:
        model: callable; `model(inputs)` must return an object that supports
            `.cpu().numpy()`.
        x, y: inputs and targets of the first dataset; the range of `y` also
            defines the reference diagonal.
        x_, y_: inputs and targets of the second dataset.
    """
    # Run each forward pass exactly once and reuse the result for plotting.
    predictions = model(x).cpu().numpy()
    predictions_ = model(x_).cpu().numpy()

    plt.figure(figsize=(10, 10))
    # Diagonal "prediction == target" reference line over the range of y.
    lowest, highest = min(y), max(y)
    plt.plot((lowest, highest), (lowest, highest))
    plt.scatter(predictions, y)
    plt.scatter(predictions_, y_)
    plt.xlabel('Prediction')
    plt.ylabel('Target')


# Keyword arguments shared by every training run.
# NOTE(review): retrain is hard-coded to True; config['use_cache'] is not consulted here.
train_options = dict(
    retrain=True,
    verbose=config['verbose'],
    patience=config['patience'],
    dropout_rate=config['dropout_train'],
    epochs=config['epochs'],
    batch_size=config['batch_size'],
)

for i in range(config['model_runs']):
    model = get_model(
        config['layers'], model_paths[i], (x_train, y_train), (x_val, y_val),
        **train_options)
    # Validation points are drawn first, training points second.
    plot_evaluations(model, x_val, y_val, x_train, y_train)
    print("Model", i+1, "loss", model.evaluate((x_val, y_val), y_scaler))

In [None]:
# Dropout masks to compare, keyed by the label used in the result plots.
# 'vanilla' carries no mask object (None is passed as dropout_mask).
masks = dict(
    vanilla=None,
    basic_mask=BasicMask(),
    lhs=LHSMask(config['nn_runs']),
    lhs_shuffled=LHSMask(config['nn_runs'], shuffle=True),
    mirror_random=MirrorMask(),
    decorrelating=DecorrelationMask(),
    decorr_sc=DecorrelationMask(scaling=True, dry_run=False),
)


In [None]:
# Evaluate different masks

def get_metrics(estimations, errors, acc_percentile=0.1):
    """Compute the three uncertainty-quality scores for one set of estimates.

    Args:
        estimations: uncertainty estimates.
        errors: prediction errors the estimates are compared against.
        acc_percentile: forwarded to `uq_accuracy` as its third argument.

    Returns:
        Tuple `(acc, ndcg, nll)` from `uq_accuracy`, `uq_ndcg` and `uq_nll`.
    """
    return (
        uq_accuracy(estimations, errors, acc_percentile),
        uq_ndcg(errors, estimations),
        uq_nll(errors, estimations),
    )

def evaluate_masks(model, masks, x_val, y_val, y_scaler=None):
    """Score the uncertainty estimates of every mask against one model's errors.

    Reads `nn_runs`, `dropout_uq`, `runs` and `acc_percentile` from the
    notebook-level `config`.

    Args:
        model: callable; `model(x_val)` must return an object that supports
            `.cpu().numpy()`.
        masks: mapping of mask name -> mask object (or None).
        x_val, y_val: validation inputs and targets.
        y_scaler: accepted but not used by this function.

    Returns:
        List of `[acc, ndcg, nll, mask_name]` rows, `config['runs']` rows per mask.
    """
    # Absolute prediction errors, computed once and shared by all masks.
    abs_errors = np.abs(model(x_val).cpu().numpy() - y_val)

    rows = []
    for mask_name, mask in masks.items():
        estimator = build_estimator(
            'mcdue_masked', model, nn_runs=config['nn_runs'], dropout_mask=mask,
            dropout_rate=config['dropout_uq'])

        for _ in range(config['runs']):
            uncertainty = estimator.estimate(x_val)
            scores = get_metrics(uncertainty, abs_errors, config['acc_percentile'])
            rows.append([*scores, mask_name])

            # Masks exposing reset() are reset after every run.
            if hasattr(mask, 'reset'):
                mask.reset()

    return rows

# Gather [acc, ndcg, nll, mask] rows over all models; starting from an empty
# list keeps this cell safe to re-run.
mask_results = []
for model_run in range(config['model_runs']):
    print(f"===Estimate on model {model_run+1}====")
    # No datasets are passed here; presumably this restores the checkpoint
    # written by the training cell -- confirm in experiment_setup.get_model.
    model = get_model(config['layers'], model_paths[model_run])
    mask_results += evaluate_masks(model, masks, x_val, y_val)
    


In [None]:
# Plot the results

# One row per (model, mask, run); column order follows the rows built in evaluate_masks.
mask_df = pd.DataFrame(
    data=mask_results,
    columns=['acc', 'ndcg', 'nll', 'mask'],
)

# Single wide figure that holds the three metric panels.
plt.figure(figsize=(16, 6))
def boxplot(df, x_label, y_label, i, bottom=0, top=1):
    """Draw one seaborn box plot in panel `i` of a 1x3 grid on the current figure.

    Args:
        df: DataFrame holding the data to plot.
        x_label: column used for the categories on the x axis.
        y_label: column used for the values on the y axis.
        i: 1-based panel index within the 1x3 subplot grid.
        bottom: lower y limit; pass None to keep the automatic y range
            (in that case `top` is ignored as well).
        top: upper y limit, used only when `bottom` is not None.
    """
    ax = plt.subplot(1, 3, i)
    sns.boxplot(data=df, x=x_label, y=y_label, ax=ax)
    # Limits and tick rotation are set on the explicit Axes after drawing, so
    # they act on the ticks seaborn created rather than on pyplot's implicit state.
    if bottom is not None:
        ax.set_ylim(bottom, top)
    ax.tick_params(axis='x', labelrotation=45)
    
# One panel per metric; nll keeps its automatic y range.
boxplot(mask_df, 'mask', 'acc', 1, bottom=0, top=0.8)
boxplot(mask_df, 'mask', 'ndcg', 2, bottom=0, top=0.9)
boxplot(mask_df, 'mask', 'nll', 3, bottom=None)