In [1]:
import sys
sys.path.append('..')
%load_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as plt
# Give every figure created after this point a white axes background.
plt.rcParams['axes.facecolor'] = 'white'

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns

from dataloader.boston_housing import BostonHousingData
from experiment_setup import get_model, build_estimator
from analysis.metrics import uq_accuracy, uq_ndcg, uq_nll
from uncertainty_estimator.masks import BasicMask, LHSMask, MirrorMask, DecorrelationMask

In [3]:
# Load the Boston housing data and unpack the train / validation splits.
dataset = BostonHousingData()
df = dataset.df  # frame exposed by the loader; not used by the visible cells below
(x_train, y_train), (x_val, y_val) = (
    dataset.dataset(split) for split in ('train', 'val')
)

In [4]:
# Experiment configuration shared by the training and evaluation cells.
config = dict(
    random_seed=43,        # not read by any cell visible in this notebook
    nn_runs=100,           # passed as `nn_runs` to the LHS masks and to build_estimator
    runs=10,               # repeated estimate() calls per estimator and model
    model_runs=5,          # number of independently trained models / checkpoints
    verbose=True,          # forwarded to get_model during training
    use_cache=False,       # when False, the training cell retrains every model
    layers=[13, 512, 256, 256, 128, 1],  # layer sizes handed to get_model
    epochs=30_000,         # forwarded to get_model
    acc_percentile=0.1,    # third argument of uq_accuracy
    patience=10,           # forwarded to get_model during training
    dropout_train=0.2,     # get_model's dropout_rate when training
    dropout_uq=0.3,        # build_estimator's dropout_rate when estimating uncertainty
    batch_size=100,        # forwarded to get_model during training
)

# One checkpoint path per trained model.
model_paths = []
for i in range(config['model_runs']):
    model_paths.append(f"model/data/boston_housing_{i}.ckpt")

In [None]:
# Train `model_runs` networks, one per checkpoint path, and draw a
# predicted-vs-target diagnostic for each. Skipped when `use_cache` is set.
if not config['use_cache']:
    for i in range(config['model_runs']):
        model = get_model(
            config['layers'], model_paths[i], (x_train, y_train), (x_val, y_val),
            retrain=True, verbose=config['verbose'], patience=config['patience'],
            dropout_rate=config['dropout_train'], epochs=config['epochs'], batch_size=config['batch_size'])
        predictions = model(x_val).cpu().numpy()

        # ndarray.max() reduces over every axis and always yields a scalar.
        # The builtin max() iterates over rows instead, which returns a
        # length-1 array for an (N, 1) output and fails for wider outputs.
        upper = predictions.max()

        plt.figure(figsize=(10, 10))
        plt.plot((0, upper), (0, upper))  # y = x reference line
        plt.scatter(predictions, y_val)
        plt.xlabel("Predicted value")
        plt.ylabel("Target value (validation)")
        plt.title(f"Model {i+1}: predictions vs. targets")
        print("Model", i+1, "loss", model.val_loss)

In [None]:
# Dropout-mask strategies to compare. Insertion order is the order in which
# the evaluation loop visits them. `None` is passed through as
# `dropout_mask=None` (presumably the estimator's default dropout behaviour;
# confirm in build_estimator).
masks = dict(
    vanilla=None,
    basic_mask=BasicMask(),
    lhs=LHSMask(config['nn_runs']),
    lhs_shuffled=LHSMask(config['nn_runs'], shuffle=True),
    mirror_random=MirrorMask(),
    decorrelating=DecorrelationMask(),
    decorr_sc=DecorrelationMask(scaling=True, dry_run=False),
)


In [None]:
def _score_estimations(estimations, errors, label):
    """Build one ``[acc, ndcg, nll, label]`` result row.

    Args:
        estimations: uncertainty estimates returned by an estimator's
            ``estimate`` call.
        errors: absolute prediction errors for the same samples.
        label: name of the estimator / mask, stored in the last column.

    Returns:
        A four-element list matching the columns used for ``mask_df``.
    """
    acc = uq_accuracy(estimations, errors, config['acc_percentile'])
    ndcg = uq_ndcg(errors, estimations)
    nll = uq_nll(errors, estimations)
    return [acc, ndcg, nll, label]


# One row per (model, estimator, repetition): [acc, ndcg, nll, name].
mask_results = []

for model_run in range(config['model_runs']):
    print(f"===Estimate on model {model_run+1}====")
    # No `retrain` argument here, so get_model's default applies. Presumably
    # it loads the checkpoint written by the training cell; confirm in
    # experiment_setup.
    model = get_model(
        config['layers'], model_paths[model_run],
        (x_train, y_train), (x_val, y_val), epochs=config['epochs'])
    predictions = model(x_val).cpu().numpy()
    # NOTE(review): assumes predictions and y_val have matching shapes.
    # Otherwise NumPy broadcasting would silently change the shape of `errors`.
    errors = np.abs(predictions - y_val)

    # MC-dropout estimators, one per mask strategy.
    for name, mask in masks.items():
        estimator = build_estimator(
            'mcdue_masked', model, nn_runs=config['nn_runs'], dropout_mask=mask,
            dropout_rate=config['dropout_uq'])

        for run in range(config['runs']):
            estimations = estimator.estimate(x_val)
            mask_results.append(_score_estimations(estimations, errors, name))

            # Masks that expose reset() are reset between repetitions;
            # `None` (vanilla) and masks without reset() are left alone.
            if hasattr(mask, 'reset'):
                mask.reset()

    # NNGP baseline. The estimate must come from `nngp` itself; the original
    # cell called the leftover `estimator` from the last mask iteration, so
    # the 'nngp' rows never used the NNGP estimator.
    nngp = build_estimator('nngp', model, nn_runs=config['nn_runs'], dropout_rate=config['dropout_uq'])
    for run in range(config['runs']):
        estimations = nngp.estimate(x_val, x_train, y_train)
        mask_results.append(_score_estimations(estimations, errors, 'nngp'))


In [None]:
# Collect the per-run metrics into a frame with one row per evaluation.
mask_df = pd.DataFrame(mask_results, columns=['acc', 'ndcg', 'nll', 'mask'])


def boxplot(df, x_label, y_label, i, bottom=0, top=1):
    """Draw a boxplot of ``df[y_label]`` grouped by ``df[x_label]`` in panel ``i``.

    The panel is the ``i``-th cell of a 1x3 subplot grid on the current figure.

    Args:
        df: frame holding the columns to plot.
        x_label: column used for the categories on the x axis.
        y_label: column whose distribution is shown on the y axis.
        i: 1-based subplot index within the 1x3 grid.
        bottom: lower y limit; pass ``None`` to keep automatic limits.
        top: upper y limit, used only when ``bottom`` is not ``None``.
    """
    plt.subplot(1, 3, i)
    if bottom is not None:
        plt.ylim(bottom, top)
    # Plot the frame that was passed in; the original ignored `df` and always
    # read the global `mask_df`.
    sns.boxplot(data=df, x=x_label, y=y_label)
    # Rotate after plotting so the rotation applies to the category labels
    # seaborn has just created.
    plt.xticks(rotation=45)


plt.figure(figsize=(16, 6))
boxplot(mask_df, 'mask', 'acc', 1, 0, 0.8)
boxplot(mask_df, 'mask', 'ndcg', 2, 0, 0.9)
boxplot(mask_df, 'mask', 'nll', 3, None)