In [None]:
import sys
# Add the parent directory to the import search path so the project-local
# packages imported by the next cell can be resolved from this notebook.
sys.path.append('..')
# Load IPython's autoreload extension; mode 2 re-imports modules before each
# cell execution so edits to project modules are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import scipy.linalg as la
from dppy.finite_dpps import FiniteDPP

from dataloader.builder import build_dataset
from model.mlp import MLP
from experiment_setup import get_model, build_estimator
from analysis.metrics import uq_accuracy, uq_ndcg, uq_nll
from uncertainty_estimator.masks import BasicMask, LHSMask, MirrorMask, DecorrelationMask, DPPMask, DPPAdaptiveMask
from uncertainty_estimator.masks import build_masks 
from analysis.metrics import get_uq_metrics

# Force a white figure background for every plot created in this notebook.
plt.rcParams.update({'figure.facecolor': 'white'})

In [None]:
# Shared experiment settings read by the cells below.
config = dict(
    random_seed=43,
    nn_runs=10,            # forwarded as `nn_runs` to build_masks / build_estimator
    runs=3,                # estimate() repetitions per mask in evaluate_masks
    model_runs=1,
    verbose=False,
    use_cache=False,
    layers=[13, 128, 128, 64, 1],  # passed to the MLP constructor
    # alternative: layers=[8, 256, 256, 128, 1],
    epochs=30000,          # forwarded to model.fit
    acc_percentile=0.1,    # forwarded to get_uq_metrics
    patience=3,            # forwarded to model.fit (overridden in the training cell)
    dropout_train=0.2,
    dropout_uq=0.5,        # `dropout_rate` given to the uncertainty estimator
    batch_size=32,         # forwarded to model.fit
    dataset='boston_housing',  # dataset name; shown as the figure title
    scale=True,            # standardize inputs and targets before training
    l2_reg=1e-5,           # passed to the MLP constructor
)

In [None]:
# Helper functions
def scale(train, val):
    """Standardize ``train`` and ``val`` using statistics fitted on ``train`` only.

    Returns a 3-tuple: the transformed train data, the transformed val data,
    and the fitted ``StandardScaler`` so callers can invert the transform.
    """
    fitted = StandardScaler()
    train_scaled = fitted.fit_transform(train)
    val_scaled = fitted.transform(val)
    return train_scaled, val_scaled, fitted

#### Try the DPP (determinantal point process) masks

In [None]:
# Load the dataset named in the shared config instead of repeating the
# literal, so the data and the figure title (which also reads
# config['dataset']) cannot drift apart.
dataset = build_dataset(config['dataset'])

In [None]:
# Fetch the (features, targets) pairs for the train and validation splits.
x_train, y_train = dataset.dataset('train')
x_val, y_val = dataset.dataset('val')

# y_scaler stays None unless scaling is enabled; when it is, inputs and
# targets are each standardized with a scaler fitted on the train split,
# and only the target scaler is kept.
y_scaler = None
if config['scale']:
    x_train, x_val, _ = scale(x_train, x_val)
    y_train, y_val, y_scaler = scale(y_train, y_val)

In [None]:
# Per-experiment overrides; note that this mutates the shared `config` in place.
config['dropout_uq'] = 0.5
config['patience'] = 5


model = MLP(config['layers'], l2_reg=config['l2_reg'])

# Only the whitelisted keys are forwarded to `fit`.
train_opts = ['patience', 'dropout_rate', 'epochs', 'batch_size', 'validation_step']
train_config = {k: config[k] for k in config if k in train_opts}
# The config stores the training dropout under 'dropout_train', which the
# whitelist above silently drops. Forward it under the whitelisted
# 'dropout_rate' name unless an explicit 'dropout_rate' is already set.
# NOTE(review): assumes MLP.fit accepts `dropout_rate`, as the whitelist
# implies — confirm against the MLP implementation.
if 'dropout_train' in config:
    train_config.setdefault('dropout_rate', config['dropout_train'])
model.fit((x_train, y_train), (x_val, y_val), **train_config)

In [None]:
# Build the baseline masks through the factory, then add the adaptive DPP
# mask by hand under its own key.
baseline_mask_names = ['vanilla', 'mirror_random', 'decorrelating']
masks = build_masks(baseline_mask_names, nn_runs=config['nn_runs'])
masks['dpp_adaptive'] = DPPAdaptiveMask()

In [None]:
# config['dropout_uq'] = 0.95

In [None]:
def evaluate_masks(model, masks, x_val, y_val, y_scaler):
    """Score every dropout mask's uncertainty estimates on the validation set.

    Args:
        model: trained model; calling it on ``x_val`` must return an object
            supporting ``.cpu().numpy()``.
        masks: mapping of mask name -> mask object.
        x_val: validation inputs.
        y_val: validation targets, in the same space as the model output.
        y_scaler: fitted target scaler exposing ``inverse_transform``, or
            ``None`` when the targets were not scaled.

    Returns:
        A list of ``[acc, ndcg, nll, rmse, mask_name]`` rows, one per mask
        per run (``config['runs']`` runs for each mask).

    Reads ``nn_runs``, ``dropout_uq``, ``runs`` and ``acc_percentile`` from the
    module-level ``config``.
    """
    predictions = model(x_val).cpu().numpy()
    # Absolute errors in the model's output space; these are what the
    # uncertainty metrics are compared against.
    errors = np.abs(predictions - y_val)

    if y_scaler is None:
        # Targets were never scaled (config['scale'] is False), so there is
        # nothing to invert; previously this path raised AttributeError.
        residuals = predictions - y_val
    else:
        residuals = y_scaler.inverse_transform(predictions) - y_scaler.inverse_transform(y_val)
    # The RMSE depends only on the model, so it is the same for every row.
    rmse = np.sqrt(np.mean(np.square(residuals)))

    results = []
    for name, mask in masks.items():
        estimator = build_estimator(
            'mcdue_masked', model, nn_runs=config['nn_runs'], dropout_mask=mask,
            dropout_rate=config['dropout_uq'])

        for _ in range(config['runs']):
            if name == 'dpp_adaptive':
                # Diagnostic output of the adaptive mask's ranks before each run.
                print(mask.ranks)
            estimations = estimator.estimate(x_val)
            acc, ndcg, nll = get_uq_metrics(estimations, errors, config['acc_percentile'])
            results.append([acc, ndcg, nll, rmse, name])

            # Masks that expose reset() are reset between runs.
            if hasattr(mask, 'reset'):
                mask.reset()

    return results
# Collect one metrics row per mask per run on the validation split.
mask_results = evaluate_masks(
    model=model, masks=masks, x_val=x_val, y_val=y_val, y_scaler=y_scaler)

In [None]:
# Build a standalone estimator around the adaptive DPP mask, using the same
# estimator settings as evaluate_masks.
mask = masks['dpp_adaptive']
estimator = build_estimator(
    'mcdue_masked',
    model,
    dropout_mask=mask,
    nn_runs=config['nn_runs'],
    dropout_rate=config['dropout_uq'],
)

# Last expression of the cell: the estimates for the validation set are shown
# as the cell output.
estimator.estimate(x_val)

In [None]:
# Plot the results: one row per (mask, run) pair, columns in the order
# produced by evaluate_masks.
result_columns = ['acc', 'ndcg', 'nll', 'rmse', 'mask']
mask_df = pd.DataFrame(mask_results, columns=result_columns)

# One square figure titled with the dataset name; the top margin is adjusted
# alongside the suptitle.
fig = plt.figure(figsize=(16, 16))
fig.suptitle(config['dataset'], fontsize=16)
fig.subplots_adjust(top=0.95)


def boxplot(df, x_label, y_label, i, bottom=0, top=1):
    """Draw a seaborn box plot of ``df`` into slot ``i`` of a 2x2 subplot grid.

    ``x_label`` and ``y_label`` are forwarded to seaborn as ``x`` and ``y``.
    The y-axis is limited to ``(bottom, top)`` unless ``bottom`` is ``None``,
    in which case the limits are left untouched.
    """
    axes = plt.subplot(2, 2, i)
    plt.xticks(rotation=45)
    fix_y_range = bottom is not None
    if fix_y_range:
        axes.set_ylim(bottom, top)
    sns.boxplot(data=df, x=x_label, y=y_label, ax=axes)

# One panel per metric: (x column, y column, y-axis lower bound, upper bound).
# A lower bound of None leaves that panel's y-range untouched.
panel_specs = [
    ('mask', 'acc', 0, 0.8),
    ('mask', 'ndcg', 0, 0.9),
    ('mask', 'nll', None, 1),
    (None, 'rmse', 0, 1.2*mask_df.rmse.max()),
]
for slot, (x_col, y_col, y_low, y_high) in enumerate(panel_specs, start=1):
    boxplot(mask_df, x_col, y_col, slot, y_low, y_high)
