In [1]:
import sys
sys.path.append('..')
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from dataloader.rosen import RosenData
from experiment_setup import get_model, set_random, build_estimator
from analysis.metrics import uq_accuracy, uq_ndcg, uq_nll
from uncertainty_estimator.masks import BasicMask, LHSMask, MirrorMask, DecorrelationMask
from dataloader.toy import ToyQubicData, ToySinData

# Give every figure created in this notebook a white background.
plt.rcParams['figure.facecolor'] = 'white'

In [3]:
# Experiment settings read by the cells below.
config = dict(
    random_seed=43,           # seed intended for set_random()
    n_dim=10,                 # first argument to RosenData
    nn_runs=100,              # passed as nn_runs to the estimators and to LHSMask
    runs=2,                   # repeated estimate() calls per estimator
    model_runs=10,            # number of models trained and evaluated
    data_size=2000,           # passed to RosenData
    data_split=[0.4, 0.6, 0, 0],  # passed to RosenData
    verbose=False,            # forwarded to get_model during training
    use_cache=False,          # forwarded to the datasets; when False the models are retrained
    layers=[10, 256, 256, 128, 1],  # layer sizes handed to get_model
    epochs=30_000,            # forwarded to get_model
    acc_percentile=0.1,       # third argument of uq_accuracy
    patience=10,              # forwarded to get_model
    dropout_train=0.2,        # dropout_rate used when training the models
    dropout_uq=0.5,           # dropout_rate used when building the estimators
)

In [4]:
# Get data
# Build the Rosenbrock dataset from the dimensionality, size and split given in config.
rosen = RosenData(
    config['n_dim'], config['data_size'], config['data_split'],
    use_cache=config['use_cache'])

x_train, y_train = rosen.dataset('train')
# Read the validation split: previously 'train' was requested a second time, so
# x_val/y_val were just the training data and every metric was computed in-sample.
# NOTE(review): assumes RosenData exposes a 'val' split like the toy datasets do - confirm.
x_val, y_val = rosen.dataset('val')

# NOTE(review): seeding is currently disabled; re-enable for reproducible runs.
# set_random(config['random_seed'])

In [5]:
# Train models
# One checkpoint path per model; the same paths are read back by the evaluation cell.
model_paths = [f"model/data/rosen_visual_{i}.ckpt" for i in range(config['model_runs'])]

# Training only happens when caching is switched off in config.
if not config['use_cache']:
    for model_number, checkpoint_path in enumerate(model_paths, start=1):
        model = get_model(
            config['layers'],
            checkpoint_path,
            (x_train, y_train),
            (x_val, y_val),
            epochs=config['epochs'],
            retrain=True,
            verbose=config['verbose'],
            patience=config['patience'],
            dropout_rate=config['dropout_train'],
        )
        print("Model", model_number, "loss", model.val_loss)


In [6]:
# Dropout masks to compare, keyed by the label that ends up in the result table
# and in the plot titles. 'vanilla' passes None as the dropout mask.
masks = dict(
    vanilla=None,
    basic_mask=BasicMask(),
    lhs=LHSMask(config['nn_runs']),
    lhs_shuffled=LHSMask(config['nn_runs'], shuffle=True),
    mirror_random=MirrorMask(),
    decorrelating=DecorrelationMask(),
    decorr_sc=DecorrelationMask(scaling=True, dry_run=False),
)

In [7]:
# Evaluate masks on models
mask_results = []


def _uq_metrics(estimations, errors):
    """Return [uq_accuracy, uq_ndcg, uq_nll] for `estimations` scored against `errors`."""
    acc = uq_accuracy(estimations, errors, config['acc_percentile'])
    ndcg = uq_ndcg(errors, estimations)
    nll = uq_nll(errors, estimations)
    return [acc, ndcg, nll]


for model_run in range(config['model_runs']):
    print(f"===Estimate on model {model_run+1}====")
    # Same checkpoint paths as the training cell, but without retrain=True.
    model = get_model(
        config['layers'], model_paths[model_run],
        (x_train, y_train), (x_val, y_val), epochs=config['epochs'])
    predictions = model(x_val).cpu().numpy()
    # Absolute prediction error; the uncertainty estimates are scored against it.
    errors = np.abs(predictions - y_val)

    for name, mask in masks.items():
        estimator = build_estimator(
            'mcdue_masked', model, nn_runs=config['nn_runs'], dropout_mask=mask,
            dropout_rate=config['dropout_uq'])

        for run in range(config['runs']):
            estimations = estimator.estimate(x_val)
            mask_results.append(_uq_metrics(estimations, errors) + [name])

            # Masks that expose reset() are reset between repeated runs.
            if hasattr(mask, 'reset'):
                mask.reset()

    nngp = build_estimator('nngp', model, nn_runs=config['nn_runs'], dropout_rate=config['dropout_uq'])
    for run in range(config['runs']):
        # Call the NNGP estimator itself: previously `estimator` (the last masked
        # MC-dropout estimator from the loop above) was used, so the rows labelled
        # 'nngp' did not come from NNGP at all.
        # NOTE(review): the toy-data cell calls nngp.estimate(x, x_train) without
        # y_train - confirm which signature the NNGP estimator expects.
        estimations = nngp.estimate(x_val, x_train, y_train)
        mask_results.append(_uq_metrics(estimations, errors) + ['nngp'])

In [8]:
# Plot results
# Each row of mask_results is [acc, ndcg, nll, label]; name the columns accordingly.
result_columns = ['acc', 'ndcg', 'nll', 'mask']
mask_df = pd.DataFrame(mask_results, columns=result_columns)

# Wide figure that the three metric panels are drawn into.
plt.figure(figsize=(16, 6))
def boxplot(df, x_label, y_label, i, bottom=0, top=1):
    """Draw a seaborn box plot of `df` into panel `i` of a 1x3 subplot grid.

    Args:
        df: DataFrame containing the columns named by `x_label` and `y_label`.
        x_label: column used for the x axis.
        y_label: column plotted on the y axis.
        i: 1-based panel index inside the 1x3 grid of the current figure.
        bottom: lower y-limit; pass None to leave the y-limits untouched.
        top: upper y-limit, applied only when `bottom` is not None.
    """
    plt.subplot(1, 3, i)
    plt.xticks(rotation=45)
    if bottom is not None:
        plt.ylim(bottom, top)
    # Plot the frame passed by the caller; the `df` argument used to be ignored
    # in favour of the global mask_df.
    sns.boxplot(data=df, x=x_label, y=y_label)
    
# One panel per metric: (column, lower y-limit, upper y-limit).
# A lower limit of None makes boxplot() skip setting the y-limits.
panel_specs = [('acc', 0, 0.8), ('ndcg', 0, 0.9), ('nll', None, 1)]
for panel_index, (metric, y_low, y_high) in enumerate(panel_specs, start=1):
    boxplot(mask_df, 'mask', metric, panel_index, y_low, y_high)

### Visualizing on toy data

In [9]:
# Generate dataset
# Pick the toy problem: 'sin' (active) or 'qubic'.
dataset = 'sin'

# Any value other than 'qubic' selects the sine data.
toy_data = ToyQubicData if dataset == 'qubic' else ToySinData

x_train, y_train = toy_data(use_cache=config['use_cache']).dataset('train')
x_val, y_val = toy_data(use_cache=config['use_cache']).dataset('val')
x_true, y_true = toy_data().dataset('ground_truth')

# Ground-truth curve with the train (red) and validation (green) points on top.
plt.plot(x_true, y_true)
plt.scatter(x_train, y_train, color='red')
plt.scatter(x_val, y_val, color='green')

In [11]:
# Train model
# Use the configured layer sizes with the first entry replaced by 1. A new list is
# built instead of assigning into config['layers'], which would silently change
# the shared config for any Rosenbrock cell that is re-run afterwards.
layers = [1] + config['layers'][1:]
# NOTE(review): the training data is passed in both data positions here although
# x_val/y_val were loaded in the dataset cell - confirm this is intentional.
model = get_model(
    layers, 'model/data/toy.ckpt',
    (x_train, y_train), (x_train, y_train), retrain=True, l2_reg=1e-5,
    verbose=True, patience=10, validation_step=50, batch_size=5, dropout_rate=config['dropout_train'])

# Model predictions on the ground-truth inputs, reused by the UQ plots.
y_pred = model(x_true).cpu().numpy()
plt.figure(figsize=(12, 8))
plt.plot(x_true, y_true, alpha=0.5)
plt.scatter(x_train, y_train, color='red')
plt.scatter(x_true, y_pred, color='green', marker='+')

In [13]:
# Evaluate UQ and plot the results
# Tall figure that holds the grid of per-estimator panels.
plt.figure(figsize=(16, 25))

def make_uq_graph(name, estimations):
    """Draw one uncertainty panel on the current axes.

    Plots the ground-truth curve, the model predictions (green '+'), a green band
    of y_pred +/- `estimations`, and the training points (red), titled `name`.
    Reads x_true, y_true, y_pred, x_train and y_train from the notebook namespace.
    """
    x_flat = np.ravel(x_true)
    pred_flat = np.ravel(y_pred)
    band_low = pred_flat - estimations
    band_high = pred_flat + estimations

    plt.title(name)
    plt.plot(x_true, y_true, alpha=0.5)
    plt.scatter(x_true, y_pred, color='green', marker='+')
    plt.fill_between(x_flat, band_low, band_high, alpha=0.3, color='green')
    plt.scatter(x_train, y_train, color='red')


# One panel per dropout mask plus a final panel for NNGP, two panels per row.
# The row count is derived from the number of panels so that adding a mask does
# not overflow a hard-coded grid (7 masks + NNGP still gives the original 4x2).
n_panels = len(masks) + 1
n_rows = (n_panels + 1) // 2  # ceil(n_panels / 2)

for i, (name, mask) in enumerate(masks.items()):
    # Masks that expose reset() are reset before being reused on the toy model.
    if hasattr(mask, 'reset'):
        mask.reset()
    estimator = build_estimator(
        'mcdue_masked', model, nn_runs=config['nn_runs'], dropout_mask=mask,
        dropout_rate=config['dropout_uq'])

    estimations = estimator.estimate(x_true)
    plt.subplot(n_rows, 2, i+1)
    make_uq_graph(name, estimations)


nngp = build_estimator('nngp', model, nn_runs=config['nn_runs'], dropout_rate=config['dropout_uq'])
# NOTE(review): the Rosenbrock evaluation cell passes (x, x_train, y_train) to
# estimate(); confirm which signature the NNGP estimator expects.
estimations = nngp.estimate(x_true, x_train)
plt.subplot(n_rows, 2, n_panels)
make_uq_graph('nngp', estimations)