In [1]:
import sys
# Add the parent directory to the import search path. Presumably this lets the
# project-local packages imported further down (dataloader, analysis, ...)
# resolve when the notebook is run from a subdirectory — TODO confirm layout.
sys.path.append('..')
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

from dataloader.boston_housing import BostonHousingData
from dataloader.concrete import ConcreteData
from dataloader.energy_efficiency import EnergyEfficiencyData
from experiment_setup import get_model, build_estimator
from analysis.metrics import uq_accuracy, uq_ndcg, uq_nll
from uncertainty_estimator.masks import BasicMask, LHSMask, MirrorMask, DecorrelationMask, DPPMask

# Give figures created after this point a white background.
plt.rcParams['figure.facecolor'] = 'white'

In [3]:
# Experiment settings shared by the cells of this notebook.
# Comments say where each key is read in the cells visible here; keys marked
# "not read here" may still be consumed elsewhere.
config = dict(
    random_seed=43,                  # not read here
    nn_runs=10,                      # passed to build_estimator(nn_runs=...); a later cell resets it to 1
    runs=1,                          # not read here
    model_runs=1,                    # number of checkpoint paths generated
    verbose=False,                   # only used by the commented-out retraining call
    use_cache=False,                 # not read here
    layers=[13, 256, 128, 64, 1],    # passed to get_model; presumably layer widths, input first — TODO confirm
    epochs=30_000,                   # only used by the commented-out retraining call
    acc_percentile=0.1,              # not read here
    patience=3,                      # only used by the commented-out retraining call
    dropout_train=0.2,               # only used by the commented-out retraining call
    dropout_uq=0.3,                  # passed to build_estimator(dropout_rate=...)
    batch_size=32,                   # only used by the commented-out retraining call
    dataset='boston_housing',        # selects the loader class and names the checkpoint files
    scale=True,                      # toggles StandardScaler normalisation of x and y
)

In [4]:
# Load data
# Loader class for every dataset name that config['dataset'] may take.
datasets = dict(
    boston_housing=BostonHousingData,
    concrete=ConcreteData,
    energy_efficiency=EnergyEfficiencyData,
)

# Instantiate the loader selected in the config.
dataset = datasets[config['dataset']]()

# One checkpoint path per model run, named after the dataset.
model_paths = [
    f"model/data/{config['dataset']}_{run}.ckpt"
    for run in range(config['model_runs'])
]
x_train, y_train = dataset.dataset('train')
x_val, y_val = dataset.dataset('val')

In [5]:
# Normalize dataset
def scale(train, val):
    """Standardize ``train`` and ``val`` using a scaler fitted on ``train`` only.

    Returns a 3-tuple: the transformed ``train``, the transformed ``val``, and
    the fitted ``StandardScaler`` instance.
    """
    # fit() returns the scaler itself, so construction and fitting can be chained.
    scaler = StandardScaler().fit(train)
    return scaler.transform(train), scaler.transform(val), scaler

# Standardize features and targets when config['scale'] is set.
# The x_*/y_* names are rebound to their scaled versions, so running this cell
# a second time would scale data that is already scaled.
y_scaler = None  # stays None when scaling is disabled
if config['scale']:
    x_train, x_val, _ = scale(x_train, x_val)
    y_train, y_val, y_scaler = scale(y_train, y_val)

#### Try the DPP (determinantal point process) mask

In [6]:
from numpy.random import rand, randn
from scipy.linalg import qr

from dppy.finite_dpps import FiniteDPP

In [7]:
# Build the model from the configured layer list and the first checkpoint path.
# NOTE(review): presumably this restores previously trained weights instead of
# training from scratch — confirm against experiment_setup.get_model.
model = get_model(config['layers'], model_paths[0])
# Alternative kept for reference: the same call with the train/val data and
# retrain=True, driven by the training hyperparameters stored in config.
# model = get_model(
#     config['layers'], model_paths[0], (x_train, y_train), (x_val, y_val),
#     retrain=True, verbose=config['verbose'], patience=config['patience'],
#     dropout_rate=config['dropout_train'], epochs=config['epochs'], batch_size=config['batch_size'])

In [14]:
# Override the configured number of runs: this estimator uses a single one.
config.update(nn_runs=1)

# Keep a handle on the mask so its attributes can be inspected after estimation.
mask = DPPMask()

estimator = build_estimator(
    'mcdue_masked',
    model,
    nn_runs=config['nn_runs'],
    dropout_mask=mask,
    dropout_rate=config['dropout_uq'],
)

In [15]:
# Run the estimator on the validation features; as the last expression of the
# cell, its return value is shown as the cell output.
estimator.estimate(x_val)

In [16]:
# Take the `corrs` attribute of the mask; it is plotted as a heatmap and used
# as a DPP kernel in the following cells.
# NOTE(review): presumably populated as a side effect of estimator.estimate —
# confirm in DPPMask.
K = mask.corrs

In [18]:
# Heatmap of K on a 7x6-inch figure, drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(7, 6))
sns.heatmap(K, ax=ax)

In [22]:
# Repeatedly draw fixed-size samples from a finite DPP that uses K as its
# correlation kernel, then print everything the sampler has collected.
# NOTE(review): dppy documents a correlation kernel as having eigenvalues in
# [0, 1] — confirm that K (taken from mask.corrs) satisfies this.
N_DPP_DRAWS = 100      # how many samples to draw
DPP_SAMPLE_SIZE = 16   # number of items in each sample

DPP = FiniteDPP('correlation', K=K)
for _ in range(N_DPP_DRAWS):
    # The return value is the latest draw; the sampler presumably also appends
    # it to DPP.list_of_samples, which is what gets printed below.
    result = DPP.sample_exact_k_dpp(DPP_SAMPLE_SIZE)
print(DPP.list_of_samples)