In [None]:
import sys
# Put the parent directory on the import path — presumably so the project-local
# packages imported in the next cell resolve when the notebook is run from its
# own folder (confirm against the repository layout).
sys.path.append('..')
# Reload imported modules automatically before each cell is executed, so edits
# to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pickle

import numpy as np
import matplotlib.pyplot as plt
from dppy.finite_dpps import FiniteDPP

from experiment_setup import get_model, set_random, build_estimator
from uncertainty_estimator.masks import build_masks, DEFAULT_MASKS, BASIC_MASKS
from dataloader.toy import ToyQubicData, ToySinData
from model.mlp import MLP
from model.dense import Dense
from model.trainer import Trainer
import seaborn as sns
import torch

from sklearn.cluster import SpectralCoclustering

# torch.cuda.set_device(1)

In [None]:
# Render every figure on a white background.
plt.rcParams.update({'figure.facecolor': 'white'})

In [None]:
# Experiment settings shared by the cells below.
config = dict(
    nn_runs=50,                 # forwarded to build_estimator as `nn_runs`
    verbose=False,              # verbosity flag for the experiment
    use_cache=True,             # forwarded to the toy-data loaders
    layers=[1, 64, 64, 32, 1],  # layer sizes handed to the Dense model
    patience=500,               # forwarded to Trainer.fit as `patience`
    dropout_train=0.3,          # dropout setting given to the Trainer
    dropout_uq=0.5,             # forwarded to build_estimator as `dropout_rate`
)

### Visualizing on toy data

#### Generate dataset


In [None]:
# Name of the toy problem; it is not read again in the cells visible here.
dataset = 'sin'

# Dataset class used for all three splits below.
data_class = ToySinData

# Train/validation splits follow the cache flag from `config`; the ground-truth
# curve comes from a loader built with its default arguments.
x_train, y_train = data_class(use_cache=config['use_cache']).dataset('train')
x_val, y_val = data_class(use_cache=config['use_cache']).dataset('val')
x_true, y_true = data_class().dataset('ground_truth')

# Ground-truth curve with the training (red) and validation (green) points on top.
plt.plot(x_true, y_true)
plt.scatter(x_train, y_train, color='red')
plt.scatter(x_val, y_val, color='green')


#### Train model


In [None]:
# Build the network from the layer sizes in `config` and cast it to float32.
model = Dense(config['layers']).float()

# Regression trainer with an MSE loss; the training dropout comes from `config`.
trainer = Trainer(
    model, batch_size=15, dropout_train=config['dropout_train'],
    loss=torch.nn.functional.mse_loss, regression=True)
# Verbosity is read from `config` (it used to be hard-coded to False here, which
# silently ignored the 'verbose' setting declared in the configuration cell).
trainer.fit(
    (x_train, y_train), (x_val, y_val), patience=config['patience'],
    verbose=config['verbose'], epochs=10000)

# Predict on the ground-truth inputs followed by the training inputs and overlay
# the predictions (green '+') on the true curve and the training points (red).
x_ = np.concatenate((x_true, x_train))
y_ = trainer.predict(x_)
plt.figure(figsize=(22, 12))
plt.plot(x_true, y_true, alpha=0.5)
plt.scatter(x_train, y_train, color='red')
plt.scatter(x_, y_, color='green', marker='+')


In [None]:
# Build the dropout masks requested by name. The result is used as a mapping
# (it is iterated with `.items()` further down) from mask name to mask object.
# NOTE(review): 'l_dpp_htnorm' presumably selects a DPP mask with HT
# normalisation — confirm in uncertainty_estimator.masks.
masks = build_masks(['l_dpp_htnorm'])

In [None]:
# Trainer predictions on the ground-truth inputs; make_uq_graph draws them as
# the centre of the uncertainty band.
y_pred = trainer.predict(x_true)

In [None]:
# Tall canvas that holds the grid of subplots filled in by the loop at the end of this cell.
plt.figure(figsize=(16, 30))

def make_uq_graph(name, estimations):
    """Draw the prediction with a +/- `estimations` band on the current axes.

    Reads the notebook globals `x_true`, `y_true`, `y_pred`, `x_train` and
    `y_train`.

    Args:
        name: title of the plot.
        estimations: per-point half-width of the band; it is subtracted from
            and added to the flattened `y_pred`.
    """
    grid = np.ravel(x_true)
    centre = np.ravel(y_pred)
    band_low, band_high = centre - estimations, centre + estimations

    plt.title(name)
    plt.plot(x_true, y_true, alpha=0.5)
    plt.scatter(x_true, y_pred, color='green', marker='+')
    plt.fill_between(grid, band_low, band_high, alpha=0.3, color='green')
    plt.scatter(x_train, y_train, color='red')

# Stochastic forward passes kept per mask name, for the "Dropout predictions" plots.
dropout_runs = {}

# Two plots per row. Six rows reproduce the original fixed 6x2 layout; extra
# rows are added only when there are more than 12 masks, a case in which the
# fixed grid used to raise a ValueError from plt.subplot.
n_cols = 2
n_rows = max(6, -(-len(masks) // n_cols))  # ceil division without importing math

for i, (name, mask) in enumerate(masks.items()):
    # Some masks expose `reset`; call it before the mask is used for a new estimation.
    if hasattr(mask, 'reset'):
        mask.reset()
    # NOTE(review): presumably switches the network to training mode so that
    # dropout stays active during estimation — confirm in model.trainer.
    trainer.train()
    estimator = build_estimator(
        'mcdue_masked', trainer, keep_runs=True, nn_runs=config['nn_runs'],
        dropout_mask=mask, dropout_rate=config['dropout_uq'])

    estimations = estimator.estimate(x_true)
    dropout_runs[name] = estimator.last_mcd_runs()
    plt.subplot(n_rows, n_cols, i+1)
    make_uq_graph(name, estimations)



### Dropout predictions


In [None]:
def plot_dropout_runs(name, predictions, n=50):
    """Plot individual stochastic predictions over the true curve in a new figure.

    Reads the notebook globals `x_true`, `y_true`, `x_train` and `y_train`.

    Args:
        name: title of the figure.
        predictions: sequence of prediction curves, each plotted against `x_true`.
        n: only the first `n` curves are drawn.
    """
    plt.figure(figsize=(20, 10))
    plt.title(name)
    plt.plot(x_true, y_true)
    plt.scatter(x_train, y_train, color='red')

    shown_runs = predictions[:n]
    for run_curve in shown_runs:
        plt.plot(x_true, run_curve, alpha=0.5)

In [None]:
# One figure per mask. The stored runs are transposed so that iterating over
# them yields one curve per run; up to 200 curves are drawn.
for mask_name, mc_runs in dropout_runs.items():
    plot_dropout_runs(mask_name, mc_runs.T, n=200)


In [None]:
# Take the mask that was built last (dict insertion order) for closer inspection.
dpp = list(masks.values())[-1]

In [None]:
# Samples recorded by the first DPP held by the mask.
samples = dpp.dpps[0].list_of_samples
lengths = list(map(len, samples))
# Mean sample size relative to the width of the first hidden layer.
mean_sample_size = sum(lengths) / len(lengths)
print(mean_sample_size / config['layers'][1])
samples



### Drawing correlations

In [None]:
# First entry of the mask's `layer_correlations`; plotted as a heatmap below.
# NOTE(review): presumably the correlation matrix of the first masked layer — confirm.
corr = dpp.layer_correlations[0]

In [None]:
# Heatmap of the raw matrix, in its original row/column order.
plt.figure(figsize=(10, 8))
sns.heatmap(corr)

In [None]:
# Reorder the matrix by spectral co-clustering labels so that block structure
# becomes visible, drawing one heatmap per cluster count.
# NOTE: range(30, 30) is empty, so this sweep is currently switched off; widen
# the range to run it.
for n in range(30, 30):
    plt.figure(figsize=(14, 12))
    # Use a dedicated name: binding the clusterer to `model` would overwrite
    # the trained network stored under that name earlier in the notebook.
    coclustering = SpectralCoclustering(n_clusters=n, random_state=0)
    coclustering.fit(corr)
    # Sort rows, then columns, by their cluster label.
    fit_data = corr[np.argsort(coclustering.row_labels_)]
    fit_data = fit_data[:, np.argsort(coclustering.column_labels_)]
    sns.heatmap(fit_data)


### Check normalization
You need to re-implement `layer_runs` to use this section
(just save the values of the middle layer in an attribute dictionary named `layer_runs`).

In [None]:
def get_norm(corr):
    """Return the element-wise reciprocal of diag(K), with K = L (L + I)^-1.

    `corr` is used as the square matrix L. For a DPP likelihood kernel L this K
    is the marginal kernel, so the result holds the inverse inclusion
    probabilities of the individual items.

    Args:
        corr: square 2-D array.

    Returns:
        1-D array with one weight per row of `corr`.
    """
    size = len(corr)
    marginal_kernel = np.dot(corr, np.linalg.inv(corr + np.eye(size)))
    return np.reciprocal(np.diag(marginal_kernel))

def get_averages(samples, values, norm=None):
    """Estimate the mean of `values` from each subset in `samples`.

    For every sample a weight mask is built that is zero outside the sample;
    the weighted values are then averaged over ALL positions of `values`.

    Args:
        samples: iterable of index collections (one per sample) into `values`.
        values: 1-D array of per-unit values.
        norm: optional 1-D array of per-unit weights. When omitted, every
            selected unit gets the same weight ``len(values) / len(sample)``.

    Returns:
        List with one average per sample. An empty sample selects nothing and
        therefore contributes an average of 0 instead of raising
        ZeroDivisionError.
    """
    avgs = []
    for sample in samples:
        # Always a float mask: inheriting an integer dtype from `values` would
        # silently truncate fractional weights.
        sample_mask = np.zeros_like(values, dtype=float)
        indices = np.asarray(sample, dtype=int)
        if indices.size:
            if norm is None:
                sample_mask[indices] = len(values) / indices.size
            else:
                sample_mask[indices] = norm[indices]
        avgs.append(np.average(values * sample_mask))
    return avgs


In [None]:
# Compare two ways of re-weighting DPP-sampled units when estimating the mean of
# the stored layer values: uniform ("naive") weights versus the per-unit weights
# stored on the mask ("HT").
# Everything is read from `dpp` (the mask selected above) rather than from the
# loop variable `mask` leaked by the estimation cell, so this cell no longer
# depends on that hidden state.
corr = dpp.layer_correlations[0]
layer_runs = dpp.layer_runs[0]
true_avgs, naive_avgs, ht_avgs = [], [], []
naive_stds, ht_stds = [], []

# Per-unit weights of the first layer as a NumPy array.
norm = dpp.norm[0].detach().cpu().numpy()

for i in range(75):
    print(i, end=' ')  # lightweight progress indicator
    values = layer_runs[0][i]

    # Fresh DPP per iteration so `list_of_samples` holds exactly the 20 draws below.
    dpp_1 = FiniteDPP('likelihood', L=corr)
    for _ in range(20):
        dpp_1.sample_exact()
    samples = dpp_1.list_of_samples

    # Compute each set of per-sample averages once and reuse it for mean and std.
    naive_sample_avgs = get_averages(samples, values)
    ht_sample_avgs = get_averages(samples, values, norm)

    true_avgs.append(np.average(values))

    naive_avgs.append(np.average(naive_sample_avgs))
    naive_stds.append(np.std(naive_sample_avgs))

    ht_avgs.append(np.average(ht_sample_avgs))
    ht_stds.append(np.std(ht_sample_avgs))


In [None]:
import pandas as pd

# Signed error of each estimator with respect to the true average, one row per
# loop iteration of the experiment above.
true_avgs_arr = np.array(true_avgs)
df = pd.DataFrame({
    'Naive': np.array(naive_avgs) - true_avgs_arr,
    'HT': np.array(ht_avgs) - true_avgs_arr,
})

plt.figure(figsize=(12, 8))
plt.title("Deviation of average for different normalizations, toy sinus")
sns.barplot(data=df)


In [None]:
# Spread of the per-sample averages for each estimator, one row per loop
# iteration of the experiment above.
std_by_normalization = {
    'Naive': np.array(naive_stds),
    'HT': np.array(ht_stds),
}
df = pd.DataFrame(std_by_normalization)

plt.figure(figsize=(12, 8))
plt.title("Deviation std of average for different normalizations, toy sinus")
sns.barplot(data=df)





