In [None]:
import functools
import importlib
import math

from matplotlib import pyplot as plt
from matplotlib.patches import Ellipse
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal
from sklearn.metrics.pairwise import euclidean_distances
import torch

from calibration import data
from calibration import dists
from calibration import plot
from calibration import vae
from calibration import utils
from calibration import uci

## Data Set

In [None]:
# Notebook-wide configuration.
#   BINS    - number of bins of every PIT histogram (passed to the plot helpers
#             and used to build the uniform reference histogram).
#   SAMPLES - first argument to data.PITHistDataset for the reference set
#             (presumably the number of generated samples).
BINS, SAMPLES = 20, 10_000

In [None]:
# Call the project's seeding helper before building the data set
# (presumably seeds the RNGs so the reference set is reproducible — confirm in utils.seed).
utils.seed()
# Reference set of PIT histograms, built from SAMPLES and BINS.
refset = data.PITHistDataset(SAMPLES, BINS)
# Show the number of items in the reference set as the cell output.
len(refset)

In [None]:
# Preview a handful of reference samples: the PIT histogram on top and the
# distributions derived from its target `y` below.
# Iterating over the whole data set would render one figure per sample
# (len(refset) of them), which makes a Restart & Run All impractical, so the
# preview is capped. Raise N_PREVIEW to inspect more samples.
N_PREVIEW = 10
for idx in range(min(N_PREVIEW, len(refset))):
    x, y = refset[idx]
    fig, (ax1, ax2) = plt.subplots(nrows=2)
    plot.pit_hist(ax1, x, BINS)
    plot.dists(ax2, *data.y2dists(*y))
    plt.show()
    # Close this figure explicitly so figures do not accumulate in memory.
    plt.close(fig)

## Variational Auto-Encoder

In [None]:
# Restore the trained VAE from its checkpoint.
# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on
# a machine without one; the model itself is constructed on the CPU below.
checkpoint = torch.load("models/decent-tree-636.pt", map_location="cpu")
hyperparams = checkpoint["hyperparams"]
# Rebuild the architecture from the hyper-parameters stored with the weights.
model = vae.VAE(inputs=hyperparams["bins"],
                neurons=hyperparams["neurons"],
                latents=hyperparams["latents"],
                beta=hyperparams["beta"])
model.load_state_dict(checkpoint["model_state_dict"])
# The notebook only runs inference: switch train-time layers (dropout,
# batch norm, ...) to evaluation behaviour.
model.eval()
# Display the module structure as the cell output.
model

In [None]:
# Encode the whole reference set and decode again from the first encoder output.
# Only inference is needed here, so autograd tracking is disabled.
with torch.no_grad():
    # NOTE(review): the names suggest both calls return (mean, log-variance)
    # pairs — confirm against vae.VAE.
    mu_z, ln_var_z = model.encoder(refset.X)
    mu_x, ln_var_x = model.decoder(mu_z)

In [None]:
# One score per reference sample, comparing the decoder outputs with the
# original histograms; later cells sort and index it by sample position.
# NOTE(review): presumably the log-likelihood ln p(x|z), judging by the name — confirm in vae.likelihood.
ln_pxz = vae.likelihood(mu_x, ln_var_x, refset.X)

### Reconstructions

In [None]:
# Compare original and reconstructed PIT histograms, starting with the
# samples that have the lowest ln_pxz (ascending sort).
# Only the first N_WORST are drawn: plotting every sample of the reference
# set would render len(refset) figures. Raise N_WORST to inspect more.
N_WORST = 10
worst_first = ln_pxz.argsort(descending=False)
for i in worst_first[:N_WORST]:
    x, y = refset[i]
    fig, (ax1, ax2) = plt.subplots(nrows=2)
    # Title: sample index and its ln_pxz value.
    ax1.set_title(f"{i}: {ln_pxz[i].item()}")
    plot.pit_hist(ax1, x, BINS, label="original")
    plot.pit_hist(ax1, mu_x[i], BINS)
    ax1.legend()
    plot.dists(ax2, *data.y2dists(*y))
    plt.show()
    # Close this figure explicitly so figures do not accumulate in memory.
    plt.close(fig)

### Uniform PIT histogram

In [None]:
# Round-trip a perfectly uniform PIT histogram (every bin = 1 / BINS) through
# the VAE and overlay the reconstruction on the original.
uniform = torch.full((BINS, ), 1 / BINS)
# Inference only: run under no_grad (as for the reference set) so the outputs
# carry no autograd history when they are handed to the plotting helper.
# NOTE(review): this cell pairs `model.encoder` with `model.decode`, while
# other cells pair encoder/decoder or encode/decode — confirm the intended pair.
with torch.no_grad():
    z_uniform, _ = model.encoder(uniform)
    x_uniform, _ = model.decode(z_uniform)
_, ax = plt.subplots()
plot.pit_hist(ax, uniform, BINS, label="original")
plot.pit_hist(ax, x_uniform, BINS)
# Trailing semicolon suppresses the Legend repr in the cell output.
ax.legend();

### Protein Data Set

In [None]:
# Build the PIT histogram of a trained regression network on the UCI
# "protein" data and check how well the VAE reconstructs it.
(_, proteinset), _ = uci.get_dataset("protein", seed=50, validation=False, preparation=True)
X_protein, y_protein = proteinset.tensors
# Network with the same input width as the protein features; weights are
# restored from the stored run "nll-1-1-5432".
nn = uci.NeuralNetwork(X_protein.shape[-1], {"loss": "nll", "neurons": 64, "hiddens": 1})
nn.load("nll-1-1-5432")
y_pred_protein = nn.predict(proteinset)
# PIT values of the targets under the predictions, then binned into BINS bins.
# NOTE(review): presumably y_pred_protein holds the parameters of a normal
# predictive distribution — confirm in uci.normal_pit.
pit_values_protein = uci.normal_pit(*y_pred_protein, y_protein.cpu()).squeeze()
pit_hist_protein = data.pit_hist(pit_values_protein, BINS)
# Inference only: run the VAE under no_grad so the reconstruction carries no
# autograd history when it is handed to the plotting helper.
with torch.no_grad():
    z_protein, _ = model.encode(pit_hist_protein)
    mu_protein, _ = model.decode(z_protein)
_, ax = plt.subplots()
plot.pit_hist(ax, pit_hist_protein, BINS, label="original")
plot.pit_hist(ax, mu_protein, BINS)
# Trailing semicolon suppresses the Legend repr in the cell output.
ax.legend();

### 3-D Projection

In [None]:
%matplotlib widget

In [None]:
# Interactive 3-D scatter of the first three latent dimensions of the
# reference set, coloured by the third column of the targets. Picking a point
# calls plot.on_pick, which is given ax_pick, the data set and the model.
fig, ax, ax_pick, ax_press = plot.get_grid(projection="3d")
cb = ax.scatter(mu_z[:, 0], mu_z[:, 1], zs=mu_z[:, 2], c=refset.y[:, 2], picker=True)
# Attach the colorbar to this figure and the scatter axes explicitly instead
# of relying on pyplot's "current" figure/axes in a multi-axes grid.
fig.colorbar(cb, ax=ax)
plot_fn = functools.partial(plot.pit_hist, n_bins=BINS)
on_pick = functools.partial(plot.on_pick, ax=ax_pick, dataset=refset, model=model, plot_fn=plot_fn)
# Keep the connection id so the handler can be disconnected later with
# fig.canvas.mpl_disconnect(pick_cid); assigning it also keeps the bare
# integer out of the cell output.
pick_cid = fig.canvas.mpl_connect("pick_event", on_pick)

## Nearest Neighbours

In [None]:
# Compare two ways of finding the reference sample closest to a test sample:
# nearest neighbour in histogram space ("original") versus the reference
# latent mean with the highest density under the test sample's encoding
# ("latent").
utils.seed(71)
testset = data.PITHistDataset(100, BINS)

# Index of the closest reference histogram (Euclidean distance) per test sample.
js = euclidean_distances(testset.X, refset.X).argmin(axis=1)
for j, (x, y) in zip(js, testset):
    # Inference only: without no_grad the encoder outputs track gradients and
    # cannot be converted to NumPy arrays by SciPy.
    with torch.no_grad():
        mu, sigma = model.encode(x)
    # Score every reference latent mean under a diagonal Gaussian centred at
    # `mu`. The log-density is used instead of the density: it has the same
    # argmax but does not underflow to 0 for distant points, which would make
    # argmax silently return index 0.
    # NOTE(review): elsewhere the second encoder output is named `ln_var_z`;
    # if `model.encode` also returns a log-variance, the covariance should be
    # `sigma.exp()` rather than `sigma ** 2` — confirm against vae.VAE.
    ln_p = multivariate_normal.logpdf(mu_z, mu, sigma ** 2)
    fig, (ax1, ax2) = plt.subplots(nrows=2)
    plot.pit_hist(ax1, x, BINS)
    plot.pdf(ax2, data.y2dists(*y)[1])
    plot.pdf(ax2, data.y2dists(*refset.y[j])[1], linestyle="dotted", label="original")
    plot.pdf(ax2, data.y2dists(*refset.y[ln_p.argmax()])[1], linestyle="dashed", label="latent")
    ax2.legend()
    plt.show()
    # Close this figure explicitly so figures do not accumulate in memory.
    plt.close(fig)