In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os
import pickle
import scipy
import scipy.integrate as integrate
import time 
from tqdm import tqdm

import sys 
sys.path.append('../../')
from model_comparison.utils import *
from model_comparison.mdns import *
from model_comparison.models import BaseModel

from delfi.distribution.mixture import MoG
%matplotlib inline

In [None]:
# Notebook-wide matplotlib defaults: font sizes for legend, titles, axis
# labels and tick labels, a frameless legend and a wide default figure.
mpl_params = dict()
mpl_params['legend.fontsize'] = 18
mpl_params['legend.frameon'] = False
mpl_params['axes.titlesize'] = 21
mpl_params['axes.labelsize'] = 19
mpl_params['xtick.labelsize'] = 15
mpl_params['ytick.labelsize'] = 15
mpl_params['figure.figsize'] = (18, 5)

# Apply all of them at once to the global rc configuration.
mpl.rcParams.update(mpl_params)

## Bayesian model comparison with Gaussian vs. Laplace model 

The task is to decide whether the observed data comes from a Normal or from a Laplace distribution.

The difficulty in this task comes only from the amount of data available. The priors should be chosen such that they do not favor either model.

### Visualization

In [None]:
# Grid of 1000 evenly spaced points on [-10, 10] on which the example
# densities are evaluated for plotting.
thetas = np.linspace(start=-10, stop=10, num=1000)

# Example "background" model: normal density, location 0, scale 2.
example_m0 = scipy.stats.norm(loc=0, scale=2)
# Example "signal" model: Laplace density, location 0, scale 2.
example_m1 = scipy.stats.laplace(loc=0, scale=2)

# A single example observation, drawn from the signal model.
xo = example_m1.rvs(size=1)

In [None]:
# Overlay the two example densities and mark the example observation.
fig, ax = plt.subplots()
ax.plot(thetas, example_m0.pdf(thetas), label='background')
ax.plot(thetas, example_m1.pdf(thetas), label='signal')
# axvline is documented to take a scalar position, while xo comes from
# rvs(1) and is a length-1 array; unwrap it explicitly.
ax.axvline(float(np.ravel(xo)[0]), label='data', color='r')
ax.set_xlabel('x')
ax.set_ylabel('density')
ax.legend();

## Set up models

In [None]:
class GaussianModel(BaseModel):
    """Simulator that draws normally distributed samples around a fixed mean.

    The mean ``mu`` is stored on the instance; the scale is the parameter
    handed to :meth:`gen_single`. The remaining constructor arguments are
    forwarded unchanged to ``BaseModel``.
    """

    def __init__(self, mu, dim_param=1, sample_size=10, n_workers=1, seed=None):
        super().__init__(
            dim_param=dim_param,
            sample_size=sample_size,
            n_workers=n_workers,
            seed=seed,
        )
        self.mu = mu
        # Not filled in by this class; starts out empty.
        self.posterior = None

    def gen_single(self, params):
        """Draw ``self.sample_size`` normal samples for one parameter setting.

        ``params`` is the scale itself, or a ``(scale, seed)`` pair when
        ``self.run_parallel`` is set; in the latter case ``self.rng`` is
        re-seeded with that seed before sampling.
        ``run_parallel``, ``rng`` and ``sample_size`` are presumably set up
        by ``BaseModel`` -- confirm there.
        """
        if not self.run_parallel:
            sigma = params
        else:
            # in multiprocessing the parameter vector additionally contains a seed
            sigma, worker_seed = params
            self.rng.seed(int(worker_seed))
        return self.rng.normal(loc=self.mu, scale=sigma, size=self.sample_size)
    
class LaplaceModel(BaseModel):
    """Simulator that draws Laplace distributed samples around a fixed location.

    The location ``mu`` is stored on the instance; the scale is the parameter
    handed to :meth:`gen_single`. The remaining constructor arguments are
    forwarded unchanged to ``BaseModel``.
    """

    def __init__(self, mu, dim_param=1, sample_size=10, n_workers=1, seed=None):
        super().__init__(
            dim_param=dim_param,
            sample_size=sample_size,
            n_workers=n_workers,
            seed=seed,
        )
        self.mu = mu
        # Not filled in by this class; starts out empty.
        self.posterior = None

    def gen_single(self, params):
        """Draw ``self.sample_size`` Laplace samples for one parameter setting.

        ``params`` is the scale itself, or a ``(scale, seed)`` pair when
        ``self.run_parallel`` is set; in the latter case ``self.rng`` is
        re-seeded with that seed before sampling.
        ``run_parallel``, ``rng`` and ``sample_size`` are presumably set up
        by ``BaseModel`` -- confirm there.
        """
        if not self.run_parallel:
            spread = params
        else:
            # in multiprocessing the parameter vector additionally contains a seed
            spread, worker_seed = params
            self.rng.seed(int(worker_seed))
        return self.rng.laplace(loc=self.mu, scale=spread, size=self.sample_size)

In [None]:
# Samples per simulated data set, and how many data sets go into the
# training and test split respectively.
sample_size, ntrain, ntest = 100, 100000, 100

# Both models use the same inverse-gamma prior (shape a=2) on their scale.
prior_m0 = scipy.stats.invgamma(a=2)  # background model prior
prior_m1 = scipy.stats.invgamma(a=2)  # signal model prior

# Simulators for the two competing models.
# NOTE(review): the Laplace model is centred at 0.3 while the Gaussian is
# centred at 0, so the two models differ in location as well as in shape --
# confirm this offset is intended.
m0 = GaussianModel(mu=0, sample_size=sample_size)
m1 = LaplaceModel(mu=.3, sample_size=sample_size)

## Generate parameters from the priors 

In [None]:
# Total number of data sets to simulate; half of the scale parameters are
# drawn from each model's prior.
n = ntrain + ntest
n_per_model = int(n / 2)
params_m0 = prior_m0.rvs(size=n_per_model)
params_m1 = prior_m1.rvs(size=n_per_model)

## Generate data from models and calculate summary stats, prepare test data 

In [None]:
# Run each simulator on its own prior draws: m0 first, then m1.
# (presumably gen() returns one simulated data set per parameter -- see BaseModel)
data_m0, data_m1 = (
    simulator.gen(prior_draws)
    for simulator, prior_draws in ((m0, params_m0), (m1, params_m1))
)

In [None]:
# Stack the simulated data sets of both models row-wise and build the
# matching vector of model indices (0 for m0 rows, 1 for m1 rows).
x_all = np.vstack((data_m0, data_m1))
m_all = np.hstack((np.zeros(data_m0.shape[0]), np.ones(data_m1.shape[0]))).squeeze().astype(int)

# Shuffle data and labels with one shared permutation. Its length is taken
# from the stacked data rather than from n: the parameters were drawn as
# int(n / 2) per model, so for an odd n an index range of length n would
# point past the last stacked row.
shuffle_indices = np.random.permutation(x_all.shape[0])
x_all = x_all[shuffle_indices, ]
m_all = m_all[shuffle_indices].tolist()

# The first ntrain shuffled rows are used for training, the rest for testing.
x, xtest = x_all[:ntrain, :], x_all[ntrain:, :]
m, mtest = m_all[:ntrain], m_all[ntrain:]

# No summary statistics are computed: the raw samples are used as they are.
sx = x
sx_test = xtest
# use training norm to normalize test data
sx_zt, training_norm = normalize(sx)
sx_test_zt, training_norm = normalize(sx_test, training_norm)

## Set up the NN and train it 

In [None]:
# Training schedule: number of epochs and the minibatch setting that is
# handed to the trainer.
n_epochs = 10
n_minibatch = int(ntrain / 100)

# Classification network with one hidden layer of 200 units that takes one
# full data set (sample_size values) as input, trained with Adam.
model = ClassificationMDN(n_input=sample_size, n_hidden_layers=1, n_hidden_units=200)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
trainer = Trainer(model, optimizer, verbose=True, classification=True)

# train with training data
loss_trace = trainer.train(sx_zt, m, n_epochs=n_epochs, n_minibatch=n_minibatch)

# Loss recorded during training.
plt.plot(loss_trace)
plt.xlabel('iterations')
plt.ylabel('loss');

### Visualize the NN input output function

In [None]:
# Build a 1-D grid of 100 values on [0, 10] and pass it through normalize()
# together with the norm obtained from the training data.
# NOTE(review): this call also rebinds training_norm; presumably normalize()
# hands back the norm it was given -- verify.
stats_space = np.linspace(0, 10, 100)
sx_vis, training_norm = normalize(stats_space, training_norm)
# Predict class probabilities for every grid value.
# NOTE(review): the network is built with n_input=sample_size (100 values per
# row), but the grid is reshaped to (stats_space.size, 1), i.e. one value per
# row. This looks like a leftover from a version with a single summary
# statistic -- confirm that model.predict accepts this shape.
probs_vis = model.predict(sx_vis.reshape((stats_space.size, 1)))

In [None]:
# Prior densities of both models together with the network's predicted
# class probabilities over the same grid.
fig, ax = plt.subplots(figsize=(18, 5))
ax.plot(stats_space, prior_m0.pdf(stats_space), label='m0 prior')
ax.plot(stats_space, prior_m1.pdf(stats_space), label='m1 prior')
# Raw strings: "\;" is not a valid Python escape sequence and triggers a
# warning in ordinary string literals. The stray second ")" of the original
# labels is dropped as well.
ax.plot(stats_space, probs_vis[:, 0], label=r'NN output: $p(m0 \;|\; x)$')
ax.plot(stats_space, probs_vis[:, 1], label=r'NN output: $p(m1 \;|\; x)$')
ax.legend();

## Approximate the exact posterior

In [None]:
def marli_lapace(scale, data, model_mean, prior):
    """Likelihood of ``data`` under a Laplace model, weighted by the prior.

    Multiplies the Laplace densities (location ``model_mean``, scale
    ``scale``) of all entries of ``data`` and scales the product by
    ``prior.pdf(scale)``. Integrating the result over ``scale`` gives the
    marginal likelihood of ``data``.
    """
    pointwise_density = scipy.stats.laplace.pdf(data, loc=model_mean, scale=scale)
    joint_likelihood = np.prod(pointwise_density)
    prior_density = prior.pdf(scale)
    return joint_likelihood * prior_density

def marli_normal(scale, data, model_mean, prior):
    """Likelihood of ``data`` under a normal model, weighted by the prior.

    Multiplies the normal densities (mean ``model_mean``, standard deviation
    ``scale``) of all entries of ``data`` and scales the product by
    ``prior.pdf(scale)``. Integrating the result over ``scale`` gives the
    marginal likelihood of ``data``.
    """
    pointwise_density = scipy.stats.norm.pdf(data, loc=model_mean, scale=scale)
    joint_likelihood = np.prod(pointwise_density)
    prior_density = prior.pdf(scale)
    return joint_likelihood * prior_density

# def marginal_likelihood_integrant(mu, xo, model_distr, model_std, prior_distr): 
#     return np.prod(model_distr.pdf(xo, loc=mu, scale=model_distr.std()) * prior_distr.pdf(mu)

In [None]:
def _integrate_over_scale(integrand, data, model_mean, prior):
    """Integrate ``integrand(scale, data, model_mean, prior)`` over all scales > 0.

    * The upper limit is infinity because the inverse-gamma priors used in
      this notebook put mass on every positive scale; stopping at a fixed
      finite value would cut off part of the integral.
    * ``epsabs=0`` makes quad stop on its relative tolerance only. The
      integrand is a product of many densities and can be far smaller than
      quad's default absolute tolerance, in which case the first, unrefined
      estimate would be accepted as converged.
    * ``limit`` is raised so the adaptive subdivision has room to resolve a
      narrowly peaked integrand.
    """
    value, _abs_error = integrate.quad(
        integrand, 0, np.inf,
        args=(data, model_mean, prior),
        epsabs=0.0, limit=200,
    )
    return value


# Marginal likelihood of every test data set under the normal (m0) and the
# Laplace (m1) model.
marli0 = np.array([_integrate_over_scale(marli_normal, x_obs, m0.mu, prior_m0)
                   for x_obs in xtest])
marli1 = np.array([_integrate_over_scale(marli_lapace, x_obs, m1.mu, prior_m1)
                   for x_obs in xtest])

# Posterior model probabilities, assuming equal prior probability for both
# models. They are computed here because several plotting cells further down
# read pp0 before the cell that assigns it.
pp0 = marli0 / (marli0 + marli1)
pp1 = marli1 / (marli0 + marli1)

In [None]:
# Log marginal likelihood under each model, plotted against the first sample
# of every test data set.
first_sample = sx_test[:, 0]
plt.plot(first_sample, np.log(marli0), 'o')
plt.plot(first_sample, np.log(marli1), 'o')

In [None]:
# Network predictions for the normalized test data; keep column 1 only
# (labelled p(m1 | x) in the NN output plot of this notebook).
test_class_probs = model.predict(sx_test_zt)
pphat = test_class_probs[:, 1]

In [None]:
# Scatter of the exact posterior probability against the network prediction,
# one point per test data set.
# NOTE(review): pp0 is computed as marli0 / (marli0 + marli1), i.e. it refers
# to model m0, whereas pphat is column 1 of the network output. If column 1 is
# p(m1 | x), a well-trained network puts the points on the anti-diagonal --
# confirm whether pp1 was meant here.
plt.plot(pp0, pphat, 'o')

In [None]:
# Histograms of the exact posterior probabilities (pp0) and of the network
# predictions (pphat), drawn into the same axes.
for probabilities in (pp0, pphat):
    plt.hist(probabilities);

In [None]:
# Mean absolute difference between the true model index of each test data
# set and the network's predicted probability.
np.mean(np.abs(np.asarray(mtest) - pphat))

In [None]:
# Per test data set: true model index, network prediction and exact posterior
# probability. Labels and a legend are added so the traces can be told apart.
# NOTE(review): pp0 is the probability of m0 while mtest and pphat refer to
# m1 -- confirm whether pp1 should be plotted instead.
plt.plot(mtest, '-o', label='true model index (mtest)')
plt.plot(pphat, '-o', label='NN prediction (pphat)')
plt.plot(pp0, '-o', label='exact p(m0 | x) (pp0)')
plt.xlabel('test data set')
plt.ylabel('probability / model index')
plt.legend();

In [None]:
# Normalize the two marginal likelihoods into posterior model probabilities.
evidence_total = marli0 + marli1
pp0 = marli0 / evidence_total
pp1 = marli1 / evidence_total

# Boolean selector for the test data sets generated by m1 (not used by the
# active plotting code below).
mask = np.array(mtest) == 1

# First vs. second sample of every test data set, coloured by the exact
# posterior probability of m1.
plt.scatter(x=sx_test[:, 0], y=sx_test[:, 1], c=pp1, marker='o')
plt.colorbar()

In [None]:
# Marginal likelihoods and posterior probabilities plotted over xos.
# NOTE(review): xos and p_m1_given_xo are not assigned anywhere in the visible
# part of this notebook, and marli0 / marli1 are built above as 1-D arrays
# (one quad result per test data set), so the [:, 0] indexing does not match
# them. This cell looks like a leftover from an earlier version -- confirm
# whether it should be updated or removed.
plt.plot(xos, marli0[:, 0])
plt.plot(xos, marli1[:, 0])
plt.plot(xos, p_m1_given_xo)
plt.plot(xos, 1 - p_m1_given_xo)
# plt.plot(thetas, probs_vis[:, 1], label='NN output: $p(m1 \;|\; x))$')
# plt.plot(thetas, probs_vis[:, 0], label='NN output: $p(m1 \;|\; x))$')
