In [None]:
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
import timeit

import delfi.distribution as dd
from delfi.inference import SNPEC as APT

from util import save_results, load_results
from util import init_g_gauss as init_g
from util import load_setup_gauss as load_setup
from util import load_gt_gauss as load_gt
from util import draw_sample_uniform_prior_33 as rej_sampler

from util import NoiseStats

from snl.util.plot import plot_hist_marginals

# Experiment constants: RNG seed, dimensionality of the appended noise vector,
# and number of components in the noise mixture.
seed, noise_dim, n_noise_comps = 52, 52, 20

# Settings dictionary shared by the APT and SNL fits further down.
setup_dict = load_setup()

# Ground-truth parameters and the corresponding observed summary statistics,
# loaded via a generator initialised with the same seed.
pars_true, obs_stats = load_gt(
    generator=init_g(seed=seed),
)
print('pars_true : ', pars_true)
print('obs_stats : ', obs_stats)


# define source for additive uninformative noise

- mixture of n_noise_comps multivariate-t distributions in noise_dim dimensions

In [None]:
# Seed NumPy's global RNG. `rng` is just an alias for the np.random module, so
# every rng.* draw below consumes the global stream in the order written.
rng = np.random
rng.seed(seed)

# means of mixture components
# NOTE(review): this first `noise_ms` is dead -- it is overwritten below, after
# the covariance draws, and never reaches the mixture. Left in place because
# removing it is only result-neutral if dd.Gaussian(seed=...) draws from its
# own RNG rather than the global one -- TODO confirm, then delete.
noise_means_prior = dd.Gaussian(m = np.zeros(noise_dim), S=np.eye(noise_dim), seed=seed+1)
noise_ms = [noise_means_prior.gen(1).reshape(-1) for i in range(n_noise_comps)]

# 'cov' matrices of mixture components, built from random lower-triangular
# factors whose diagonal is shifted by exp(normal) + 3
cholesky_factors = [np.tril(rng.normal(size=(noise_dim, noise_dim))) + np.diag(np.exp(rng.normal(size=noise_dim)))
                    + 3. * np.eye(noise_dim) for i in range(n_noise_comps)]
noise_Ss = [3 * np.dot(ch, ch.T) / noise_dim for ch in cholesky_factors]
# component means actually used by the mixture (replaces the list built above)
noise_ms = [15 * rng.normal(size=noise_dim) for i in range(n_noise_comps)]

# degrees of freedom for mixture components
noise_dofs = [2 for i in range(n_noise_comps)]

# mixture of t with uniform mixture coefficients
noise_distribution = dd.MoT(a=np.ones(n_noise_comps) / n_noise_comps, ms=noise_ms, Ss=noise_Ss, dofs=noise_dofs, seed=seed)
noise_distribution.ndim = noise_dim

# generator: swap in summary statistics that append noise to the signal
g = init_g(seed=seed)
g.summary = NoiseStats(noise_source=noise_distribution, n_signal=8, seed=seed)
g.model.dim_param = 5

# compute augmented observation xo by stacking actual xo with added noise. Don't forget to also permute !
# NOTE: this rebinds `obs_stats`, so the cell is not idempotent -- re-run the
# first cell (which reloads obs_stats) before re-running this one.
obs_stats = np.hstack((obs_stats, noise_distribution.gen(1).reshape(1,-1)))[0,g.summary.idx]
assert obs_stats.size ==  g.summary.n_summary

print('permutation indices', g.summary.idx)

# marginals of the pure noise source
fig = plot_hist_marginals(noise_distribution.gen(1000))
fig.set_figwidth(16)
fig.set_figheight(16)
# plt.show() instead of fig.show(): the latter only emits a "non-GUI backend"
# warning under %matplotlib inline.
plt.show()

# fit APT

In [None]:
# Epoch schedule: when every round trains on all data gathered so far, the
# per-round budget is the base value divided by the round number
# (epochs, epochs//2, ...); otherwise a single value is used for all rounds.
if setup_dict['train_on_all']:
    epochs=[setup_dict['epochs']//(r+1) for r in range(setup_dict['n_rounds'])]
else:
    epochs=setup_dict['epochs']

# control MAF seed (re-seeds NumPy's global RNG; `rng` aliases np.random)
rng = np.random
rng.seed(seed)

# inference object; all architecture/training options come from setup_dict
res = APT(g,
          obs=obs_stats,
          n_hiddens=setup_dict['n_hiddens'],
          seed=seed,
          reg_lambda=setup_dict['reg_lambda'],
          pilot_samples=setup_dict['pilot_samples'],
          svi=setup_dict['svi'],
          n_mades=setup_dict['n_mades'],
          act_fun=setup_dict['act_fun'],
          mode=setup_dict['mode'],
          rng=rng,
          batch_norm=setup_dict['batch_norm'],
          verbose=setup_dict['verbose'],
          #upper=setup_dict['upper'], # box-constraints for support
          #lower=setup_dict['lower'], # of MAF outputs (maf.y)
          prior_norm=setup_dict['prior_norm'])

print('conditional density estimator', res.network)

# train
# timeit.default_timer() is the documented clock for measuring durations;
# the previous timeit.time.time() relied on timeit's internal `time` import.
t = timeit.default_timer()
print('fitting model with SNPE-C')
logs, tds, posteriors = res.run(
                    n_train=setup_dict['n_train'],
                    proposal=setup_dict['proposal'],
                    moo=setup_dict['moo'],
                    n_null = setup_dict['n_null'],
                    n_rounds=setup_dict['n_rounds'],
                    train_on_all=setup_dict['train_on_all'],
                    minibatch=setup_dict['minibatch'],
                    epochs=epochs)
print('fitting time : ', timeit.default_timer() - t)

In [None]:
# Visualise the APT posterior estimate of every 4th round, together with the
# negative log-probability it assigns to the ground-truth parameters.
for r in range(0, len(logs), 4):

    posterior = posteriors[r]
    samples = rej_sampler(posterior, 1000) # fast parallel rejection sampler

    fig = plot_hist_marginals(
                   samples,
                   gt=pars_true,
                   lims=[-3,3])

    fig.set_figheight(12)
    fig.set_figwidth(12)
    fig.suptitle('APT posterior estimates, round r = '+str(r+1), fontsize=14)
    # plt.show() renders the figure here, next to the printed value below;
    # fig.show() only warns about the non-GUI backend under %matplotlib inline.
    plt.show()
    print('negative log-probability of ground-truth pars \n', -posterior.eval(pars_true, log=True))

# fit SNL

In [None]:
import snl.simulators.gaussian as sim_gauss

# Replicate generator g with the SNL package.
# The APT model and prior actually are just thin wrappers around these objects!
model = sim_gauss.Model()
prior = sim_gauss.Prior()

# Same noisy summary statistics as used for APT; the permutation is copied
# from g.summary so that it matches both APT and the already permuted xo.
stats = NoiseStats(noise_source=noise_distribution, n_signal=8, seed=seed)
stats.idx = g.summary.idx

print('permutation indices', stats.idx)


def sim_model(ps, rng):
    """Simulate from the SNL model for parameters `ps` and return the noisy summary statistics.

    `rng` is forwarded to `model.sim`; the simulator output is then passed
    through `stats.calc`.
    """
    simulated = model.sim(ps, rng=rng)
    return stats.calc(simulated)

In [None]:
from snl.inference.nde import SequentialNeuralLikelihood
from snl.ml.models.mafs import ConditionalMaskedAutoregressiveFlow
import sys

# SNL inference object built from the SNL-side prior and simulator
infer = SequentialNeuralLikelihood(prior=prior, sim_model=sim_model)

# control MAF seed (re-seeds NumPy's global RNG; `rng` aliases np.random)
rng = np.random
rng.seed(seed)

# conditional MAF mapping parameters (inputs) to summary statistics (outputs)
# NOTE(review): batch_norm is hard-coded to False here, whereas the APT fit
# reads setup_dict['batch_norm'] -- presumably intentional; confirm.
maf = ConditionalMaskedAutoregressiveFlow(n_inputs=prior.n_dims,
                                          n_outputs=obs_stats.size,
                                          n_hiddens=setup_dict['n_hiddens'],
                                          act_fun=setup_dict['act_fun'],
                                          n_mades=setup_dict['n_mades'],
                                          batch_norm=False,
                                          mode=setup_dict['mode'],
                                          rng=rng)

# control sampler seed
rng = np.random
rng.seed(seed+1)

# timeit.default_timer() is the documented clock for measuring durations;
# the previous timeit.time.time() relied on timeit's internal `time` import.
t = timeit.default_timer()

learned_model = infer.learn_likelihood(obs_xs=obs_stats.flatten(),
                       model=maf,
                       n_samples=setup_dict['n_train'],
                       n_rounds=setup_dict['n_rounds'],
                       train_on_all=setup_dict['train_on_all'],
                       logger=sys.stdout,
                       rng=rng)

# labelled like the APT cell so the number is identifiable in the training log
print('fitting time : ', timeit.default_timer() - t)

In [None]:
# final posterior estimate for SNL requires MCMC sampling
import snl.inference.mcmc as mcmc

thin = 10 # default value


def log_posterior(theta):
    """Sum of the learned model's value at (theta, observed stats) and the prior's value at theta.

    Presumably both terms are log-densities, making this the unnormalised
    log posterior -- confirm against the snl API.
    """
    return learned_model.eval([theta, obs_stats.flatten()]) + prior.eval(theta)


# slice sampler started from a single draw of the prior
sampler = mcmc.SliceSampler(prior.gen(), log_posterior, thin=thin)

sampler.gen(max(200 // thin, 1), rng=rng)  # burn in
# pass the same RNG as the burn-in so that both calls explicitly share one stream
samples = sampler.gen(1000, rng=rng)

fig = plot_hist_marginals(data=samples, lims=[-3,3])
fig.set_figwidth(16)
fig.set_figheight(16)
# plt.show() instead of fig.show(): the latter only emits a "non-GUI backend"
# warning under %matplotlib inline.
plt.show()

In [None]:
# Track the posterior across earlier rounds through the proposal parameters
# generated in each round (the first round's proposal is the prior).
# Every 4th round is plotted.
for r in range(len(infer.all_ps))[::4]:
    fig = plot_hist_marginals(
        infer.all_ps[r],
        lims=[-3,3],
    )
    fig.set_figheight(16)
    fig.set_figwidth(16)
    fig.suptitle('SNL posterior estimate, round r{}'.format(r))
    plt.show()