# GLM


- GLM example as simple multivariate posterior estimation problem with available ground-truth (MCMC)
- setup and code from NIPS 2017 paper

In [None]:
import delfi.distribution as dd
import delfi.generator as dg
import delfi.inference as infer
import delfi.utils.io as io
import delfi.summarystats as ds
import lfimodels.glm.utils as utils
import matplotlib.pyplot as plt
import numpy as np

from lfimodels.glm.GLM import GLM
from lfimodels.glm.GLMStats import GLMStats
from delfi.utils.viz import plot_pdf

import timeit

%matplotlib inline

seed = 42


# Problem setup, taken from the NIPS 2017 paper.

duration = 100  # simulation length (longer = tighter posteriors)
len_filter = 9  # number of GLM filter parameters (= dim. of parameters)

# Ground-truth parameters together with their labels.
true_params, labels_params = utils.obs_params(len_filter)

# Observed data for the ground-truth parameters; used by the MCMC reference.
obs = utils.obs_data(true_params, duration=duration, seed=seed)

# Observed summary statistics; passed as `obs=` to the inference objects.
# NOTE(review): unlike obs_data, `duration` is not forwarded here, so this
# relies on the default of utils.obs_stats -- confirm it matches `duration`.
obs_stats = utils.obs_stats(true_params, seed=seed)

# basic approach to controlling generator seeds
def init_g(seed):
    """Build a fresh generator whose model and prior share one seed.

    Parameters
    ----------
    seed
        Seed handed to both the GLM model and the smoothing prior.

    Returns
    -------
    A ``dg.Default`` generator wrapping the GLM model, the smoothing prior
    over its ``n_params`` parameters, and ``GLMStats`` summary statistics
    with ``n_summary`` equal to ``n_params``.

    Notes
    -----
    Reads the module-level ``duration`` and ``len_filter`` settings.
    """
    glm = GLM(seed=seed, duration=duration, len_filter=len_filter)
    n_params = glm.n_params
    prior = utils.smoothing_prior(n_params=n_params, seed=seed)
    stats = GLMStats(n_summary=n_params)
    return dg.Default(model=glm, prior=prior, summary=stats)

# MCMC comparison (this might take a while the first time !)
rerun = False  # if False, will try loading file from disk

# `sam` holds the MCMC samples used as reference in the plots below.
# They are cached in 'sam.npz'; the cache is bypassed when `rerun` is True.
sam = None
if not rerun:
    # Plain `if` instead of `assert`: asserts are stripped under `python -O`,
    # which would silently ignore a requested rerun.
    try:
        # Context manager closes the underlying zip file after reading.
        with np.load('sam.npz') as cached:
            sam = cached['arr_0']
    except Exception as err:
        # Loading the cache is best-effort: a missing, corrupt or
        # incompatible file falls back to recomputation. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        print('could not load sam.npz ({}), running MCMC'.format(err))

if sam is None:
    sam = utils.pg_mcmc(true_params, obs)
    np.savez('sam.npz', sam)
    

# SNPE parameters

# training schedule (forwarded to res.run)
n_rounds = 5
n_train = 3000

# fitting setup (forwarded to res.run)
epochs = 50
minibatch = 100

# network setup (forwarded to the inference constructors)
reg_lambda = 0.01
n_hiddens = [50]

# convenience (forwarded to the inference constructors)
prior_norm = False
verbose = True
svi = False
pilot_samples = 0


## baseline: SNPE-A

- SNPE-A hard to beat on this problem with Gaussian prior and Gaussian $q^*$.

In [None]:
# Fresh generator so this run starts from the seeded model and prior.
g = init_g(seed=seed)

# SNPE-A baseline, configured from the shared settings defined above.
res = infer.CDELFI(g,
                   obs=obs_stats,
                   n_hiddens=n_hiddens,
                   seed=seed,
                   reg_lambda=reg_lambda,
                   pilot_samples=pilot_samples,
                   svi=svi,
                   verbose=verbose,
                   prior_norm=prior_norm)

# timeit.default_timer is the documented timer of the timeit module;
# `timeit.time` only works because timeit happens to import `time`.
t = timeit.default_timer()

# posteriors_A is indexed per round further down; logs_A and tds_A are
# presumably the matching per-round training logs and training data.
logs_A, tds_A, posteriors_A = res.run(n_train=n_train,
                                      n_rounds=n_rounds,
                                      minibatch=minibatch,
                                      epochs=epochs)

# elapsed time of the run in seconds
print(timeit.default_timer() - t)


# quick look at problem setup, posterior vs. prior

In [None]:
# Last-round SNPE-A posterior against the prior of the current generator,
# with the MCMC samples and the ground-truth parameters passed along.
fig, _ = plot_pdf(
    posteriors_A[-1],
    pdf2=g.prior,
    samples=sam,
    gt=true_params,
    resolution=100,
    figsize=(16, 16),
)
fig.suptitle('final posterior estimate vs MCMC samples and prior',
             fontsize=14)
fig.show()


## SNPE-C

- version with rounds: first round is SNPE-A, then 

    - after every round, set $\tilde{p}(\theta) = q^*(\theta|x_0)$
    
    - sample synthetic data set $\mathcal{D} = \{(\theta_n,x_n)\}_{n=1}^N$  for this round, with $(\theta_n, x_n) \sim p(x|\theta)\tilde{p}(\theta)$
    
    - for every gradient step, sample alternatives $\theta'_{nj}, j = 1, \ldots, n_{null}$, from a distribution that depends on the chosen rule: 
        - moo='resample' : $\ \theta'_{nj}\sim Unif[\{\theta_m\}_{m\neq{}n}]$, i.e. $\theta'_{nj}$ are resampled (without replacement) from the other $\theta_m\sim \tilde{p}(\theta)$, $m \neq n$, in the same minibatch. 
        - moo='prior' : $\ \theta'_{nj}\sim p(\theta)$ 
        - moo='p_tilda' : $\ \theta'_{nj}\sim \tilde{p}(\theta)$ with _fixed_ $\tilde{p}(\theta) = q^*(\theta|x_0)$  (default)
        - moo='q_phi_xo': $\ \theta'_{nj}\sim q^*(\theta | x_o)$ with _current_ $q^*$
        - moo='q_phi_x' : $\ \theta'_{nj}\sim q^*(\theta | x_n)$ with _current_ $q^*$
        - note that the two 'q_phi' rules violate the view of drawing $\theta_n, \theta'_{nj}$ i.i.d.! At the moment it is not perfectly clear what they implement (probably some form of variational inference).
    
    - construct uniform discrete proposals over $\{\theta_n\} \cup \{\theta'_{nj}\}_{j=1}^{n_{null}}$ and compute SNPE-C loss and gradients
    
    - do SGD with minibatches over $minibatch$ many $(\theta_n, x_n)$
    
    
- takes longer than SNPE-A because 

    - for every batch, need to sample $minibatch * n_{null}$ many $\theta'_{nj}$ from $minibatch$ many different MoGs $q^*(\theta|x_n)$ !
    - for every batch, need to evaluate $minibatch * n_{null}$ many terms $q^*(\theta'_{nj} | x_n)$ (versus $minibatch$ many for SNPE-A)


In [None]:
# number of alternative parameters theta'_nj for each data pair (theta_n, x_n)
n_null = minibatch - 1

# Fresh generator, built with the same seed as for the SNPE-A run.
g = init_g(seed=seed)

# SNPE-C, configured with the same shared settings as the SNPE-A baseline.
res = infer.SNPEC(g,
                  obs=obs_stats,
                  n_hiddens=n_hiddens,
                  seed=seed,
                  reg_lambda=reg_lambda,
                  pilot_samples=pilot_samples,
                  svi=svi,
                  verbose=verbose,
                  prior_norm=prior_norm)

# timeit.default_timer is the documented timer of the timeit module;
# `timeit.time` only works because timeit happens to import `time`.
t = timeit.default_timer()

# Same schedule as SNPE-A, plus the SNPE-C specific options: discrete
# proposals with alternatives chosen by the 'resample' rule.
logs_C, tds_C, posteriors_C = res.run(n_train=n_train,
                                      n_rounds=n_rounds,
                                      minibatch=minibatch,
                                      epochs=epochs,
                                      n_null=n_null,
                                      proposal='discrete',
                                      moo='resample')

# elapsed time of the run in seconds
print(timeit.default_timer() - t)


# round-by-round comparison of posterior estimates
- on the first round, SNPE-C = SNPE-A! (We want samples from $q^*_{F(\phi,x_o)}(\cdot | x_o)$ to be not much worse than samples from the prior, which they are for the initial $\phi$!) 

In [None]:
# One comparison figure per SNPE-C round (r is zero-based, titles one-based).
for r in range(len(logs_C)):

    # Posterior estimates of both methods after the same round.
    posterior_A, posterior_C = posteriors_A[r], posteriors_C[r]

    # Per the title below: SNPE-C in the upper triangle, SNPE-A in the lower.
    fig, _ = plot_pdf(
        posterior_C,
        pdf2=posterior_A,
        samples=sam,
        gt=true_params,
        resolution=100,
        figsize=(16, 16),
    )

    fig.suptitle(
        'SNPE-C (upper triangle) vs SNPE-A (lower triangle), round r = '
        + str(r + 1),
        fontsize=14,
    )