In [None]:
%%capture
import delfi.distribution as dd
import delfi.generator as dg
import delfi.inference as infer
import delfi.summarystats as ds
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

from lfimodels.ricker.Ricker import Ricker
from lfimodels.ricker.RickerStats import RickerStats

# Likelihood-free inference (LFI) for the Ricker model

Reference: Wood (2010), "Statistical inference for noisy nonlinear ecological dynamic systems" — http://www.maths.lth.se/matstat/staff/umberto/SyntheticLikelihoods_MSCproject/wood_2010.pdf

In [None]:
# Number of inferred parameters; the marginal plots further down label them
# 'log r', 'sigma' and 'phi' (in that order).
n_params = 3

# problem setup
T = 50          # passed to the simulator as T; the observed series below has 50 bins
burn_in = 100   # passed to the simulator as burnIn
thetao = np.array([3.8, 0.3, 10.])

# Observed count series, wrapped the same way the summary-statistics object
# consumes it below (a dict with a 'data' entry).
xo = dict(data=np.array([
    0,   0,   4,  81,   0,  27,  52,  19, 134,   0,
    0,  27,  84,   1, 166,   0,   1,   0,  37,  65,
    4,  50,   4, 121,   0,   0,  38,  36,  36,  55,
   19, 155,   0,   0,   0,  59,  18, 128,   0,   1,
   18, 138,   0,   0,  10, 148,   0,   0,   0,  22], dtype=np.int64))

# Summary statistics are computed relative to the observed series.
s = RickerStats(obs=xo['data'])
obs_stats = s.calc([xo])

# define generator init (new for every seed)
# Switches for the transformed parameterisation of the 2nd and 3rd parameter.
log_sig = False
log_phi = False

# Copy of thetao expressed in the parameterisation selected above: an entry
# is replaced by its negative log only when the matching flag is set.
lthetao = thetao.copy()
lthetao[1] = -np.log(thetao[1]) if log_sig else thetao[1]
lthetao[2] = -np.log(thetao[2]) if log_phi else thetao[2]
    
def init_g(seed, log_sig=log_sig, log_phi=log_phi):
    """Build a fresh generator (simulator + prior + summary statistics) for one seed.

    Parameters
    ----------
    seed : int
        Seed handed to the simulator and to the prior. Stacked priors use
        ``seed``, ``seed + 1`` and ``seed + 2`` for their three components.
    log_sig, log_phi : bool
        Forwarded to the simulator; they also select the prior used for the
        2nd / 3rd parameter (shifted Gamma when the flag is set, Uniform
        otherwise). Defaults are the notebook-level flags at definition time.

    Returns
    -------
    dg.Default
        Generator combining the model, the prior and the shared summary
        statistics object ``s``.
    """
    m = Ricker(dim=n_params,
               burnIn=burn_in,
               T=T,
               log_r0=0,
               log_sig=log_sig,
               log_phi=log_phi,
               seed=seed)

    if not (log_sig or log_phi):
        # Untransformed case: one joint box prior, seeded exactly as before.
        p = dd.Uniform(lower=np.array([3.0, 0.0,  4.0]),
                       upper=np.array([5.0, 0.8, 20.0]),
                       seed=seed)
    else:
        # Assemble the prior per component so that every flag combination
        # gets a prior in the same parameterisation the model is told to use.
        # Previously `log_phi=True, log_sig=False` silently fell back to the
        # untransformed box prior above.
        # NOTE(review): the transformed-phi prior for that combination is taken
        # from the both-flags case by symmetry — confirm against Ricker.
        p_r = dd.Uniform(lower=np.array([3.0]), upper=np.array([5.0]), seed=seed)

        if log_sig:
            p_sig = dd.Gamma(alpha=1., beta=1., offset=-np.log(0.8), seed=seed+1)
        else:
            p_sig = dd.Uniform(lower=np.array([0.0]), upper=np.array([0.8]), seed=seed+1)

        if log_phi:
            p_phi = dd.Gamma(alpha=1., beta=1., offset=-np.log(20.), seed=seed+2)
        else:
            p_phi = dd.Uniform(lower=np.array([4.0]), upper=np.array([20.0]), seed=seed+2)

        p = dd.StackedDistribution([p_r, p_sig, p_phi])

    return dg.Default(model=m, prior=p, summary=s)

# Stem-style plot of the raw observed counts: a marker per time bin plus a
# vertical line from zero up to each marker.
plt.figure(figsize=(16, 8))

# 1-based time-bin index as a column vector
x_range = np.arange(1, xo['data'].size + 1).reshape(-1, 1)
plt.plot(x_range, xo['data'], 'bo')

# Each column of stem_x / stem_y is one two-point segment (t, 0) -> (t, count).
stem_x = np.concatenate([x_range, x_range], axis=1).T
stem_y = np.concatenate([np.zeros_like(x_range), xo['data'].reshape(-1, 1)], axis=1).T
plt.plot(stem_x, stem_y, 'b')

plt.title('observed data (not summary stats!)')
plt.xlabel('time bin t')
plt.ylabel('observed count')
plt.axis([0, 51, 0, 180])
plt.show()

In [None]:
from lfimodels.abc_methods.run_abc import run_smc

g = init_g(seed=42, log_sig=False, log_phi=False)

# Pilot run: only the simulated summary statistics are needed, to obtain the
# per-statistic mean and standard deviation used for z-scoring.
_, pilots=g.gen(10000)

# Row vectors of shape (1, n_stats). The width is inferred rather than
# hard-coded so a different number of summary statistics does not break this.
stats_mean = pilots.mean(axis=0).reshape(1, -1)
stats_std = pilots.std(axis=0).reshape(1, -1)

class normed_summary():
    """Summary statistics z-scored with a fixed mean and standard deviation.

    Parameters
    ----------
    summary : object with a ``calc(y)`` method, optional
        Underlying summary-statistics object. Defaults to ``g.summary``.
    mean, std : array-like, optional
        Offset and scale applied to the raw statistics. Default to the
        notebook-level ``stats_mean`` / ``stats_std``.

    The defaults are looked up once, when the instance is created. Later
    cells rebind ``g``, ``stats_mean`` and ``stats_std``; an instance that has
    already been handed to a sampler keeps the normalisation it started with.
    """

    def __init__(self, summary=None, mean=None, std=None):
        self.summary = g.summary if summary is None else summary
        self.mean = stats_mean if mean is None else mean
        self.std = stats_std if std is None else std

    def calc(self, y):
        """Return the z-scored summary statistics of the simulations ``y``."""
        raw = self.summary.calc(y)
        return (raw - self.mean) / self.std

# Observed summary statistics, z-scored with the pilot-run mean / std.
obs_statz = (obs_stats.flatten() - stats_mean) / stats_std

seed = 90
# n_params reuses the notebook-level constant instead of repeating the literal.
# NOTE(review): n_particles and maxsim are passed as floats (1e3, 1e8), exactly
# as before — confirm that run_smc accepts / casts them.
all_ps, all_logweights, all_eps, all_nsims = run_smc(
    model=g.model,
    prior=g.prior,
    summary=normed_summary(),
    obs_stats=obs_statz,
    n_params=n_params,
    seed=seed,
    n_particles=1e3,
    eps_init=1.0,
    maxsim=1e8)


In [None]:
# Writing the SMC-ABC results to disk is opt-in: set the flag to True to save.
# (With only the `np.save(` line commented out, the remaining indented dict
# and closing parenthesis made this cell a syntax error.)
SAVE_SMC_RESULTS = False

if SAVE_SMC_RESULTS:
    # The dict holds Python objects, so np.save pickles it; read it back with
    # np.load(..., allow_pickle=True).
    np.save('Ricker_SMCABC_xoGutmann_1e8simulations_results',
            {'seed' : seed,
             'all_ps' : all_ps,
             'all_logweights' : all_logweights,
             'all_eps' : all_eps,
             'all_nsims' : all_nsims,
             'model' : g.model,
             'prior' : g.prior,
             'summary' : normed_summary(),
             'obs_stats' : obs_statz,
             'n_particles' : 1e3,
             'maxsim' : 1e8,
             'eps_init' : 1.0})


In [None]:
def plot_particle_weights(particles, weights, limits, max_weight=0.01, labels=None):
    """Scatter each parameter of a particle population against its weight.

    Parameters
    ----------
    particles : array of shape (n_particles, n_dims)
    weights : array of shape (n_particles,)
    limits : sequence of (lower, upper) x-axis limits, one pair per dimension
    max_weight : float
        Upper y-axis limit shared by all panels.
    labels : sequence of str, optional
        x-axis label per panel.
    """
    plt.figure(figsize=(16,7))
    for dim, (lower, upper) in enumerate(limits):
        plt.subplot(1, len(limits), dim + 1)
        plt.plot(particles[:, dim], weights, 'o')
        plt.axis([lower, upper, 0., max_weight])
        if labels is not None:
            plt.xlabel(labels[dim])
        if dim == 0:
            plt.ylabel('weight')
    plt.show()


# Final SMC population and its weights. They are derived here so the cell
# does not rely on a later cell having been executed first.
th_post = all_ps[-1]
iws = np.exp(all_logweights[-1])

plot_particle_weights(th_post, iws,
                      limits=[(3., 5.), (0., 0.8), (4., 20.)],
                      labels=['log r', 'sigma', 'phi'])

In [None]:
from delfi.utils.viz import plot_pdf
import sklearn.mixture as skm

# Final SMC population and its weights.
th_post = all_ps[-1]
iws = np.exp(all_logweights[-1])

# Fit a 3-component full-covariance Gaussian mixture to the particles.
# Newer scikit-learn only ships GaussianMixture (covariances_); the legacy
# GMM class (covars_) is used only where GaussianMixture is unavailable.
# NOTE(review): the fit uses the particles unweighted, whereas the histograms
# below are weighted by `iws` — confirm that this is intended.
if hasattr(skm, 'GaussianMixture'):
    km = skm.GaussianMixture(n_components=3, n_init=50, covariance_type='full')
    km.fit(th_post)
    mog_covs = km.covariances_
else:
    km = skm.GMM(n_components=3, n_init=50, covariance_type='full')
    km.fit(th_post)
    mog_covs = km.covars_

post=dd.MoG(a=km.weights_,
            ms=[m for m in km.means_],
            Ss=[np.atleast_2d(S) for S in mog_covs])
post.ndim=th_post.shape[1]

# Random subset of at most 1000 particles (not used by the panels below).
idx_plot = np.random.choice(th_post.shape[0], np.min((1000,th_post.shape[0])), replace=False)

mog_color = np.array([255, 150, 80])/256
hist_color = np.array([80, 160, 220])/256
marker_color = np.array([0.3, 0.3, 0.3])

# One entry per marginal:
# (parameter index, x label, evaluation grid, x ticks, height of the thetao marker)
panels = [
    (0, 'log r', np.linspace(3., 5., 100),   [3., 3.5, 4., 4.5, 5.],   3.),
    (1, 'sigma', np.linspace(0, 0.8, 100),   [0., 0.2, 0.4, 0.6, 0.8], 3.),
    (2, 'phi',   np.linspace(4., 20., 200),  [4., 8, 12., 16, 20.],    .7),
]

plt.figure(figsize=(16,6))

for dim, xlabel, xx, xticks, marker_height in panels:
    plt.subplot(1, 3, dim + 1)
    # MoG marginal density (colour given once, via the keyword only)
    plt.plot(xx, post.eval(xx, ii=[dim], log=False), linewidth=3., color=mog_color)
    # Weighted, normalised histogram of the particles; `density` replaces the
    # `normed` keyword that newer matplotlib no longer accepts.
    plt.hist(th_post[:, dim], bins=xx[::4], weights=iws, density=True, facecolor=hist_color)
    plt.xticks(xticks)
    # Booleans rather than 'on'/'off' strings: non-empty strings are truthy,
    # so 'off' would switch the ticks on in current matplotlib.
    plt.tick_params(top=False, left=False, right=False, labelleft=True, labelbottom=True)
    # Dotted vertical marker at thetao
    plt.plot([thetao[dim], thetao[dim]], [0, marker_height],
             linestyle=':', color=marker_color, linewidth=2.)
    plt.xlabel(xlabel)

plt.suptitle('reference fit SMC-ABC (1e3 particles, 1e8 simulations)')

plt.savefig('SMCABC_1e8_samples_1k_particles_MoG.pdf')

plt.show()

In [None]:
from delfi.utils.viz import plot_pdf
import sklearn.mixture as skm

seeds = range(90,190)

# Integer counts. `thinning` is used in a floor division that becomes a slice
# bound below, and a float there (e.g. 1e5) is not a valid slice index.
n_samples = int(1e6)   # simulations per seed
thinning  = int(1e5)   # keep the closest 1 / thinning fraction of the simulations

posteriors = []   # only filled by the disabled per-seed MoG fit below
samples    = []   # accepted parameters, one array per seed

for seed in seeds:

    g = init_g(seed=seed, log_phi=False, log_sig=False)
    th, x = g.gen(n_samples)

    # z-score the summary statistics with this seed's own mean / std
    stats_mean, stats_std = x.mean(axis=0), x.std(axis=0)
    x, obz = (x-stats_mean)/stats_std, (obs_stats.flatten()-stats_mean)/stats_std

    # squared Euclidean distance of every simulation to the observation
    dx = x - obz
    dist = np.sum( dx**2, axis=1 )

    # accept the closest simulations (at least one, so `idx[-1]` is defined)
    n_accept = max(1, x.shape[0] // thinning)
    idx = np.argsort( dist )[:n_accept]

    # largest accepted (squared) distance
    print('epsilon:', dist[idx[-1]])

    plt.plot(dist[idx])
    plt.show()

    th_post = th[idx,:]

    # --- disabled: per-seed Gaussian / MoG fit -------------------------------
    # post_1comp = dd.Gaussian(m=th_post.mean(axis=0),
    #                    S=np.cov(th_post.T));
    #
    # km = skm.GMM(n_components=3, n_init=50, covariance_type='full')
    # km.fit(th_post)
    # post_3comp=dd.MoG(a=km.weights_,
    #             ms=[m for m in km.means_],
    #             Ss=[np.atleast_2d(km.covars_[i,:,:]) for i in range(km.weights_.size)])
    # post_3comp.ndim=post_1comp.ndim
    # posteriors.append(post_3comp)
    #
    # idx_plot = np.random.choice(th_post.shape[0], np.min((1000,idx.size)), replace=False)

    # --- disabled: per-seed posterior plot -----------------------------------
    # plot_pdf(post_3comp,
    #          pdf2=post_1comp,
    #          figsize=(16,16),
    #          lims=[-1,1],
    #          samples=th_post[idx_plot,:].T,
    #          ticks=True);

    samples.append(th_post)


In [None]:
# Writing the rejection-ABC results to disk is opt-in: set the flag to True.
# (With only the `np.save(` line commented out, the remaining indented dict
# and closing parenthesis made this cell a syntax error.)
SAVE_REJ_ABC_RESULTS = False

if SAVE_REJ_ABC_RESULTS:
    # The dict holds Python objects, so np.save pickles it; read it back with
    # np.load(..., allow_pickle=True).
    # Note: 'samples', 'stats_mean' and 'stats_std' are the values left over
    # from the last seed of the loop; 'all_samples' holds every seed.
    np.save('Ricker_rejABC_xoGutmann_1e8simulations_results',
            {'seeds' : seeds,
             'all_samples' : samples,
             'samples' : th_post,
             'stats_mean' : stats_mean,
             'stats_std' : stats_std,
             'n_samples' : n_samples,
             'model' : g.model,
             'prior' : g.prior,
             'summary' : normed_summary(),
             'obs_stats' : obs_stats})


In [None]:
from scipy.stats import gaussian_kde
class DKDE():
    """Collection of independent 1-D Gaussian kernel density estimates.

    Each parameter dimension gets its own ``gaussian_kde`` fitted to the
    corresponding column of the samples. ``eval`` takes the same arguments
    as the ``post.eval(xx, ii=[...], log=...)`` calls used for the marginal
    plots.
    """

    def __init__(self, ndim):
        """Reserve one (initially empty) KDE slot per dimension."""
        self.kde = [None] * ndim
        self.ii = []   # dimensions that have been fitted so far

    def fit(self, y, i):
        """Fit the KDE of dimension ``i`` to column ``i`` of the samples ``y``."""
        self.kde[i] = gaussian_kde(y[:,i])
        # Register the dimension only once, and only after a successful fit.
        if i not in self.ii:
            self.ii.append(i)

    def eval(self, xx, ii=None, log=False):
        """Evaluate one fitted marginal density at the points ``xx``.

        Parameters
        ----------
        xx : array-like
            Evaluation points.
        ii : sequence holding a single dimension index, optional
            May be omitted when exactly one dimension has been fitted.
        log : bool
            If True, return the log-density instead of the density.

        Raises
        ------
        AssertionError
            If the dimension is ambiguous, not exactly one index is given,
            or the requested dimension has not been fitted.
        """
        if ii is None:
            # The default is only usable when there is no ambiguity.
            assert len(self.ii) == 1, 'ii must be given unless exactly one dimension is fitted'
            ii = self.ii
        assert len(ii) == 1, 'exactly one dimension can be evaluated at a time'
        assert ii[0] in self.ii, 'requested dimension has not been fitted'
        density = self.kde[ii[0]](xx)
        return np.log(density) if log else density

# NOTE(review): `th_post` is whatever the rejection-ABC loop left behind, i.e.
# the accepted samples of the last seed only. The figure title mentions 1e8
# samples, so the pooled samples (np.vstack(samples)) may have been intended —
# confirm.

# Fit a 3-component full-covariance Gaussian mixture to the accepted samples.
# Newer scikit-learn only ships GaussianMixture (covariances_); the legacy
# GMM class (covars_) is used only where GaussianMixture is unavailable.
if hasattr(skm, 'GaussianMixture'):
    km = skm.GaussianMixture(n_components=3, n_init=50, covariance_type='full')
    km.fit(th_post)
    mog_covs = km.covariances_
else:
    km = skm.GMM(n_components=3, n_init=50, covariance_type='full')
    km.fit(th_post)
    mog_covs = km.covars_

post=dd.MoG(a=km.weights_,
            ms=[m for m in km.means_],
            Ss=[np.atleast_2d(S) for S in mog_covs])
post.ndim=th_post.shape[1]

# A single KDE object holding every marginal, so that `post_kde` is complete
# afterwards (re-creating it per panel kept only the last dimension).
post_kde = DKDE(ndim=th_post.shape[1])
for i in range(th_post.shape[1]):
    post_kde.fit(th_post, i)

kde_color = np.array([255, 150, 80])/256
hist_color = np.array([80, 160, 220])/256
marker_color = np.array([0.3, 0.3, 0.3])

# One entry per marginal:
# (parameter index, x label, evaluation grid, x ticks,
#  height of the thetao marker, axis limits)
panels = [
    (0, 'log r', np.linspace(3., 5., 100),    [3., 3.5, 4., 4.5, 5.],   3., [3, 5, 0, 2.2]),
    (1, 'sigma', np.linspace(0.0, 0.8, 100),  [0., 0.2, 0.4, 0.6, 0.8], 3., [0, 0.8, 0, 2.2]),
    (2, 'phi',   np.linspace(4., 20., 200),   [4., 8, 12., 16, 20.],    .7, [4, 20, 0, 0.6]),
]

plt.figure(figsize=(16,6))

for dim, xlabel, xx, xticks, marker_height, axis_limits in panels:
    plt.subplot(1, 3, dim + 1)
    plt.plot(xx, post.eval(xx, ii=[dim], log=False), linewidth=3., color='g', label='MoG')
    plt.plot(xx, post_kde.eval(xx, ii=[dim], log=False), color=kde_color, linewidth=3., label='kde')
    # Rejection-ABC samples are unweighted, so no `weights` are passed here;
    # `iws` belongs to the SMC population and need not even match in length.
    # `density` replaces the `normed` keyword that newer matplotlib rejects.
    plt.hist(th_post[:, dim], bins=xx[::4], density=True, facecolor=hist_color, label='hist')
    plt.xticks(xticks)
    # Booleans rather than 'on'/'off' strings: non-empty strings are truthy,
    # so 'off' would switch the ticks on in current matplotlib.
    plt.tick_params(top=False, left=False, right=False, labelleft=True, labelbottom=True)
    # Dotted vertical marker at thetao (left unlabelled, so not in the legend)
    plt.plot([thetao[dim], thetao[dim]], [0, marker_height],
             linestyle=':', color=marker_color, linewidth=2.)
    plt.xlabel(xlabel)
    plt.axis(axis_limits)
    if dim == 0:
        # Labels are attached to the artists themselves; a positional label
        # list is matched against lines first and would tag the thetao marker
        # as 'hist'.
        plt.legend(loc=1)


plt.suptitle('reference fit rej. ABC (1e8 samples, 99.999% rejection rate)')
plt.savefig('rejABC_1e8_samples_1k_accept.pdf')


In [None]:
# Writing the fitted posteriors to disk is opt-in: set the flag to True.
# (With only the `np.save(` line commented out, the remaining indented dict
# and closing parenthesis made this cell a syntax error.)
SAVE_POSTERIOR_FITS = False

if SAVE_POSTERIOR_FITS:
    # The dict holds Python objects, so np.save pickles it; read it back with
    # np.load(..., allow_pickle=True).
    np.save('Ricker_rejABC_xoGutmann_1e8simulations_posterior_fits',
            {'posterior_kde' : post_kde,
             'posterior_mog' : post,
             'samples_post' : th_post,
             'obs_stats' : obs_stats})