In [None]:
%%capture
import delfi.distribution as dd
import delfi.generator as dg
import delfi.inference as infer
import delfi.summarystats as ds
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

from lfimodels.ricker.Ricker import Ricker
from lfimodels.ricker.RickerStats import RickerStats

# Likelihood-free inference (LFI) for the Ricker model

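Reference: Wood, S. N. (2010), "Statistical inference for noisy nonlinear ecological dynamic systems", *Nature* 466, 1102–1104: http://www.maths.lth.se/matstat/staff/umberto/SyntheticLikelihoods_MSCproject/wood_2010.pdf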

In [None]:
# Number of model parameters; handed to the Ricker simulator as `dim`.
n_params = 3

# problem setup
# T and burn_in are passed to the simulator as `T` / `burnIn`.
# The observed series below has 50 entries, i.e. the same length as T.
T, burn_in = 50, 100
# Parameter vector that is drawn as the ground-truth marker (`gt`) in the
# posterior plots. Presumably ordered like the prior bounds used in init_g
# -- TODO confirm the parameter naming against the Ricker model code.
thetao = np.array([3.8, 0.3, 10.])
# Observed count series (raw data, not summary statistics), wrapped in a dict
# with a 'data' key because that is the layout handed to `s.calc` below.
xo = {'data' : np.array([
    0,   0,   4,  81,   0,  27,  52,  19, 134,   0,
    0,  27,  84,   1, 166,   0,   1,   0,  37,  65,
    4,  50,   4, 121,   0,   0,  38,  36,  36,  55,
   19, 155,   0,   0,   0,  59,  18, 128,   0,   1,
   18, 138,   0,   0,  10, 148,   0,   0,   0,  22], dtype=np.int64) }
# Summary-statistics object; it receives the observed series at construction.
s = RickerStats(obs = xo['data'])
# Summary statistics of the observed data; later passed to inference as `obs`.
obs_stats = s.calc([xo])

# define generator init (new for every seed)
def init_g(seed):
    """Build a fresh generator (simulator + prior + summary stats) for one seed.

    The same `seed` is given to both the Ricker simulator and the uniform
    prior. The summary-statistics object `s` is the shared module-level one.

    Parameters
    ----------
    seed : seed value forwarded to `Ricker` and `dd.Uniform`.

    Returns
    -------
    The `dg.Default` generator combining model, prior and summary.
    """
    # box prior: per-parameter lower / upper bounds
    prior_lower = np.array([3.0, 0.0, 4.0])
    prior_upper = np.array([5.0, 0.8, 20.0])

    simulator = Ricker(dim=n_params, burnIn=burn_in, T=T, log_r0=0, seed=seed)
    prior = dd.Uniform(lower=prior_lower, upper=prior_upper, seed=seed)

    return dg.Default(model=simulator, prior=prior, summary=s)

# Stem-style plot of the raw observed counts: one marker per time bin plus a
# vertical line from zero up to the count.
fig, ax = plt.subplots(figsize=(16,8))

# time bins 1..N as a column vector
x_range = np.arange(1, xo['data'].size + 1).reshape(-1, 1)
ax.plot(x_range, xo['data'], 'bo')

# each column of (stem_x, stem_y) is one vertical segment (t, 0) -> (t, count)
stem_x = np.tile(x_range.T, (2, 1))
stem_y = np.vstack([np.zeros_like(x_range).T, xo['data'].reshape(1, -1)])
ax.plot(stem_x, stem_y, 'b')

ax.set_title('observed data (not summary stats!)')
ax.set_xlabel('time bin t')
ax.set_ylabel('observed count')
ax.axis([0, 51, 0, 180])
plt.show()

In [None]:
from delfi.utils.viz import plot_pdf

# --- experiment configuration -------------------------------------------------
rounds = 2              # number of inference rounds
n_train = [150, 350]    # simulations per round: first entry for all but the last round
algo = 'kSNPE'          # one of 'CDELFI', 'SNPE', 'kSNPE'

seeds = range(90, 93)

# optimisation settings shared by every seed
round_cl = 999
minibatch = 50
n_gradients = 100000

for seed in seeds:

    trn_seed = seed
    kwargs = {'generator': init_g(seed),
              'reg_lambda': 0.01,
              'n_components': 2,
              'n_hiddens': [20],
              'verbose' : True,
              'obs': obs_stats.copy(),
              'seed': trn_seed}

    # setup of learning schedule: every round uses n_train[0] simulations,
    # except the final round which uses n_train[-1]
    train = [n_train[0]] * rounds
    train[-1] = n_train[-1]
    # keep the number of gradient steps roughly constant across rounds;
    # max(1, ...) avoids a division by zero when a round has fewer
    # simulations than one minibatch
    epochs = [n_gradients // max(1, n_sims // minibatch) for n_sims in train]
    print('n_train :', train)
    print('epochs :', epochs)

    # arguments common to all algorithms
    run_kwargs = {'n_train': train,
                  'minibatch': minibatch,
                  'n_rounds': len(train),
                  'epochs': epochs}

    if algo == 'CDELFI':
        inf = infer.CDELFI(**kwargs)
    elif algo in ('SNPE', 'kSNPE'):
        inf = infer.SNPE(**kwargs, convert_to_T=3)
        run_kwargs['round_cl'] = round_cl
        if algo == 'kSNPE':
            # kSNPE is SNPE with the additional kernel loss enabled
            run_kwargs['kernel_loss'] = 'x_kl'
    else:
        # previously an unknown name silently fell through and crashed later
        # (or reused a stale `inf` from an earlier run)
        raise ValueError("unknown algo %r; expected 'CDELFI', 'SNPE' or 'kSNPE'" % (algo,))

    log, train_data, posteriors = inf.run(**run_kwargs)

    for r in range(rounds):
        # third entry of each round's training data -- presumably the
        # importance weights (TODO confirm against delfi). Normalise
        # out-of-place so the arrays stored in train_data are not modified.
        w = train_data[r][2]
        w = w / w.sum()

        # effective sample size of the normalised weights
        print('ESS', 1/np.sum(w**2))

        # posterior of round r, overlaid with the first-round posterior
        plot_pdf(posteriors[r],
                 pdf2=posteriors[0],
                 #samples=train_data[1][0][::1,:].T,
                 gt=thetao,
                 lims=np.array([
                    [3, 5],
                    [0, 0.8],
                    [4,20]
                ]),
                 resolution=100,
                 ticks=True,
                 figsize=(16,16))


# Testbed: calling the compiled C routines directly via ctypes

In [None]:
from ctypes import CDLL, c_double, c_int, RTLD_GLOBAL

import os

# Directory containing the compiled shared objects (ricker.so, acf.so, ...).
# The original absolute path is kept as the default; set RICKER_LIB_DIR to
# point somewhere else without editing the notebook. os.path.join(..., '')
# guarantees the trailing separator that the `path + 'name.so'` uses rely on.
path = os.path.join(
    os.environ.get('RICKER_LIB_DIR',
                   '/home/marcel/Desktop/Projects/Biophysicality/code/lfi_models/lfimodels/ricker/'),
    '')
#load the shared object file
ricker = CDLL(path + 'ricker.so')
# output buffer for the T simulated values
n      = (c_double * T)(*np.zeros(T))

# These two inputs were referenced but never defined in the notebook.
# log_r0 mirrors the value passed to the Ricker model in init_g.
log_r0 = 0.
# NOTE(review): noise draws for burn-in + T steps, assumed to be standard
# normal innovations that the C routine scales itself -- TODO confirm
# against ricker.c before relying on the numbers.
e = np.random.randn(T + burn_in)

ricker.ricker(
    n,                                           # *n
    (c_double * thetao.size)(*thetao.flatten()), # *theta
    (c_double * (T+burn_in))(*e),                # *e
    (c_int * 1)(burn_in),                        # *burn_in
    (c_int * 1)(T),                              # *n_t
    (c_int * 1)(1),                              # *n_reps
    (c_double * 1)(log_r0))                      # *n0
# copy the ctypes buffer into a numpy array for display
n = np.array(n)

n

In [None]:

# Load the compiled helper libraries from `path`.
# mat.so and nlar.so are opened with RTLD_GLOBAL, which makes their symbols
# visible to libraries loaded afterwards; presumably nlar.so resolves helpers
# from mat.so, so keep this load order -- TODO confirm.
acf_c  = CDLL(path + 'acf.so')
mat_c  = CDLL(path + 'mat.so', mode=RTLD_GLOBAL)
nlar_c = CDLL(path + 'nlar.so', mode=RTLD_GLOBAL)

def afc(x, max_lag=10):
    """
    Auto-covariance summary via the C routine `slacf` (port of R's sl.acf).

    `x' is a matrix containing replicate simulations in its columns.
    Only a single column is currently supported (enforced by the assert).

    Parameters
    ----------
    x : 2-D array of shape (n, 1)
    max_lag : largest lag requested; the output holds max_lag + 1 values

    Returns
    -------
    1-D numpy array of length (max_lag + 1) * x.shape[1], filled in by `slacf`.
    """
    assert x.shape[1] == 1 # never tested otherwise. Do not remove without testing

    # zero-initialised output buffer that the C routine writes into
    vafc = np.zeros((max_lag+1, x.shape[1]))
    vafc = (c_double * vafc.size)(*vafc.flatten())
    NAcode = -1e70

    #oo <- .C("slacf",acf=as.double(acf),x=as.double(x),as.integer(nrow(x)),as.integer(ncol(x)),
    #        as.integer(max.lag),as.double(NAcode),correlation=as.integer(0),PACKAGE="sl")
    # The result is delivered through `vafc`; the C return value carries no
    # information, so it is not bound (it used to be assigned to a local
    # called `afc`, shadowing this function's own name).
    acf_c.slacf(
        vafc,                                               # *afc
        (c_double * (x.shape[0]*x.shape[1]))(*x.flatten()), # *x
        (c_int    * 1)(x.shape[0]),                         # *n
        (c_int    * 1)(x.shape[1]),                         # *n_reps
        (c_int    * 1)(max_lag),                            # *max_lag,
        (c_double * 1)(NAcode),                             # *NAcode
        (c_int    * 1)(0))                                  # correlation

    return np.array(vafc)

def nlar(x, lag, power):
    """
    Polynomial autoregression coefficients via the C routine `slnlar`.

    Each column of `x' is a replicate (only one column is supported, see the
    assert). Term i on the right-hand side of the autoregression uses lag
    `lag[i]' raised to `power[i]'.

    Returns a 1-D numpy array with len(lag) * x.shape[1] coefficients.
    """
    assert x.shape[1] == 1 # never tested otherwise. Do not remove without testing

    n_obs, n_reps = x.shape[0], x.shape[1]
    n_terms = len(lag)
    missing_code = -1e70

    # ctypes arrays start out zero-filled; the C routine overwrites this one
    coef_buf = (c_double * (n_terms * n_reps))()
    series_buf = (c_double * (n_obs * n_reps))(*x.flatten())
    lag_buf = (c_int * n_terms)(*lag)
    power_buf = (c_int * len(power))(*power)

    # R reference call:
    #oo <- .C("slnlar",beta = as.double(beta), x = as.double(x),
    #        n=as.integer(nrow(x)),n.reps=as.integer(ncol(x)),n.terms=as.integer(length(lag)),
    #        as.integer(lag),as.integer(power),as.double(NAcode),PACKAGE="sl")
    nlar_c.slnlar(
        coef_buf,                     # *beta
        series_buf,                   # *x
        (c_int * 1)(n_obs),           # *n
        (c_int * 1)(n_reps),          # *n_reps
        (c_int * 1)(n_terms),         # *n_terms
        lag_buf,                      # *lag
        power_buf,                    # *power
        (c_double * 1)(missing_code)  # *NAcode
        )

    return np.array(coef_buf)

def order_dist(x, z, n_p=3, diff=1):
    """
    Coefficients summarising the distribution of the (differenced) columns of
    `x', obtained by regressing the sorted differenced columns of x on the
    sorted differenced values of `z' with an order-`n_p' polynomial (no
    intercept, everything is centred). `diff' is the order of differencing.

    The work is done by the C routine `order_reg`; the result is returned as
    a 1-D numpy array with n_p * x.shape[1] entries.
    """
    # Unlike the sibling wrappers, a single-column check on x is not enforced
    # here (it was never tested with more columns, though).
    # NOTE(review): x is flattened row by row; for more than one column this
    # may not match the layout the C code expects -- confirm before use.
    assert z.ndim==1 or z.shape[1]==1

    n_obs, n_reps = x.shape[0], x.shape[1]

    # zero-filled output buffer written by the C routine
    coef_buf = (c_double * (n_p * n_reps))()
    x_buf = (c_double * (n_obs * n_reps))(*x.flatten())
    z_buf = (c_double * z.size)(*z.flatten())

    # R reference call:
    #oo <- .C("order_reg",beta=as.double(beta), as.double(x),as.double(z),n=as.integer(nrow(x)),
    #        as.integer(ncol(x)),as.integer(np),as.integer(diff),PACKAGE="sl")
    nlar_c.order_reg(
        coef_buf,             # *beta
        x_buf,                # *x
        z_buf,                # *z
        (c_int * 1)(n_obs),   # *n
        (c_int * 1)(n_reps),  # *n_reps
        (c_int * 1)(n_p),     # *n_p
        (c_int * 1)(diff)     # *diff
        )

    return np.array(coef_buf)
    
def summary_stats(X,xo,max_lag=5, lag=[1,1], power=[1,2], n_p=3, diff=1):
    """
    Assemble the full summary-statistics vector for one simulated series.

    Parameters
    ----------
    X : 2-D array of shape (n, 1), the series to summarise (time along axis 0)
    xo : reference series handed to `order_dist` as `z`
    max_lag : forwarded to `afc`
    lag, power : forwarded to `nlar` (not modified here)
    n_p, diff : forwarded to `order_dist`

    Returns
    -------
    1-D array: [afc values, nlar coefficients, order_dist coefficients,
                mean of X over time, fraction of zeros in X].
    """
    vacf = afc(X,max_lag=max_lag)

    b0 = nlar(X**.3,lag=lag,power=power)

    b1 = order_dist(X,xo,n_p=n_p,diff=diff)

    # Use the argument X (time along axis 0), not a notebook-level variable:
    # the previous version read the global `x`, which only worked by accident
    # when a matching (1, n) array happened to exist in the kernel.
    mean_count = np.mean(X, axis=0)
    zero_fraction = np.mean(X == 0, axis=0)

    return np.concatenate(( vacf, b0, b1, mean_count, zero_fraction ))
    

In [None]:
# `m` only ever existed as a local variable inside init_g, so on a fresh kernel
# this cell raised a NameError. Build a simulator with the same settings that
# init_g uses (seed 90 is the first training seed used above).
m = Ricker(dim=n_params, burnIn=burn_in, T=T, log_r0=0, seed=90)
# one simulated data set at the reference parameters
x = m.gen_single(thetao)['data']

In [None]:
# Summary statistics of a fresh simulation at thetao, computed with the
# RickerStats object `s` (reference output for the ctypes testbed).
s.calc([m.gen_single(thetao)])

In [None]:
# Shape of the `obs` attribute held by the RickerStats object.
s.obs.shape

In [None]:
# Run the ctypes-based summary_stats on the transposed sample, using the same
# series as both the data and the reference for order_dist.
summary_stats(x.T,x.T,max_lag=5, lag=[1,1], power=[1,2], n_p=3, diff=1)

In [None]:
# Shape of the transposed sample as passed to the wrappers above.
x.T.shape

In [None]:
# Shape of the nlar output for two autoregression terms.
nlar(x.T, lag=[1,1], power=[1,2]).shape

In [None]:
# Output of the afc wrapper for lags 0..10.
afc(x.T, max_lag=10)

In [None]:
# 2x2 covariance matrix between the first row of x and the same row shifted by
# four steps; the off-diagonal entry is the lag-4 covariance, presumably for
# comparison with the afc output above.
np.cov(x[0,:-4],x[0,4:])

In [None]:
"""
  acf.Y <- sl.acf(Y,max.lag=5)
  acf.y <- sl.acf(y,max.lag=5)

  b0.Y <- nlar(Y^.3,lag=c(1,1),power=c(1,2))
  b0.y <- nlar(y^.3,lag=c(1,1),power=c(1,2))

  b1.Y <- order.dist(Y,y,np=3,diff=1)
  b1.y <- order.dist(y,y,np=3,diff=1)   
  
"""