# analytical approach

- get statistics of the estimators by fitting a large number of models with different seeds

# basic setup for experiments

In [None]:
%%capture 
import util
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import delfi.distribution as dd
import delfi.generator as dg
import delfi.inference as infer
import delfi.summarystats as ds
from delfi.simulator.Gauss import Gauss


## problem setup ##

# Dimensionality of the parameter vector.
n_params = 1
# Cannot be overstressed: everything in this notebook goes downhill sharply
# for any other value, so fail immediately instead of producing nonsense.
assert n_params == 1

# Prior variance.
eta2 = 1.0
# Calibration kernel width (everything above a certain threshold will be
# treated as 'uniform').
eps2 = 1e20

# Observed summary statistics, fixed by hand (the alternative noted by the
# author was to draw them from a generator: `_, obs = g.gen(1)`).
x0 = np.full(1, 0.8)



In [None]:

## simulation setup ##
# For every combination of simulation budget N and likelihood variance sig2,
# run each proposal family through util.test_setting and save all results of
# that combination in one .npy file.
n_fits = 1000  # number of MLE fits (i.e. dataset draws), each single-round fits with pre-specified proposal!

sig2s = [1./99, 1./9, 1.]          # likelihood variance
Ns = [30, 100, 300, 1000, 3000]    # number of simulations per dataset

ksi2s = np.array([0.01, 0.1, 0.5, 0.999]) * eta2  # proposal variance


for N in Ns:
    for sig2 in sig2s:

        # set proposal priors (one per experiment): every proposal is centred
        # on eta2 / (eta2 + sig2) * x0
        nus = eta2 / (eta2 + sig2) * x0[0] * np.ones(len(ksi2s))  # proposal mean

        # One (len(ksi2s), n_fits, 5) result array per proposal family, plus
        # the settings needed to interpret the arrays after loading the file.
        # Assumes util.test_setting fills the array it is handed in place
        # -- TODO confirm against util.
        res = {'normal'  : np.zeros((len(ksi2s), n_fits, 5)),
               't_df10'  : np.zeros((len(ksi2s), n_fits, 5)),
               't_df3'   : np.zeros((len(ksi2s), n_fits, 5)),
               'cdelfi'  : np.zeros((len(ksi2s), n_fits, 5)),
               'unif_w1' : np.zeros((len(ksi2s), n_fits, 5)),
               'unif_w6' : np.zeros((len(ksi2s), n_fits, 5)),
               'sig2' : sig2, 'eta2' : eta2, 'eps2' : eps2,
               'ksi2s' : ksi2s, 'nus' : nus, 'x0' : x0,
              }

        # NOTE(review): the Gaussian-proposal SNPE run below is disabled, so
        # res['normal'] is saved as all zeros. Uncomment to re-enable.
        # print( ' SNPE (Gaussian proposals) ' )
        # proposal_form = 'normal'
        # out_snpe  = res[proposal_form]
        # util.test_setting(out_snpe, n_params,N,sig2,eta2,eps2,x0,ksi2s,nus,
        #                   proposal_form,track_rp=True,if_plot=False)

        print(' SNPE (students T proposals, df = 10) ')
        proposal_form = 'studentT'
        df = 10
        out_snpe = res['t_df' + str(df)]
        util.test_setting(out_snpe, n_params, N, sig2, eta2, eps2, x0, ksi2s, nus,
                          proposal_form, track_rp=True, df=df, if_plot=False)

        print(' SNPE (students T proposals, df = 3) ')
        proposal_form = 'studentT'
        df = 3
        out_snpe = res['t_df' + str(df)]
        util.test_setting(out_snpe, n_params, N, sig2, eta2, eps2, x0, ksi2s, nus,
                          proposal_form, track_rp=True, df=df, if_plot=False)

        print(' uniform proposals (+/- 1 posterior std)')
        proposal_form = 'unif'
        sds = 1
        marg = np.sqrt(sds) * np.sqrt(12)
        out_snpe = res['unif_w' + str(sds)]
        util.test_setting(out_snpe, n_params, N, sig2, eta2, eps2, x0, ksi2s, nus,
                          proposal_form, track_rp=True, marg=marg, if_plot=False)

        print(' uniform proposals (+/- 3 posterior stds)')
        proposal_form = 'unif'
        sds = 6
        marg = np.sqrt(sds) * np.sqrt(12)
        out_snpe = res['unif_w' + str(sds)]
        util.test_setting(out_snpe, n_params, N, sig2, eta2, eps2, x0, ksi2s, nus,
                          proposal_form, track_rp=True, marg=marg, if_plot=False)

        print('CDELFI')
        proposal_form = 'normal'
        # Bind the CDELFI slot explicitly. Without this, out_snpe still points
        # at res['unif_w6'], so the uniform results would be overwritten and
        # res['cdelfi'] would be saved as zeros.
        out_snpe = res['cdelfi']
        util.test_setting(out_snpe, n_params, N, sig2, eta2, eps2, x0, ksi2s, nus,
                          proposal_form, track_rp=False, if_plot=False)

        # np.save pickles the dict and appends '.npy' to the name.
        # Built-in int replaces np.int, which was removed in NumPy 1.24.
        np.save('res_analytic_n_fits' + str(n_fits) + '_N' + str(N)
                + '_postVar' + str(int(np.round(1 / sig2))), res)


# summary figures

## requires the `res_analytic_*.npy` result files saved by the simulation cell

## biases

In [None]:
%matplotlib inline
import numpy as np
import seaborn
import matplotlib.pyplot as plt


# Bias figures: for every likelihood variance, plot the average (over fits) of
# the estimated posterior mean (top row) and posterior variance (bottom row)
# against the proposal variance, one column per proposal family and one curve
# per simulation budget N.
Ns = [30, 100, 300, 1000, 3000]
n_fits = 1000

sig2s = [1./99, 1./9, 1.]  # likelihood variance
algs = ['normal', 't_df10', 't_df3', 'unif_w1', 'unif_w6']
# one shade of green per N, from dark to bright
clrs = np.array([[0, 1, 0]]) * np.linspace(0.1, 0.9, len(Ns)).reshape(-1, 1)

for sig2 in sig2s:

    # Suffix identifying the result files (and the figure) for this sig2.
    # Built-in int replaces np.int, which was removed in NumPy 1.24.
    post_var_tag = str(int(np.round(1 / sig2)))

    fig, axes = plt.subplots(2, len(algs), figsize=(12, 8), squeeze=False)

    for i, alg in enumerate(algs):

        ax_mean, ax_var = axes[0, i], axes[1, i]

        for N, clr in zip(Ns, clrs):

            # The file holds a pickled dict, so allow_pickle is required.
            tmp = np.load('res_analytic_n_fits' + str(n_fits) + '_N' + str(N)
                          + '_postVar' + post_var_tag + '.npy',
                          allow_pickle=True).item()
            out_snpe = tmp[alg]
            # Read the settings from the file before they are first used, so
            # the cell does not depend on leftovers from other cells.
            ksi2s = tmp['ksi2s']
            eta2, x0 = tmp['eta2'], tmp['x0']

            if alg == 'cdelfi':
                # Not reachable with the algs listed above. analytic_div and
                # nu are not defined in this cell and must come from elsewhere.
                est = analytic_div(out_snpe, eta2, nu*np.ones_like(ksi2s), ksi2s)
            else:
                est = out_snpe

            # index 0: posterior-mean estimates, index 1: posterior-variance
            # estimates (as labelled on the y axes below)
            ax_mean.semilogx(ksi2s, est[:, :, 0].mean(axis=1), color=clr, linewidth=2)
            ax_var.semilogx(ksi2s, est[:, :, 1].mean(axis=1), color=clr, linewidth=2)

        ax_mean.set_title(alg + ' proposal prior')

        # Ground-truth posterior mean and variance, drawn once per panel.
        post_mean = eta2 / (eta2 + sig2) * x0[0]
        post_var = eta2 - eta2**2 / (eta2 + sig2)
        ax_mean.semilogx(ksi2s, post_mean * np.ones_like(ksi2s), 'k--', linewidth=2)
        ax_var.semilogx(ksi2s, post_var * np.ones_like(ksi2s), 'k--', linewidth=2)

        ax_mean.axis([0.95*ksi2s[0], 1.05*ksi2s[-1], 0.5*post_mean, 1.5*post_mean])
        ax_var.axis([0.95*ksi2s[0], 1.05*ksi2s[-1], 0, 1.1*post_var])
        # label the x axis in the middle column only
        if i == len(algs) // 2:
            ax_var.set_xlabel('proposal prior variance / prior variance')
        # vertical red line where the proposal variance equals the true
        # posterior variance 1 / (1/sig2 + 1/eta2)
        ax_var.plot(1/(1/sig2+1/eta2)*np.ones(2), [0, 1.1*post_var], 'r')

    axes[0, 0].set_ylabel('posterior means')
    # The first len(Ns) lines in this panel are the per-N curves.
    axes[0, 0].legend(['N = ' + str(n) for n in Ns], loc=4)
    axes[1, 0].set_ylabel('posterior variance')

    # Include the sig2 tag in the name; otherwise every pass of the loop
    # overwrites the same PDF and only the last figure is kept.
    fig.savefig('bias_' + str(n_fits) + 'fits_SNPE_postVar' + post_var_tag + '.pdf')
    plt.show()

## estimator standard deviations

In [None]:
%matplotlib inline
import numpy as np
import seaborn
import matplotlib.pyplot as plt


# Estimator-spread figure: plot the standard deviation (over fits) of the
# estimated posterior mean (top row) and posterior variance (bottom row)
# against the proposal variance, one column per proposal family and one curve
# per simulation budget N.
Ns = [30, 100, 300, 1000, 3000]
n_fits = 1000

algs = ['normal', 't_df10', 't_df3', 'unif_w1', 'unif_w6']
# one shade of green per N, from dark to bright
clrs = np.array([[0, 1, 0]]) * np.linspace(0.1, 0.9, len(Ns)).reshape(-1, 1)

# NOTE(review): `sig2` is not assigned in this cell. It must already be in the
# notebook namespace (e.g. left over from the bias cell) and selects which
# '_postVar*' result files are loaded.
# Built-in int replaces np.int, which was removed in NumPy 1.24.
post_var_tag = str(int(np.round(1 / sig2)))

fig, axes = plt.subplots(2, len(algs), figsize=(12, 8), squeeze=False)

for i, alg in enumerate(algs):

    ax_mean, ax_var = axes[0, i], axes[1, i]

    for N, clr in zip(Ns, clrs):

        # The file holds a pickled dict, so allow_pickle is required.
        tmp = np.load('res_analytic_n_fits' + str(n_fits) + '_N' + str(N)
                      + '_postVar' + post_var_tag + '.npy',
                      allow_pickle=True).item()
        out_snpe = tmp[alg]
        # Take the x-axis values from the file instead of relying on a
        # `ksi2s` left behind by another cell.
        ksi2s = tmp['ksi2s']

        if alg == 'cdelfi':
            # Not reachable with the algs listed above. analytic_div, nu and
            # eta2 are not defined in this cell and must come from elsewhere.
            est = analytic_div(out_snpe, eta2, nu*np.ones_like(ksi2s), ksi2s)
        else:
            est = out_snpe

        # index 0: posterior-mean estimates, index 1: posterior-variance
        # estimates (as labelled on the y axes below)
        ax_mean.semilogx(ksi2s, est[:, :, 0].std(axis=1), color=clr, linewidth=2)
        ax_var.semilogx(ksi2s, est[:, :, 1].std(axis=1), color=clr, linewidth=2)

    ax_mean.set_title(alg + ' proposal prior')

    # fixed y ranges so that all columns are directly comparable
    ax_mean.axis([ksi2s[0], ksi2s[-1], 0, 0.3])
    ax_var.axis([ksi2s[0], ksi2s[-1], 0, 0.033])
    ax_var.set_xlabel('proposal prior variance / prior variance')

axes[0, 0].set_ylabel('posterior means')
axes[0, 0].legend(['N = ' + str(n) for n in Ns], loc=1)
axes[1, 0].set_ylabel('posterior variance')

fig.savefig('std_' + str(n_fits) + 'fits_SNPE.pdf')
plt.show()

# another look at CDELFI in 'proposal prior'-space
## Gaussian priors, no analytical division, no importance sampling

In [None]:
%matplotlib inline
import numpy as np
import seaborn
import matplotlib.pyplot as plt


# CDELFI figure: average (left column) and standard deviation (right column)
# over fits of the estimated posterior mean (top row) and posterior variance
# (bottom row), plotted against the proposal variance with one curve per N.
# NOTE(review): these Ns differ from the ones used in the simulation cell
# ([30, 100, 300, 1000, 3000]); result files for the values below must exist
# on disk, otherwise np.load fails.
Ns = [50, 100, 500, 1000, 5000]
n_fits = 1000

# one shade of green per N, from dark to bright
clrs = np.array([[0, 1, 0]]) * np.linspace(0.1, 0.9, len(Ns)).reshape(-1, 1)

# NOTE(review): `sig2` is not assigned before this point in the cell. It must
# already be in the notebook namespace and selects which '_postVar*' result
# files are loaded.
# Built-in int replaces np.int, which was removed in NumPy 1.24.
post_var_tag = str(int(np.round(1 / sig2)))

fig, axes = plt.subplots(2, 2, figsize=(12, 8))
ax_mean_avg, ax_mean_std = axes[0, 0], axes[0, 1]
ax_var_avg, ax_var_std = axes[1, 0], axes[1, 1]

for N, clr in zip(Ns, clrs):

    # The file holds a pickled dict, so allow_pickle is required.
    tmp = np.load('res_analytic_n_fits' + str(n_fits) + '_N' + str(N)
                  + '_postVar' + post_var_tag + '.npy',
                  allow_pickle=True).item()
    out_snpe = tmp['cdelfi']
    # Take the x-axis values from the file before they are first used.
    ksi2s = tmp['ksi2s']

    # index 0: posterior-mean estimates, index 1: posterior-variance estimates
    # (as labelled on the y axes below)
    ax_mean_avg.semilogx(ksi2s, out_snpe[:, :, 0].mean(axis=1), color=clr, linewidth=2)
    ax_var_avg.semilogx(ksi2s, out_snpe[:, :, 1].mean(axis=1), color=clr, linewidth=2)
    ax_mean_std.semilogx(ksi2s, out_snpe[:, :, 0].std(axis=1), color=clr, linewidth=2)
    ax_var_std.semilogx(ksi2s, out_snpe[:, :, 1].std(axis=1), color=clr, linewidth=2)

# Reference curves, drawn once, using the settings of the last loaded file.
# They are the same expressions as the posterior mean and variance, with the
# proposal variance ksi2s taking the place of the prior variance.
eta2, sig2, x0 = tmp['eta2'], tmp['sig2'], tmp['x0']

gt = ksi2s / (ksi2s + sig2) * x0[0]
ax_mean_avg.semilogx(ksi2s, gt, 'k--', linewidth=2)

gt = ksi2s - ksi2s**2 / (ksi2s + sig2)
ax_var_avg.semilogx(ksi2s, gt, 'k--', linewidth=2)

ax_mean_avg.set_ylabel('posterior means')
ax_mean_avg.set_title('estimator avg')
# The first len(Ns) lines in this panel are the per-N curves.
ax_mean_avg.legend(['N = ' + str(n) for n in Ns], loc=4)
ax_var_avg.set_ylabel('posterior variance')
ax_var_avg.set_xlabel('proposal prior variance / prior variance')
ax_mean_std.set_title('estimator std')
ax_var_std.set_xlabel('proposal prior variance / prior variance')

fig.savefig('bias_std_' + str(n_fits) + 'fits_CDELFI.pdf')

plt.show()