# Comparison of Statistical and Consistent Bayesian Inversion

We define a problem where the observed density corresponds to a likelihood function from classical Bayesian inversion.

Copyright 2018 Michael Pilosov


In [None]:
import numpy as np
import scipy.stats as sstats
from matplotlib import pyplot as plt

In [None]:
# Notebook-wide plot defaults: large figures and a large font.
plt.rcParams.update({
    'figure.figsize': (20, 10),
    'font.size': 18,
})

In [None]:
import cbayes.sample as samp
import cbayes.distributions as dist
import cbayes.solve as solve
import scipy.integrate as integrate


In [None]:
import ipywidgets as wid

## Consistent Bayes

In [None]:
def comparesandbox(N = int(5E3), M = 5, pr_sd = 0.5, pr_mean = 0, ob_mean = 0.5, ob_sd = 0.5, bw = 0):
    """
    Solve one 1-D inverse problem with two accept/reject schemes and plot both.

    Samples are drawn from a normal prior and mapped through a
    quantity-of-interest (QoI) function.  Accept/reject is then run twice on
    the same mapped samples: once with the ratio set by ``p_set.set_ratio()``
    (consistent Bayes) and once with a normal likelihood raised to the power
    ``M`` (statistical Bayes).  The prior, the observed density, the
    likelihood and kernel density estimates of the accepted output samples
    are drawn on a single figure.

    Parameters
    ----------
    N : int
        Number of prior samples.
    M : int
        For ``M > 1`` the QoI is the mean of ``M`` squared, ``ob_sd``-scaled
        residuals and the observed density is a gamma density; for ``M == 1``
        the QoI is the identity map and the observed density is normal.
        ``M`` is also the exponent applied to the likelihood.
    pr_sd, pr_mean : float
        Scale and location of the normal prior.
    ob_mean, ob_sd : float
        Location and scale of the normal likelihood (and of the normal
        observed density when ``M == 1``).  ``ob_sd`` also scales the
        residuals inside the QoI when ``M > 1``.
    bw : float
        If positive, the push-forward density is computed with
        ``method='sk'`` and this bandwidth; otherwise ``method='sc'`` is used.

    Returns
    -------
    None
        Results are printed and shown with ``plt.show()``.
    """
    s_set = samp.sample_set(size=(N, 1))
    s_set.set_dist('norm', {'loc':pr_mean, 'scale':pr_sd}, dim=0)

    def QoI_fun(lam):
        if M > 1:
            # NOTE(review): np.random.rand draws uniform [0, 1) noise, whereas
            # the gamma(a=M/2, scale=2/M) observed density used below is the
            # law of a chi-square(M) variable divided by M, i.e. what standard
            # normal residuals would give. ob_mean does not enter here either.
            # Confirm whether randn and/or an ob_mean offset was intended.
            # The noise is redrawn on every call to this function.
            residuals = lam.reshape(-1, 1) + ob_sd*np.random.rand(M)
            return (1./M)*np.sum( (residuals/ob_sd)**2, axis=1 ).reshape(-1,1)
        else:
            return lam.reshape(-1,1)

    s_set.generate_samples()
    p_set = samp.map_samples_and_create_problem(s_set, QoI_fun)
    if M == 1:
        p_set.set_observed_dist(dist='norm', dim=0, kwds={'loc':ob_mean, 'scale': ob_sd})
    else:
        p_set.set_observed_dist('gamma', {'a':M/2, 'scale':2/M}, dim=0)
    if bw > 0:
        p_set.compute_pushforward_dist(method='sk', kwds={'bandwidth': bw})
    else:
        p_set.compute_pushforward_dist(method='sc') # use scipy instead if you don't care about bw (faster)

    # Short handles for the densities drawn in the plotting section.
    pr = p_set.prior_dist
    ob = p_set.observed_dist

    # Solve CBayes: accept/reject on the ratio computed by set_ratio().
    p_set.set_ratio()
    indC = solve.perform_accept_reject(p_set.output.samples, p_set.ratio, seed=232)

    # Solve SBayes: normal likelihood, raised to the power M.
    L = dist.parametric_dist(1) # Define likelihood
    L.set_dist(dim=0, dist='norm', kwds={'loc':ob_mean, 'scale':ob_sd })

    likelihood = L.pdf(p_set.output.samples)**M

    def S_post(x):
        # Unnormalized statistical posterior: likelihood^M times prior.
        return L.pdf(x)**M*p_set.input.dist.pdf(x)

    def C_post(x):
        # Prior * observed / push-forward, evaluated pointwise.  Currently
        # only referenced by the commented-out alternative plot below.
        tol = 1E-4
        pfpr = p_set.pushforward_dist.pdf(QoI_fun(x))
        # Avoid dividing by a (near-)zero push-forward density.
        pfpr[pfpr < tol] = 1.0
        prr = p_set.input.dist.pdf(x)
        obb = p_set.observed_dist.pdf(QoI_fun(x))
        output = prr*obb/pfpr
        return output

    # Normalizing constant of S_post, integrated over the fixed window [-3, 3].
    evidence=integrate.quad(S_post,-3,3)
    print('Evidence: %2.4f'%evidence[0])

    indS = solve.perform_accept_reject(p_set.output.samples, likelihood, seed=283)
    print("ACCEPTED:", "S:", len(indS), "| B:", len(indC), " OF", N)
    if len(indC) < 10:
        print("Be aware, too few accepted samples from CB")

    ## PLOTTING CODE
    x = np.linspace(-2.5,2.5,1000)

    # SMOOTH PUSH-FORWARDS OF POSTERIORS FOR PLOTTING
    cb_pf_den = dist.gkde(p_set.output.samples[indC])
    sb_pf_den = dist.gkde(p_set.output.samples[indS])
    plt.plot(x,pr.pdf(x), label='Prior', c = 'orange', ls=':', lw=5)
    plt.plot(x,ob.pdf(x), label='Observed', c='r')
    plt.plot(x, cb_pf_den.pdf(x),  c='b', ls='-', label='Consistent Posterior Push-forward')
    plt.plot(x, sb_pf_den.pdf(x),  c='g', ls='--', label='Statistical Posterior Push-forward')
    # Alternative: draw the pointwise densities instead of the KDEs above.
#     plt.plot(x, C_post(x),  c='b', ls='-', label='Consistent Posterior Push-forward')
#     plt.plot(x, S_post(x)/evidence[0],  c='g', ls='--', label='Statistical Posterior Push-forward')
    plt.plot(x, L.pdf(x),  c='k', ls=':', lw=3, label='Statistical Likelihood Function')
    plt.vlines(ob_mean,0,5)
    if M == 1:
        plt.ylim([0,3])

    plt.legend(loc='upper left')
    # NOTE(review): the title says "Identity Map" even when M > 1, where
    # QoI_fun is not the identity — confirm whether the title should vary.
    plt.title('Identity Map - Prior Mean at %.2f'%pr_mean)
#     plt.savefig('comparison.png')
    plt.show()


In [None]:
# One slider per argument of comparesandbox.
N = wid.IntSlider(
    value=5000,
    min=100,
    max=10000,
    step=100,
    continuous_update=False,
)
M = wid.IntSlider(
    value=1,
    min=1,
    max=100,
    step=1,
    continuous_update=False,
)
ob_sd = wid.FloatSlider(
    value=0.5,
    min=0.25,
    max=1,
    step=0.05,
    continuous_update=False,
)
ob_mean = wid.FloatSlider(
    value=1,
    min=0,
    max=2,
    step=0.25,
    continuous_update=False,
)
pr_mean = wid.FloatSlider(
    value=0,
    min=-1,
    max=1,
    step=0.05,
    continuous_update=False,
)
pr_sd = wid.FloatSlider(
    value=0.5,
    min=0.25,
    max=2,
    step=0.05,
    continuous_update=False,
)
bw = wid.FloatSlider(
    value=0,
    min=0,
    max=0.5,
    step=0.05,
    continuous_update=False,
)

In [None]:
# Bind each slider to the comparesandbox argument of the same name.
wid.interact_manual(
    comparesandbox,
    N=N,
    M=M,
    ob_sd=ob_sd,
    ob_mean=ob_mean,
    pr_mean=pr_mean,
    pr_sd=pr_sd,
    bw=bw,
)

## The methods will be the same under a uniform prior
(which we simulate by choosing a large prior standard deviation)

In [None]:
# Widen the normal prior to stand in for a uniform one; 2 is the largest
# value the pr_sd slider allows (it is created with max=2).
pr_sd.value = 2

## Really degenerate Case
Confident prior beliefs and a lot of poor-quality data. More data does fix this.

In [None]:
# Degenerate scenario: a tight prior centred away from the data, combined
# with many noisy observations.
N.value = int(5E3)  # N is an IntSlider; keep the value an int, as in its definition
M.value = 10
ob_mean.value = 1
ob_sd.value = 1
pr_mean.value = -1
# The pr_sd slider is created with min=0.25, and bounded sliders clamp
# out-of-range assignments, so 0.1 would silently become 0.25.  Lower the
# bound first so the intended value is actually applied.
pr_sd.min = min(pr_sd.min, 0.1)
pr_sd.value = 0.1

Confident prior beliefs and a paucity of confident data.
Very few accepted samples for cbayes, but at least it's not just basically the prior...
Note that we violate a consistent Bayesian assumption (predictability) when we do this.

In [None]:
# Prior: centred at zero with the smallest spread the slider allows.
pr_mean.value = 0
pr_sd.value = 0.25

# Data: a single observation with a wide observed density.
ob_mean.value = 1
ob_sd.value = 1
M.value = 1