# Consistent Bayes: Some Motivating Examples
---

Author: Michael Pilosov
Copyright 2017

### Import Libraries
_(should be 2.7 and 3.x compatible)_

In [1]:
# Mathematics and Plotting
from HelperFuns import * # pyplot wrapper functions useful for visualizations, numpy, scipy, etc.
%matplotlib inline
plt.rcParams.update({'font.size': 14})

# Interactivity
from ipywidgets import *

---

Some introductory text goes here.   
Define $\Lambda$, $\mathcal{D}$.


--- 

## Sample from $\Lambda$
_Here we implement uniform random priors on the unit hypercube, but you can load in any set of samples in its place._

In [2]:
input_dim = 1 # Specify input space dimension (n)
num_samples = int(1E4) # number of input samples (N)
# Samples are stored column-wise: lam has shape (input_dim, num_samples), one column per sample.
# NOTE(review): no random seed is set, so every run of this cell draws a different sample set.
lam = np.random.uniform( size = (input_dim, num_samples) ) # generate samples

---
## Define Parameter to Observables (PtO) Map
_Choose one of the following example options, or add your own._

$O_1(\lambda) = (\lambda_0-\frac{1}{2})^2$  

$O_2(\lambda) = \sum_{i=0}^{n-1} \lambda_i$  

$O_3(\lambda) = \lbrace \lambda_0, \; \lambda_3 \rbrace$ 

$O_4(\lambda) = \lbrace \lambda_0+\lambda_1, \; \lambda_2, \; \lambda_3-\lambda_4 \rbrace$  

In [3]:
def PtO_fun1(lam): # squared offset of the first coordinate
    """Map each sample to the squared distance of its first coordinate from 0.5.

    Reads row 0 of ``lam`` and returns the result wrapped in a new leading
    axis, so the output is a single-row array with one entry per sample.
    """
    first_coord = lam[0, :]
    offset = first_coord - 0.5
    return np.array([offset ** 2])

def PtO_fun2(lam): # total of all coordinates
    """Add up the coordinates of every sample.

    Sums ``lam`` along axis 0 and wraps the result in a new leading axis,
    so the output is a single-row array with one total per sample.
    """
    coordinate_totals = np.sum(lam, axis=0)
    return np.array([coordinate_totals])

def PtO_fun3(lam): # select two coordinates
    """Return rows 0 and 3 of ``lam`` as a new two-row array.

    Needs at least four rows in ``lam``; fewer raises ``IndexError``.
    """
    selected_rows = [0, 3]
    return lam[selected_rows, :]

def PtO_fun4(lam): # three derived components
    """Build a three-row observable from the first five rows of ``lam``.

    Row 0 of the result is ``lam[0] + lam[1]``, row 1 is ``lam[2]`` and
    row 2 is ``lam[3] - lam[4]``.
    """
    sum_of_first_two = lam[0, :] + lam[1, :]
    third_coord = lam[2, :]
    difference_of_last_two = lam[3, :] - lam[4, :]
    return np.array([sum_of_first_two, third_coord, difference_of_last_two])

In [4]:
PtO_fun_choice = 1

# Lookup table from option number to the corresponding map defined above.
_pto_options = {1: PtO_fun1, 2: PtO_fun2, 3: PtO_fun3, 4: PtO_fun4}

# Reject anything that is not one of the listed options before binding PtO_fun.
if PtO_fun_choice not in _pto_options:
    raise( ValueError('Specify Proper PtO choice!') )
PtO_fun = _pto_options[PtO_fun_choice]

---
## Compute Data Space $O(\Lambda) = \mathcal{D}$ 

Format: `(n_dims, n_samples)`  
_Optional_: Specify subset of PtO map's components to use for inversion using the variable `sub_indices` 

In [5]:
# Push every sample through the chosen map; rows are output components, columns are samples.
D_full = PtO_fun(lam)
sub_indices = None
# Keep every component unless a subset of rows was requested via sub_indices.
D = D_full if sub_indices is None else D_full[sub_indices,:]
output_dim = D.shape[0]
print('dimensions :  lambda = {}   D = {}   D_full = {}'.format(lam.shape, D.shape, D_full.shape))

dimensions :  lambda = (1, 10000)   D = (1, 10000)   D_full = (1, 10000)


### Visualize Data Space

In [6]:
# # Below you will find one-dimensional histogram option
# M = 100 # number of bins in the data space
# plt.hist(D[0],M)
# plt.title('histogram of data space')
# plt.show()

# Interactive Marginal Visualization
data_space_kde = gkde(D[0]) # compute KDE estimate of the first data-space component
# Can plot "slices" of densities to observe differences between posterior and prior, but not that useful
# The grid [0, 0.25] matches the range of PtO_fun1 for samples in [0, 1]; adjust it for other maps.
a, b = 0, 0.25 # linspace parameters for plotting
plot_grid = np.linspace(a, b, 100)

# The title is a raw string: '\m' is not a valid escape sequence, and a plain literal
# containing it triggers a warning on current Python versions. The text itself is unchanged.
# NOTE(review): the slider range uses input_dim although the plotted density lives in the
# data space and is built from D[0] only -- confirm what view_est_dens expects for viewdim.
interact(view_est_dens, x = fixed(plot_grid), 
         estimated_dens = fixed(data_space_kde), 
         lab = fixed('KDE data'), title=fixed(r'Marginal (est.) of $\mathcal{D}$'),
         viewdim=(0, input_dim-1, 1) )

A Jupyter Widget

<function HelperFuns.view_est_dens>

## Define Observed Probability Measure $P_\mathcal{D}$

In [None]:
# var_const = .1

# Interesting things happen with time series data that uses uniform on output.
uni_max = 0.5
# Active observed density. Presumably uniform on [0, uni_max] (loc=0, scale=uni_max)
# if sstats is scipy.stats -- confirm against HelperFuns.
obs_dens = sstats.uniform(0,uni_max) # 1D only
# obs_dens = sstats.norm(0.5,sigma) # 1D only
# But if the errors are normally distributed, the observed density will be a chi^2 of order K.
# print num_times
# obs_dens = sstats.chi2(num_times)
xx = np.linspace(-1,1,100)
plt.plot(xx,obs_dens.pdf(xx)) # plots whichever observed density is active above (currently the uniform)
# plt.plot(xx,np.divide(1,obs_dens.pdf(xx))) # RECIPROCAL CHI SQUARED

# TODO: add support for multivariate uniforms. 

# obs_dens = sstats.multivariate_normal(mean = np.zeros(output_dim), 
#                                       cov = var_const*np.eye(output_dim) )

In [None]:
# for d_dim in range(1,21):
#     d = sstats.multivariate_normal(mean = np.zeros(d_dim), cov = np.eye(d_dim))
#     print '%2.2e'%d.pdf(np.zeros(d_dim))

In [None]:
# The cells from here on refer to the data-space samples as `q`. That name was previously
# only assigned in the later "All in one" cell, so a fresh top-to-bottom run stopped here
# with a NameError. Bind it to the array D computed in the data-space cell above.
q = D
np.max(q[0])
# num_times


## Compute push-forward of the prior

In [None]:
# Density estimate built from the first row of q; it is the denominator of the ratio r below.
pf_dens = gkde(q[0])

In [None]:
# Evaluate the push-forward estimate on a 100-point grid from 0 to 10% beyond the largest sampled value.
# NOTE: this reassigns the notebook-wide name x.
x = np.linspace(0,1.1*np.max(q[0]),100)
plt.plot(x,pf_dens.evaluate(x))

## Accept/reject sampling of posterior
(samples come from prior that was used to compute the pushforward)

In [None]:
lam_accept = [] # container for the accepted samples, filled by the accept/reject step in the next cell
# r = (1./obs_dens.pdf( q )) / pf_dens.evaluate(q) # RECIPROCAL CHI SQUARED. REGULAR IS BELOW.
r = obs_dens.pdf( q ) / pf_dens.evaluate(q) # vector of ratios evaluated at all the q(lambda)'s
M = np.max(r) # largest ratio over all samples
eta_r = r[0]/M # first row of r scaled by its maximum; compared against uniform(0,1) draws below

In [None]:
# Accept/reject: sample i is kept when its normalized ratio eta_r[i] exceeds a uniform(0, 1)
# draw. All num_samples draws are taken in one call, and the accepted set is rebuilt from
# scratch, so re-running this cell no longer appends to a previous result.
xi = np.random.uniform(0, 1, size=num_samples)
accept_mask = eta_r[:num_samples] > xi

# Keep every accepted column of lam. The earlier version sliced the accepted list with
# [1::], which silently discarded the first accepted sample, and crashed on .shape[1]
# when nothing was accepted. The result here always has shape (input_dim, num_accept).
lam_accept = lam[:, :num_samples][:, accept_mask]
num_accept = lam_accept.shape[1]
# 100.0 forces float division on Python 2 as well; np.float no longer exists in current NumPy.
print('Number accepted: %d = %2.2f%%'%(num_accept, 100.0*num_accept/num_samples))

In [None]:
np.sum(r)

In [None]:
plt.scatter(lam,eta_r)
# plt.scatter([lam0],0.05)
# plt.xlim([0.05-0.01, 0.05+0.01])

In [None]:
# interact(pltaccept, lam = fixed(lam), lam_accept = fixed(lam_accept), 
#          N = (1, num_accept, 10), i = (0, input_dim-1, 1), j = (0, input_dim-1, 1))


## All in one

In [None]:
num_samples = int(1E4)
num_times = 1000 # measurement frequency (K)
# number of model solves = num_samples*num_times. dim_qoi = 1 because we are assimilating measurements into a single QoI
T_min, T_max = 0.1, 10 # first and last measurement times
# uni_max = 1E-3
lam0 = 1 # true / reference lambda_0
sd = 0.2 # STANDARD DEVIATION FOR EACH MEASUREMENT. this makes it constant for all of them.

############################
input_dim = 1
# NOTE(review): no random seed is set, so samples and data differ on every run.
lam = 2*np.random.uniform( size = (input_dim, num_samples) ) # uniform PRIOR on [0, 2): standard uniform samples scaled by 2
x = np.linspace(0, 2, 100) # for plotting purposes

# NOTE(review): sigma has num_times+1 entries while t below has num_times; QoI_fun only
# indexes sigma[k] for k in range(num_times), so the last entry is never read.
sigma = sd*np.ones(num_times+1)
t = np.linspace(T_min,T_max,num_times) # INCLUDES T_min - Useful if you want to be very specific about start-time
# t = np.linspace(T_min,T_max,num_times+1)[1::] # EXCLUDES T_min - Useful if you want to keep T_min at 0
# print t
def data(lam):
    """Return one noisy realization of the reference signal at the times ``t``.

    The signal is ``lam0*exp(-t)`` plus Gaussian noise with standard
    deviation ``sd``. The argument ``lam`` is accepted but not used. The
    noise has shape ``(1, num_times)``, so the result is a single-row array.
    """
    noise_switch = 1 # 1 adds measurement noise, 0 would return the clean signal
    clean_signal = lam0*np.exp(-t)
    perturbation = noise_switch*sd*np.random.randn(1,num_times)
    return clean_signal + perturbation
# Draw the synthetic measurements once; this single realization d is what QoI_fun compares against below.
d = data(lam) # make random data. fix it.

def QoI_fun(d,lam):
    """Sum of squared, sigma-scaled misfits between model predictions and data.

    For every time index ``k`` in ``range(num_times)`` the model value
    ``lam*exp(-t[k])`` is compared with the measurement ``d[0, k]``; the
    difference is divided by ``sigma[k]``, squared, and the squares are
    summed over ``k``.

    Parameters
    ----------
    d : array indexed as ``d[0, k]``
        Measurements, one per time index.
    lam : array
        Parameter samples. The optional noise term now takes its shape from
        ``lam`` instead of the global ``num_samples``, so arrays with any
        number of columns (for example the accepted samples) can be passed.

    Returns
    -------
    Array with the same shape as ``lam``.
    """
    noise = 0 # stochastic map or no? treating model like best unbiased predictor
    lam = np.asarray(lam)
    # One squared-misfit array per measurement time. The random draw is kept even when
    # noise is 0 so the random stream is consumed exactly as before.
    squared_misfits = [
        ((lam*np.exp(-t[k]) + noise*sd*np.random.randn(*lam.shape) - d[0,k])/sigma[k])**2
        for k in range(num_times)
    ]
    return np.sum(squared_misfits, axis=0)

print('Computing QoI')
q_full = QoI_fun(d,lam)
QoI_indices = None
# Keep every QoI component unless a subset of rows was requested via QoI_indices.
if QoI_indices is not None:
    q = q_full[QoI_indices,:]
else:
    q = q_full

print('Summary of dimensions :  lambda = '+str(lam.shape)+'   q = '+str(q.shape)+'   q_full = '+str(q_full.shape) )
# The message used to report a uniform bound (uni_max), which is only defined in an earlier
# cell and does not describe the density that is actually constructed here.
print('Defining chi-squared observed density with %d degrees of freedom'%(num_times))
# obs_dens = sstats.uniform(0,uni_max) # 1D only
obs_dens = sstats.chi2(num_times)

pf_dens = gkde(q[0])
# NOTE(review): lam_accept is reset to an empty list and the accept/reject step below is
# commented out, so later cells that use lam_accept see an empty list after this cell runs.
lam_accept = []
# r = (1./obs_dens.pdf( q )) / pf_dens.evaluate(q) # RECIPROCAL CHI SQUARED. REGULAR IS BELOW.
print('Computing weights')
r = obs_dens.pdf( q ) / pf_dens.evaluate(q) # vector of ratios evaluated at all the q(lambda)'s
M = np.max(r)
# NOTE(review): unlike the standalone accept/reject cell (eta_r = r[0]/M), the ratio is not
# divided by M here. If the optional block below is enabled, it needs the normalized form.
eta_r = r[0]
inds = []
##### OPTIONAL - RUN ACCEPT/REJECT
# print('Performing accept/reject')
# for i in range(num_samples):
#     xi = np.random.uniform(0,1)
#     if eta_r[i] > xi:
#         lam_accept.append( lam[:,i] )
#         inds.append(i)

# lam_accept = np.array( lam_accept[1::] ).transpose()
# num_accept = lam_accept.shape[1]
# print('Number accepted: %d = %2.2f%%'%(num_accept, 100*np.float(num_accept)/num_samples))
######

# Three panels side by side: (1) push-forward of the prior, (2) observed density,
# (3) ratio eta_r against the parameter samples. They are drawn in the order 3, 1, 2.
# NOTE: changing rcParams here affects the figure size of every later plot too.
plt.rcParams['figure.figsize'] = (18, 6)
plt.figure()
plt.subplot(1, 3, 3)
plt.scatter(lam,eta_r)
# plt.plot(lam_accept, gkde(lam_accept))
plt.scatter(lam0,0.05) # marks the reference parameter lam0 at a fixed height of 0.05
# plt.title('Posterior Distribution\nof Uniform Observed Density \nwith bound = %1.2e'%uni_max)
plt.xlabel('Lambda')
pr = 0.2 # only used by the commented-out xlim line below
# plt.xlim(lam0*np.array([1-pr,1+pr]))

plt.subplot(1, 3, 1)
# NOTE: this reassigns the notebook-wide name x to a grid over the QoI range.
x = np.linspace(0,1.1*np.max(q[0]),50)
plt.plot(x,pf_dens.evaluate(x))
plt.title('Pushforward of Prior')
plt.xlabel('Q(lambda)')

plt.subplot(1, 3, 2)
# xx = np.linspace(0,2*uni_max,50)
xx = np.linspace(0,np.max(q[0]),50)
plt.plot(xx,obs_dens.pdf(xx))
plt.title('Observed Density')
plt.xlabel('Q(lambda)')

plt.show()

In [None]:
# np.exp(t[0])*lam - d[0,1]

In [None]:
lam.shape

---
## Visualize Posterior Density

In [None]:
prior_dens_kde = gkde(lam)
post_dens_kde = gkde(lam_accept) # Not very useful
# Can plot "slices" of densities to observe differences between posterior and prior, but not that useful
# Plot on a grid spanning the sampled parameter values. The shared name x was last reassigned
# to a grid over the QoI range, which is the wrong axis for densities on lambda.
lam_grid = np.linspace(np.min(lam), np.max(lam), 100)
interact(compare_est_input_dens, x = fixed(lam_grid), 
         estimated_dens1 = fixed(prior_dens_kde), estimated_dens2 = fixed(post_dens_kde), 
         lab_1 = fixed('KDE prior'), lab_2 = fixed('KDE post'), title=fixed(''),
         viewdim=(0, input_dim-1, 1))


In [None]:
# Multidimensional Normal

# prior_dens = sstats.multivariate_normal( mean = np.zeros(1), cov = np.eye(1) ) # Exact density
# interact(compare_input_dens, x = fixed(x), 
#          analytical_dens = fixed(prior_dens), estimated_dens = fixed(post_dens_kde), 
#          lab_1 = fixed('prior'), lab_2 = fixed('KDE post'), title = fixed(''),
#          viewdim = (0, input_dim-1, 1))

## Construct the push-forward of the posterior using accepted samples

In [None]:
x = np.linspace(-1,1, 100)
# x = np.tile(np.linspace(-5,5,100),[output_dim,1])
# QoI_fun takes the measurements as its first argument; the call previously passed only the
# accepted samples and raised a TypeError. Passing d pushes the accepted samples through the
# same map that was applied to the prior samples.
push_post_dens_kde = gkde( QoI_fun(d, lam_accept) )
# Plot the push-forward of the posterior, should look like the observed density
# interact(compare_output_dens, x = fixed(x), 
#          analytical_dens = fixed(obs_dens), estimated_dens = fixed(push_post_dens_kde), 
#          lab_1 = fixed('observed'), lab_2 = fixed('KDE push'), title = fixed(''),
#          viewdim = (0, output_dim-1, 1))
compare_output_dens(x,obs_dens, push_post_dens_kde)

In [None]:
# diagonal crossection view
compare_output_dens(x, obs_dens, push_post_dens_kde, 
                    viewdim = range(output_dim), title = 'Diagonal Cross-Section')

---
## Extra visualizations

### Create KDE of samples on $\Lambda$ or use specified density on $\Lambda$

***The KDE is not necessary if the density on the input space is already specified***

Here, we just do this to show how the density estimator works compared to the exact density.

The KDE is not necessary except for the push-forward density. 

### Compare KDE of prior to the actual prior

In [None]:
# Interactive comparison of the exact prior density with its KDE estimate.
# NOTE(review): prior_dens is not assigned in any active cell of this notebook (only in a
# commented-out line further up); unless HelperFuns exports it, this cell raises NameError.
interact(compare_input_dens, x = fixed(x), 
         analytical_dens = fixed(prior_dens), estimated_dens = fixed(prior_dens_kde),
         lab_1 = fixed('prior'), lab_2 = fixed('KDE prior'), title = fixed(''),
         viewdim = (0, input_dim-1, 1))

In [None]:
# diagonal cross-section view: all input dimensions are passed as viewdim at once
# NOTE(review): prior_dens is not assigned in any active cell of this notebook (only in a
# commented-out line further up); unless HelperFuns exports it, this cell raises NameError.
compare_input_dens(x, prior_dens, prior_dens_kde, 
                   viewdim = range(input_dim), title = 'Diagonal Cross-Section')