In [None]:
import numpy as np

##################################################
##### Matplotlib boilerplate for consistency #####
##################################################
from ipywidgets import interact
from ipywidgets import FloatSlider, IntSlider
from matplotlib import pyplot as plt

%matplotlib inline

from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg')

# Default figure geometry: the height follows from the width via the golden ratio.
global_fig_width = 8
global_fig_height = global_fig_width / 1.61803399
font_size = 12

# Apply every style override in a single call so the notebook's look is
# defined in one place.
plt.rcParams.update({
    # axes
    'axes.axisbelow': True,
    'axes.edgecolor': '0.8',
    'axes.grid': True,
    'axes.labelpad': 8,
    'axes.linewidth': 2,
    'axes.titlepad': 16.0,
    'axes.titlesize': font_size * 1.4,
    # figure and fonts
    'figure.figsize': (global_fig_width, global_fig_height),
    'font.sans-serif': ['Computer Modern Sans Serif', 'DejaVu Sans', 'sans-serif'],
    'font.size': font_size,
    # grid and lines
    'grid.color': '0.8',
    'grid.linestyle': 'dashed',
    'grid.linewidth': 2,
    'lines.dash_capstyle': 'round',
    'lines.dashed_pattern': [1, 4],
    # ticks (major tick marks hidden by giving them zero length)
    'xtick.labelsize': font_size,
    'xtick.major.pad': 4,
    'xtick.major.size': 0,
    'ytick.labelsize': font_size,
    'ytick.major.pad': 4,
    'ytick.major.size': 0,
})
##################################################

## Conditional distributions

### Example Two-dimensional distribution:
- Imagine you are interested in the interrelation between the circumference of a person’s head ($H$) and the volume of their brain ($B$).
- Based on data we find there is a positive correlation between these two variables, which we represent in a distribution $P(H, B)$.

![](fig/Human-brain.SVG)

In [None]:
from scipy.stats import multivariate_normal

def brain_pdf(xy):
    """Evaluate the joint density P(H, B) at one or more (H, B) points.

    The density is a bivariate normal with mean (45, 1500), standard
    deviations 10 and 200, and covariance 500 between the two variables.

    Parameters
    ----------
    xy : array-like
        A single point ``[H, B]`` or an array of such points, one per row.

    Returns
    -------
    float or numpy.ndarray
        The density value(s) at the requested point(s).
    """
    sd_h, sd_b = 10, 200
    cov_hb = 5 * 100
    mean = [45, 1500]
    covariance = [
        [sd_h**2, cov_hb],
        [cov_hb, sd_b**2],
    ]
    return multivariate_normal.pdf(xy, mean=mean, cov=covariance)

def get_full_brain_pdf():
    """Tabulate the joint density P(H, B) on a rectangular grid.

    H runs over 50 points in [20, 70] and B over 40 points in [1000, 2000].

    Returns
    -------
    X, Y, Z : numpy.ndarray
        Meshgrid coordinates for H and B, and the density evaluated at each
        grid node (all three arrays share the meshgrid shape).
    """
    h_axis = np.linspace(20, 70, 50)
    b_axis = np.linspace(1000, 2000, 40)
    X, Y = np.meshgrid(h_axis, b_axis)

    # Flatten the grid into one (H, B) row per node, evaluate, then fold the
    # result back into the grid shape.
    nodes = np.stack([X.ravel(), Y.ravel()], axis=1)
    Z = brain_pdf(nodes).reshape(X.shape)

    return X, Y, Z

def get_conditional_brain_pdf():
    """Evaluate the joint density along the slice B = 1450.

    Returns
    -------
    x, y, z : numpy.ndarray
        50 values of H in [20, 70], the matching constant B values (1450),
        and the joint density P(H, B = 1450) at each of those points.
        The density is not renormalised over the slice.
    """
    x = np.linspace(20, 70, 50)
    y = np.full_like(x, 1450)

    slice_points = np.stack([x, y], axis=1)
    z = brain_pdf(slice_points).reshape(x.shape)

    return x, y, z

def show_brain_distribution(conditional=False):
    """Draw the joint density P(H, B) as a filled contour plot with a colorbar.

    Parameters
    ----------
    conditional : bool, optional
        When true, overlay a horizontal line at B = 1450 across the plotted
        range of H.
    """
    h_grid, b_grid, density = get_full_brain_pdf()
    plt.contourf(h_grid, b_grid, density, 20, cmap='RdGy')

    if conditional:
        # Mark the slice along which the density is conditioned.
        plt.plot([20, 70], [1450, 1450])

    plt.xlabel(r'$H$')
    plt.ylabel(r'$B$')
    plt.colorbar()
    
def show_brain_distribution_conditional():
    """Show the joint density next to its cross-section at B = 1450.

    The left panel is the filled contour plot of P(H, B) with the slice
    B = 1450 marked; the right panel plots the joint density along that slice
    as a function of H, with the area under the curve shaded.
    """
    h_grid, b_grid, joint = get_full_brain_pdf()
    h_line, _, slice_density = get_conditional_brain_pdf()

    fig, (joint_ax, slice_ax) = plt.subplots(1, 2, figsize=(10, 8/1.618))

    # Left panel: joint density with the conditioning line.
    joint_ax.contourf(h_grid, b_grid, joint, 20, cmap='RdGy')
    joint_ax.plot([20, 70], [1450, 1450])
    joint_ax.set_xlabel(r'$H$')
    joint_ax.set_ylabel(r'$B$')

    # Right panel: density along the slice.
    slice_ax.plot(h_line, slice_density)
    slice_ax.fill_between(h_line, slice_density, alpha=0.2)
    slice_ax.set_xlabel(r'$H$')
    slice_ax.set_ylabel(r'$P(B = 1450, H)$')

    plt.tight_layout()


In [None]:
# Plot the joint density P(H, B) as a filled contour map (no conditioning line).
show_brain_distribution()

## Conditional distributions

- **Question:** If an individual has a brain volume of 1450 cm$^3$, what does the distribution of their head circumference look like?
- **Answer:** Use the law of conditional probability:

$$P(H|B = 1450) = \frac{P(B = 1450, H)} {P(B = 1450)}$$

- **Analogy:** imagine walking over the probability distribution along the line $B = 1450$ cm$^3$, and recording your height (the value of the density) as you go.

In [None]:
# Joint density with the B = 1450 slice marked (left) and the density along that slice (right).
show_brain_distribution_conditional()

## Gibbs sampler

- A dependent sampling technique
- Useful for hierarchical models: breaks up higher-dimensional problems into separate lower-dimensional problems, *conditioned* on the remaining variables.
- Can be used in conjunction with Random Walk Metropolis method $\Rightarrow$ still allows us to use all the machinery we've already developed!
- Named after Josiah Willard Gibbs (February 11, 1839 – April 28, 1903), algorithm proposed by Stuart and Donald Geman in 1984

![](fig/Josiah_Willard_Gibbs_-from_MMS-.jpg)
    

## Defining the Gibbs sampler

For a parameter vector: $\boldsymbol{\theta} = (\theta_1, \theta_2, \theta_3)$:

- Select a random starting location: $(\theta_1^0, \theta_2^0, \theta_3^0)$, along the same lines as for Random Walk Metropolis.
- For each iteration $t = 1, ..., T$ do:
    1. Select a random parameter update ordering, for example $(\theta_3, \theta_2, \theta_1)$.
    2. Independently sample from the conditional posterior for each parameter in order using the most up-to-date parameters.

## Defining the Gibbs sampler

First we sample:
    
$$\theta^1_3 \sim P(\theta_3|\theta^0_2, \theta_1^0)$$

Then, conditional on the freshly-sampled $\theta^1_3$:

$$\theta^1_2 \sim P(\theta_2|\theta^1_3, \theta_1^0)$$

Then conditional on freshly-sampled $\theta^1_3$ and $\theta_2^1$:

$$\theta^1_1 \sim P(\theta_1|\theta^1_3, \theta_2^1)$$

**Important:** in Gibbs sampling there is no rejection of steps 

$\Rightarrow$ unlike Random Walk Metropolis!



## Example application of Gibbs sampling: speed of motion of neighbouring birds in a flock

Suppose we record the speed of bird A ($v_A$) and bird B ($v_B$) in
a flock along a particular axis.

Based on observations we find that the joint posterior
distribution over speeds is a multivariate normal distribution:


$$\begin{pmatrix} v_A \\ v_B \end{pmatrix} \sim N \left [ \begin{pmatrix} v_0 \\ v_0 \end{pmatrix}, \begin{pmatrix} 1 & \rho \\ \rho & 1 \end{pmatrix} \right ]$$

Of course here we have an analytic expression for the posterior distribution, but this example illustrates how the method works for more general problems.

In [None]:

def bird_pdf(xy):
    """Evaluate the joint density of the two bird speeds at the given point(s).

    The density is a bivariate normal centred on (0, 0) with unit variances
    and covariance 0.5.

    Parameters
    ----------
    xy : array-like
        A single point ``[v_A, v_B]`` or an array of such points, one per row.

    Returns
    -------
    float or numpy.ndarray
        The density value(s) at the requested point(s).
    """
    centre = [0, 0]
    covariance = [
        [1, 0.5],
        [0.5, 1],
    ]
    return multivariate_normal.pdf(xy, mean=centre, cov=covariance)

def get_full_bird_pdf():
    """Tabulate the bird-speed joint density on a grid over [-5, 5] x [-5, 5].

    The first axis uses 50 points and the second 40.

    Returns
    -------
    X, Y, Z : numpy.ndarray
        Meshgrid coordinates and the density evaluated at each grid node
        (all three arrays share the meshgrid shape).
    """
    a_axis = np.linspace(-5, 5, 50)
    b_axis = np.linspace(-5, 5, 40)
    X, Y = np.meshgrid(a_axis, b_axis)

    # One (v_A, v_B) row per grid node; evaluate, then restore the grid shape.
    nodes = np.stack([X.ravel(), Y.ravel()], axis=1)
    Z = bird_pdf(nodes).reshape(X.shape)

    return X, Y, Z

def show_bird_distribution():
    """Draw the joint density of the two bird speeds as a filled contour plot.

    Each axis carries a single tick at 0 (the mean used by ``bird_pdf``),
    labelled with the symbol v_0. The labels are written as mathtext so the
    subscript is typeset, matching the ``r'$H$'``-style labels used by the
    other plots in this notebook, instead of showing the raw text "v_0".
    """
    X, Y, Z = get_full_bird_pdf()
    plt.contourf(X, Y, Z, 20, cmap='RdGy')
    plt.xticks([0], [r'$v_0$'])
    plt.yticks([0], [r'$v_0$'])
    plt.xlabel('speed of bird A')
    plt.ylabel('speed of bird B')
    plt.colorbar()

In [None]:
# Plot the joint density of the two bird speeds as a filled contour map.
show_bird_distribution()

## Finding the conditional distributions

- In many circumstances we cannot find the conditional distributions; here, however, it is possible.

- If we knew $v_B$ :

$$v_A \sim N(v_0 + \rho(v_B - v_0), 1 - \rho^2)$$

- Alternatively, if we knew $v_A$:

$$v_B \sim N(v_0 + \rho(v_A - v_0), 1 - \rho^2)$$

Use Gibbs sampling to conditionally sample: $v_A|v_B$ then $v_B |v_A$.

In [None]:
def gibbs_sample(n, v0=0, rho=0.5):
    """Draw ``n`` Gibbs samples from the bivariate normal bird-speed model.

    The target has mean ``(v0, v0)``, unit variances and correlation ``rho``.
    Each iteration samples ``v_A | v_B`` and then ``v_B | v_A`` from

        N(v0 + rho * (other - v0), 1 - rho**2)

    where the second argument is a *variance*.

    Parameters
    ----------
    n : int
        Number of iterations (samples) to generate.
    v0 : float, optional
        Common mean of both speeds. Defaults to 0.
    rho : float, optional
        Correlation between the two speeds. Defaults to 0.5.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2, n)``; row 0 holds the ``v_A`` samples and row 1
        the ``v_B`` samples.
    """
    samples = np.empty((2, n), dtype=float)

    # np.random.normal takes a standard deviation, so convert the conditional
    # variance 1 - rho**2 before sampling.
    conditional_sd = np.sqrt(1 - rho**2)

    # Start the chain at the mean rather than reading the uninitialised last
    # column of `samples` on the first iteration.
    v_b = v0
    for i in range(n):
        # sample v_A given the most recent v_B
        v_a = np.random.normal(v0 + rho * (v_b - v0), conditional_sd)

        # sample v_B given the freshly drawn v_A
        v_b = np.random.normal(v0 + rho * (v_a - v0), conditional_sd)

        samples[0, i] = v_a
        samples[1, i] = v_b
    return samples


In [None]:
def show_gibbs_sampling(n):
    """Visualise ``n`` Gibbs samples against the target bird-speed density.

    The left panel overlays the samples on the filled contour plot of the
    joint density; the right panel shows a 2-D histogram of the same samples.

    Parameters
    ----------
    n : int
        Number of Gibbs samples to draw with ``gibbs_sample``.
    """
    samples = gibbs_sample(n)
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 8/1.618))

    X, Y, Z = get_full_bird_pdf()
    ax1.contourf(X, Y, Z, 20, cmap='RdGy')
    ax1.scatter(samples[0,:], samples[1,:], alpha=0.6)

    # Set tick positions and labels in separate calls: a second positional
    # argument to set_xticks/set_yticks is only treated as labels by newer
    # Matplotlib releases (older ones read it as `minor`). Mathtext typesets
    # the subscript instead of printing the raw text "v_0".
    ax1.set_xticks([0])
    ax1.set_xticklabels([r'$v_0$'])
    ax1.set_yticks([0])
    ax1.set_yticklabels([r'$v_0$'])
    ax1.set_xlabel('speed of bird A')
    ax1.set_ylabel('speed of bird B')

    ax2.hist2d(samples[0,:], samples[1,:], bins=30)
    # Label the histogram so it can be read on its own.
    ax2.set_xlabel('speed of bird A')
    ax2.set_ylabel('speed of bird B')

    plt.tight_layout()

In [None]:
# Interactive demo: the slider sets n, the number of Gibbs samples passed to show_gibbs_sampling.
# continuous_update=False asks the widget not to redraw continuously while the slider is dragged.
interact(show_gibbs_sampling, n= IntSlider(value=10, min=0, max=6000, step=100, continuous_update=False));

In [None]:
import pints
import pints.toy
# Set up n synthetic inference problems that share one model: simulate noisy
# data for n parameter sets, then build a likelihood, posterior and MCMC
# sampler for each data set.
model = pints.toy.LogisticModel()

# n: number of simulated data sets (and lower-level samplers).
# noise: standard deviation of the Gaussian noise added to each simulation.
n = 5
noise = 20




# True hyper-parameters: [mean, standard deviation] of the normal distribution
# from which each data set's second model parameter is drawn.
real_hyper_parameters = [500, 50]
# One parameter vector per data set: the first entry is fixed at 0.015, the
# second is a fresh draw from Normal(500, 50).
# NOTE(review): presumably these are the logistic growth rate and carrying
# capacity -- confirm against the pints.toy.LogisticModel documentation.
real_parameters = [
    np.array([0.015, np.random.normal(real_hyper_parameters[0], real_hyper_parameters[1])]) 
        for i in range(n)
]
# Common time grid: 500 points on [0, 800].
times = np.linspace(0, 800, 500)
# Simulated observations: model output plus independent Gaussian noise.
values = [
    model.simulate(real, times) + np.random.normal(0, noise, times.shape)
        for real in real_parameters
]

# One single-output problem per simulated data set.
problems = [
    pints.SingleOutputProblem(model, times, v)
        for v in values
]

# One Gaussian log-likelihood per problem, using the known noise level.
log_likelihoods = [
    pints.GaussianKnownSigmaLogLikelihood(problem, noise)
        for problem in problems
]

# Prior over the two model parameters: uniform on [0.01, 0.02] for the first,
# and a very wide Gaussian (standard deviation 1e9 * mean) centred on the true
# hyper-mean for the second.
log_prior = pints.ComposedLogPrior(
    pints.UniformLogPrior([0.01],[0.02]),
    pints.GaussianLogPrior(real_hyper_parameters[0],1e9*real_hyper_parameters[0])
)


# Every posterior is built from the SAME log_prior object, so a change made
# to that object affects all n posteriors at once.
log_posteriors = [
    pints.LogPosterior(log_likelihood, log_prior)
        for log_likelihood in log_likelihoods
]


# One starting point per chain (n chains in total), set to the true
# parameters; each parameter array is wrapped in a one-element list.
starting_points = [
    [ real ]
    for real in real_parameters
]


# One adaptive-covariance MCMC sampler per data set, driven manually through
# its ask()/tell() interface in the sampling loop.
samplers = [
    pints.AdaptiveCovarianceMCMC(x0)
     for x0 in starting_points
]



In [None]:
import sys
import scipy.stats

# prior hyperparameters
sigma0 = 100
k_0 = 0

mu_0 = real_hyper_parameters[0]
alpha_0 = 1
beta_0 = 1.0e-9 * sigma0

samples = 20000
chain = np.empty((samples, 2))
k_chains = [np.empty((samples, 2)) for i in range(n)]

for sample in range(samples):
    if sample % 10 == 0:
        print('.', end='')
        sys.stdout.flush()

    # generate samples of lower level samplers
    xs = np.empty((2, k))
    error = 0
    for i, (sampler, log_posterior) in enumerate(zip(samplers, log_posteriors)):
        x = sampler.ask()
        xs[:,i] = sampler.tell(log_posterior(x))
        k_chains[i][sample, :] = xs[:, i]
    
    # sample mean and covariance from a normal inverse wishart
    xhat = np.mean(xs[1,:])
    var = np.sum((xs[1,:]-xhat)**2)

    k = k_0 + n
    mu = (k_0 * mu_0 + n * xhat) / k
    alpha = alpha_0 + n/2
    beta = beta_0 + 0.5*(var + (k_0 * n) / k * (xhat - mu_0)**2)
    
    variance_sample = scipy.stats.invgamma.rvs(a=alpha, loc=0, scale=beta)
    mean_sample = scipy.stats.norm.rvs(mu,variance_sample / k)

    # store sample to chain
    chain[sample, 0] = mean_sample
    chain[sample, 1] = np.sqrt(variance_sample)

    # replace individual sampler's priors with hierarchical params,
    for i, (log_posterior, sampler) in enumerate(zip(log_posteriors, samplers)):
        log_posterior._log_prior._priors[1].__init__(mean_sample,np.sqrt(variance_sample))

import pints.plot
pints.plot.trace([chain])
pints.plot.trace([k_chains[0]])
plt.show()