In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as stats
from scipy.stats import gaussian_kde as gkde
from bokeh.plotting import figure, show, output_notebook, gridplot
from bokeh.models import Span

output_notebook()

In [2]:
def normPDF(x, mu=0.0, sig2=1.0, prec=None):
    """Evaluate the Gaussian density N(mu, sig2) at ``x``.

    Parameters
    ----------
    x : float or ndarray
        Point(s) at which the density is evaluated.
    mu : float
        Mean of the distribution.
    sig2 : float
        Variance of the distribution; ignored when ``prec`` is given.
    prec : float or ndarray, optional
        Precision (1 / variance). When not ``None`` it overrides ``sig2``.

    Returns
    -------
    The density value(s) computed by ``scipy.stats.norm.pdf``.
    """
    # Compare against None explicitly: a truthiness test raises on
    # array-valued precisions and silently ignores an explicit ``prec=0``.
    if prec is not None:
        sig2 = 1.0 / prec
    return stats.norm.pdf(x, mu, np.sqrt(sig2))

def normRVS(samples=1, mu=0.0, sig2=1.0, prec=None):
    """Draw ``samples`` variates from the Gaussian N(mu, sig2).

    Parameters
    ----------
    samples : int or tuple of int
        Passed as ``size`` to ``np.random.normal``.
    mu : float
        Mean of the distribution.
    sig2 : float
        Variance of the distribution; ignored when ``prec`` is given.
    prec : float or ndarray, optional
        Precision (1 / variance). When not ``None`` it overrides ``sig2``.

    Returns
    -------
    ndarray of draws taken from NumPy's global random generator.
    """
    # Compare against None explicitly: a truthiness test raises on
    # array-valued precisions and silently ignores an explicit ``prec=0``.
    if prec is not None:
        sig2 = 1.0 / prec
    return np.random.normal(mu, np.sqrt(sig2), size=samples)

def gammaRVS(alpha, beta, samples=1):
    """Draw ``samples`` gamma variates from NumPy's global generator.

    ``alpha`` is passed as the shape and ``beta`` as the *scale* argument of
    ``np.random.gamma`` (i.e. ``beta`` is not treated as a rate).
    """
    draws = np.random.gamma(shape=alpha, scale=beta, size=samples)
    return draws

def invGammaRVS(alpha, beta, samples=1):
    """Draw ``samples`` inverse-gamma variates with shape ``alpha``.

    Unit-scale draws from ``scipy.stats.invgamma`` are multiplied by ``beta``,
    so ``beta`` acts as the scale of the returned variates.
    """
    unit_draws = sp.stats.invgamma.rvs(alpha, size=samples)
    return beta * unit_draws

def invGammaPDF(x, alpha, beta=1.0):
    """Evaluate the inverse-gamma density with shape ``alpha`` at ``x``.

    ``beta`` is used as the scale parameter, following the parametrisation in
    http://reference.wolfram.com/language/ref/InverseGammaDistribution.html
    """
    frozen = sp.stats.invgamma(alpha, scale=beta)
    return frozen.pdf(x)

In [3]:
def plotMCMCResults(xs, rx, ys, ry, title="Markov-chain results"):
    """Show two Markov-chain traces together with their reference values.

    Parameters
    ----------
    xs, ys : ndarray
        The two chains; both are plotted against the index 0..len(xs)-1.
    rx, ry : float
        Reference values drawn as dashed horizontal lines.
    title : str
        Title of the Bokeh figure.

    Note that each reference line uses the colour of the *other* trace:
    ``xs`` is dark green with a firebrick reference line, ``ys`` is firebrick
    with a dark green reference line.
    """
    steps = np.arange(xs.shape[0])
    fig = figure(title=title, x_axis_label='t',
                 plot_width=400, plot_height=400)

    # (trace, reference value, trace colour, reference-line colour)
    layers = (
        (xs, rx, "darkgreen", "firebrick"),
        (ys, ry, "firebrick", "darkgreen"),
    )
    for trace, reference, trace_color, ref_color in layers:
        fig.line(steps, trace, color=trace_color, alpha=0.5)
        marker = Span(location=reference, dimension='width',
                      line_color=ref_color, line_dash='dashed',
                      line_width=2)
        fig.add_layout(marker)

    show(fig)

## Mean and precision of a Gaussian distribution

Find the mean ($\mu$) and precision ($\tau = 1/\sigma^2$) of a univariate Gaussian distribution using different approaches.

In [4]:
# Ground-truth Gaussian; these globals are reused by the later cells.
N = 23          # sample size, also reused as the number of grid points below
mu = 3.0        # true mean
sigma2 = 0.5    # true variance
prec = 1.0 / sigma2
print("mu = {}, sigma2 = {} and tau = {}".format(mu, sigma2, prec))

# Evaluate the true density on an N-point grid over [-3, 5].
X = np.linspace(-3.0, 5.0, N)
PX = [normPDF(x, mu, prec=prec) for x in X]

f = figure(
    title="Real distribution",
    x_axis_label='x',
    y_axis_label='p(x)',
    plot_width=300,
    plot_height=300,
)
f.line(X, PX, line_width=2, color="firebrick")
show(f)

mu = 3.0, sigma2 = 0.5 and tau = 2.0


The following code draws the set $D=\{x_n\}_{n=1}^N$ of $N$ samples from this distribution.

In [5]:
# Seed NumPy's global generator so the synthetic data set is identical on every
# Restart & Run All; without a seed each run draws a different D.
np.random.seed(0)
D = normRVS(samples=N, mu=mu, prec=prec)

- $N = |D|$
- $\hat{\mu}_{ML} = \frac{1}{N}\sum_{i=1}^{N} x_i$
- $\hat{\sigma}^2_{ML} = \frac{1}{N}\sum_{i=1}^{N} (x_i - \bar{x})^2$

In [6]:
# Maximum-likelihood estimates computed from the sample D.
muML = D.mean()
sigma2ML = D.var()  # default ddof=0, i.e. the sum of squares is divided by N

### Gibbs sampling

The mean and the variance of a Gaussian distribution follow a [normal-inverse-gamma](https://en.wikipedia.org/wiki/Normal-inverse-gamma_distribution) distribution: $(\mu,\sigma^2) \sim NGamm^{-1}(m,r,a,b)$

The inverse of the variance (i.e. the precision) follows a Gamma distribution: $\frac{1}{\sigma^2} \sim Gamm(a,b)$. The mean depends on the variance and follows a normal distribution: $(\mu | \sigma^2) \sim \mathcal{N}(m,\sigma^2/r)$.

The two distributions required to implement a Gibbs sampler are:

- $p(\mu | \sigma^2) \sim \mathcal{N}(m,\sigma^2/r)$
- $p(\sigma^2 | \mu) \sim Gamm^{-1}\left(a + \frac{1}{2}, b + \frac{r}{2}(\mu - m)^2\right)$

- $m_0$: initial guess for the mean.
- $r_0$: certainty about the mean.
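- $a_0$: shape parameter of the inverse-gamma prior on $\sigma^2$.
- $b_0$: scale parameter of the inverse-gamma prior on $\sigma^2$.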

In [7]:
# Prior hyperparameters read as globals by the Gibbs conditionals below.
m0 = 1.0  # initial guess for the mean
r0 = 1.0  # certainty about the mean
a0 = 0.5  # enters the inverse-gamma shape used when sampling sigma^2
b0 = 1.0  # enters the inverse-gamma scale used when sampling sigma^2

Draws a sample from an inverse gamma distribution given the mean $\mu$:

\begin{equation*}
Gamm^{-1}\left(a + \frac{1}{2}, b + \frac{r}{2}(\mu - m)^2\right)
\end{equation*}

In [8]:
def Draw_sigma_given_mu(mu):
    """Draw one sample of the variance sigma^2 from its full conditional.

    Model: x_i ~ N(mu, sigma^2), mu | sigma^2 ~ N(m0, sigma^2 / r0) and
    sigma^2 ~ InvGamma(a0, b0). Multiplying the likelihood by both prior
    factors and keeping the terms in sigma^2 gives

        sigma^2 | mu, D ~ InvGamma(a0 + (N + 1) / 2,
                                   b0 + r0 / 2 * (mu - m0)^2
                                      + 1 / 2 * sum_i (x_i - mu)^2)

    The extra 1/2 in the shape and the (mu - m0)^2 term come from the prior on
    mu, whose variance depends on sigma^2.

    Parameters
    ----------
    mu : float
        Current value of the mean in the Gibbs chain.

    Returns
    -------
    Whatever ``invGammaRVS(alpha, beta)`` returns (a length-1 array with the
    default ``samples=1``).

    Reads the globals ``a0``, ``b0``, ``r0``, ``m0``, ``N``, ``sigma2ML`` and
    ``muML``.
    """
    alpha = a0 + (N + 1) / 2.0
    # sum_i (x_i - mu)^2 rewritten with the sufficient statistics:
    # N * sigma2ML is the spread around the sample mean, N * (muML - mu)^2 the
    # shift from the sample mean to the current mu.
    sq_dev = N * (sigma2ML + (muML - mu)**2)
    beta = b0 + 0.5 * (r0 * (mu - m0)**2 + sq_dev)
    sigma2_sample = invGammaRVS(alpha, beta)
    return sigma2_sample

In [9]:
def Draw_mu_given_sigma(sigma):
    """Draw one sample of the mean given the current variance.

    Despite its name, ``sigma`` is used as a variance: the standard deviation
    of the sampling distribution is ``sqrt(sigma / (r0 + N))``. The centre is
    the precision-weighted average of the prior guess ``m0`` and the sample
    mean ``muML``.

    Reads the globals ``r0``, ``m0``, ``N`` and ``muML``.
    """
    weight = r0 + N
    center = (r0 * m0 + N * muML) / weight
    spread = np.sqrt(sigma / weight)
    return sp.stats.norm.rvs(loc=center, scale=spread)

In [10]:
def Gibbs(iter):
    """Run the Gibbs sampler and return the chains of mu and sigma^2.

    Parameters
    ----------
    iter : int
        Length of the returned chains. Entry 0 is the initial state, so
        ``iter - 1`` sampling sweeps are performed. (The name shadows the
        built-in ``iter``; it is kept so keyword callers keep working.)

    Returns
    -------
    (mu, sigma) : tuple of ndarray
        Two arrays of length ``iter``. ``mu[0]`` is ``m0``; ``sigma[0]`` is a
        placeholder that was never sampled. Both arrays are empty when
        ``iter`` is 0.
    """
    # One chain per parameter: `mu` holds the mean, `sigma` the variance.
    mu = np.zeros(iter)
    sigma = np.zeros(iter)
    if iter == 0:
        # Nothing to initialise; indexing element 0 would raise IndexError.
        return mu, sigma

    # Initialization
    mu[0] = m0
    sigma[0] = 1e-7  # placeholder only: the first sweep conditions on mu[0]

    for i in range(1, iter):
        # The draw helpers may return length-1 arrays; squeeze to 0-d before
        # storing, because assigning an ndim > 0 array to a single element is
        # deprecated in recent NumPy releases.
        sigma[i] = np.squeeze(Draw_sigma_given_mu(mu[i - 1]))
        mu[i] = np.squeeze(Draw_mu_given_sigma(sigma[i]))

    return mu, sigma

In [11]:
# Run the sampler; emu / esigma are the chains of the mean and the variance.
iterations = 10000
emu, esigma = Gibbs(iter=iterations)
# Precision chain tau = 1 / sigma^2 (element 0 comes from the 1e-7 placeholder).
eprec = 1.0 / esigma

In [12]:
# Full chains, before burn-in and thinning, against the true mu and sigma2.
plotMCMCResults(xs=emu, rx=mu, ys=esigma, ry=sigma2)

In [13]:
burnIn = 0.5  # fraction of the chain discarded from the start
thin = 10     # keep every `thin`-th remaining draw
start = int(emu.size * burnIn)

# NOTE: emu / esigma are overwritten, so re-running this cell trims the
# already-trimmed chains again.

# Apply burn-in
emu, esigma = emu[start:], esigma[start:]

# Apply thin
idxs = np.arange(0, emu.size, thin)
emu, esigma = emu[idxs], esigma[idxs]

In [14]:
# Chains after burn-in and thinning, against the true mu and sigma2.
plotMCMCResults(xs=emu, rx=mu, ys=esigma, ry=sigma2)

### Conclusions

Gibbs sampling is useful when the conditional distributions can be derived from the posterior. In our case that was easy, since the joint distribution of $\mu$ and $\sigma^2$ is well known. However, if this distribution does not have a closed form, deriving the conditionals might be quite difficult.

Metropolis–Hastings (MH) does not have this requirement and hence can be used in a broader set of problems. However, having the conditionals provides more informative sampling, and hence the Markov chain can converge faster than with MH.

### References

- http://web.as.uky.edu/statistics/users/pbreheny/701/S13/notes/1-24.pdf
- http://www.zabaras.com/Publications/PDFiles/THESES/LouisEllam/MSc/MSc_Thesis_LouisEllam.pdf
- http://homepages.math.uic.edu/~rgmartin/Teaching/Stat591/Bayes/Notes/591_gibbs.pdf
- http://engineering.richrelevance.com/bayesian-analysis-of-normal-distributions-with-python/