## <font color=darkcyan> Markov chain and Markov chain Monte Carlo methods</font>
$
\newcommand{\PP}{\mathbb P}
\newcommand{\PE}{\mathbb E}
\newcommand{\Xset}{\mathsf{X}}
\newcommand{\nset}{\mathbb{N}}
\newcommand{\invcdf}[1]{F_{#1}^{\leftarrow}}
\newcommand{\rmd}{\mathrm{d}}
\newcommand{\rme}{\mathrm{e}}
$

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as npr
import scipy.stats as stats
from scipy.stats import expon, geom, norm
from math import pi

import seaborn as sns
import autograd.numpy as np
import pandas as pd
import scipy.stats as st
# package which differentiates standard Python and Numpy code
from autograd import grad
# to get progress bars
from tqdm import tqdm

#### <font color=darkorange> The invariant measure of a Markov chain </font>



#### Question 1



Consider a Gaussian AR($1$) process, $X_t= \mu + \phi X_{t-1} + \sigma Z_t$, where $(Z_t)_{t \in \nset}$ is an iid sequence of standard Gaussian random variables, independent of $X_0$. Assume that $|\phi| < 1$. Show that the Gaussian distribution with mean $\mu/(1-\phi)$ and variance $\sigma^2/(1-\phi^2)$ is a stationary distribution of the Markov chain.



Let $P(x,y) \propto \exp(-(y-\mu-\phi x)^2/(2\sigma^2))$ denote the transition density of the AR(1) kernel. We then compute $\pi P$, where $\pi$ is the Gaussian probability density function with mean $\tilde\mu$ and variance $\tilde\sigma^2$. For all $y$,
$$
\pi P(y) \propto \int  \exp(-(x-\tilde \mu)^2/(2\tilde\sigma^2))\exp(-(y-\mu-\phi x)^2/(2\sigma^2))\mathrm{d} x\,.
$$
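This integral can be computed explicitly: $\pi P$ is the Gaussian probability density function with mean $\mu + \phi\tilde\mu$ and variance $\phi^2\tilde\sigma^2 + \sigma^2$. It is then enough to solve the equation $\pi P = \pi$, i.e. $\tilde\mu = \mu + \phi\tilde\mu$ and $\tilde\sigma^2 = \phi^2\tilde\sigma^2 + \sigma^2$, which yields $\tilde\mu = \mu/(1-\phi)$ and $\tilde\sigma^2 = \sigma^2/(1-\phi^2)$.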

#### Question 2

Illustrate this property with a histogram of the values taken by a single trajectory of the Markov chain.

In [None]:
# Simulate one long trajectory of the Gaussian AR(1) chain and compare the
# histogram of its values with the stationary density found in Question 1.

# chain length, intercept, autoregressive coefficient, noise standard deviation
p, mu, phi, sig = 10000, 1, 0.9, 1


def f(x, m, sq):
    """Gaussian probability density function with mean m and variance sq, evaluated at x."""
    return np.exp(-(x - m)**2 / (2 * sq)) / np.sqrt(2 * pi * sq)


# The chain starts at X_0 = 0; every other entry is overwritten by the
# recursion below, so a zero-filled array is all that is needed.
mc = np.zeros(p)

for i in range(p - 1):
    mc[i + 1] = mu + phi * mc[i] + sig * npr.randn()

# Stationary distribution: mean mu/(1-phi), variance sig^2/(1-phi^2).
x = np.linspace(min(mc), max(mc), 30)
plt.hist(mc, bins=80, density=True, edgecolor="black")
plt.plot(x, f(x, mu / (1 - phi), sig**2 / (1 - phi**2)), color="red")
plt.title("Histogram of a trajectory of the MC. n=" + str(p))
plt.show()

#### <font color=darkorange> Symmetric Random Walk Metropolis-Hastings algorithm </font>

We now consider a target distribution which is the mixture of two Gaussian distributions, one centered at $a$ and the other one centered at $-a$:
$$
\pi(x)=\frac{1}{2}\left(\phi(x-a)+\phi(x+a)\right)=\frac{1}{2} \frac{\rme^{-(x-a)^2/2}}{\sqrt{2\pi}}+\frac{1}{2} \frac{\rme^{-(x+a)^2/2}}{\sqrt{2\pi}}
$$
where $\phi$ is the density of the standard normal distribution.

To target this distribution, we sample according to a Symmetric Random Walk Metropolis-Hastings algorithm. When the chain is at the state $X_k$, we propose a candidate $Y_{k+1}$ according to $Y_{k+1}=X_k+ \sigma Z_k$ where $Z_k\sim {\mathcal N}(0,1)$ and then we accept $X_{k+1}=Y_{k+1}$ with probability $\alpha(X_k,Y_{k+1})$, where $\alpha(x,y)=\frac{\pi(y)}{\pi(x)} \wedge 1$. Otherwise, $X_{k+1}=X_{k}$.

#### Question 3

- Write the Symmetric Random Walk MH loop with target distribution $\pi$.
- Display the trajectory of the Markov chain.
- Display the histogram of the Markov chain along with the target density.

In [None]:
# Functions to define the target distribution in particular for mixtures of Gaussian distributions.
# Can be replaced by a simpler target distributions as proposed.

def multi_gauss(mu, sigma):
    """
    Build the opposite of the log-density of a multivariate Gaussian distribution.

    Inputs
    ----------
    mu: mean of the Gaussian distribution (array whose first axis has length k)
    sigma: covariance matrix of the Gaussian distribution
    
    Outputs
    -------
    logp: function mapping x to the opposite of the Gaussian log-density at x
    """
    # Everything below depends only on (mu, sigma). It is computed once here
    # rather than at every evaluation of logp, which the samplers call at
    # each iteration.
    k         = mu.shape[0]
    log_norm  = k * np.log(2 * np.pi) + np.log(np.linalg.det(sigma))
    precision = np.linalg.inv(sigma)

    def logp(x):
        centered  = x - mu
        quad_term = np.dot(np.dot(centered.T, precision), centered)
        return (log_norm + quad_term) * 0.5
    
    return logp

def mixture(log_prob, weights):
    """
    Build the opposite of the log-density of a mixture distribution.

    Inputs
    ----------
    log_prob: sequence of functions, the opposite of the log-density of each component
    weights: weights of the components of the mixture
    
    Outputs
    -------
    logp: function mapping x to the opposite of the log-density of the mixture at x
    """
    
    def logp(x):
        values = [log_prob[j](x) for j in range(np.size(weights))]
        if not values:
            # no component: the mixture density is zero everywhere
            return np.inf

        # Shift every component by the smallest value (i.e. the largest
        # component density) before exponentiating. The result is unchanged
        # mathematically, but the leading exponential equals 1, so the sum
        # cannot underflow to 0 when x is far from every component.
        shift = values[0]
        for value in values[1:]:
            shift = np.minimum(shift, value)

        likelihood = 0
        for j, value in enumerate(values):
            likelihood = likelihood + weights[j]*np.exp(shift - value)
        
        return shift - np.log(likelihood)

    return logp

In [None]:
# The target density is displayed on the square [-grid_lim, grid_lim]^2.
grid_lim = 6
# number of grid nodes along each axis
nb_points = 100
# bounds of the displayed square, passed as `extent` to plt.imshow
grid_plot = (-grid_lim, grid_lim) * 2

# one-dimensional axes, then the matching two-dimensional coordinate arrays
xplot, yplot = (np.linspace(-grid_lim, grid_lim, nb_points) for _ in range(2))
Xplot, Yplot = np.meshgrid(xplot, yplot)

We write a generic sampler which works for any target density, provided that we can evaluate the opposite of its log-density.

In [None]:
def MH_monte_carlo(n_samples, log_prob, initial_state, step_size = 0.1):
    """
    Symmetric random walk Metropolis-Hastings sampler.

    Inputs
    ----------
    n_samples: number of samples to return
    log_prob: opposite of the log-density to sample from
    initial_state: initial sample (not included in the returned samples)
    step_size: standard deviation of the proposed moves
    
    Outputs
    -------
    samples: state of the chain after each of the n_samples iterations
    accepted: boolean array, True where the proposed move has been accepted
    """
    current = np.array(initial_state)
    
    samples  = []
    accepted = []

    # one standard Gaussian noise vector per iteration
    size = (n_samples,) + current.shape[:1]
    
    # random variable to sample proposed moves
    epsilon = st.norm(0, 1).rvs(size)

    # The target value at the current state only changes when a move is
    # accepted, so it is stored instead of being re-evaluated at each iteration.
    current_log_p = log_prob(current)
    
    for noise in tqdm(epsilon):
        
        q_new = current + step_size*noise
        new_log_p = log_prob(q_new)
        
        # The proposal is symmetric, so the log acceptance ratio is
        # log pi(q_new) - log pi(current) = current_log_p - new_log_p.
        if np.log(np.random.rand()) < current_log_p - new_log_p:
            current, current_log_p = q_new, new_log_p
            accepted.append(True)
        else:
            accepted.append(False)
        
        samples.append(current)

    return (np.array(samples),np.array(accepted),)

In [None]:
# Target distribution: a three-component Gaussian mixture in dimension 2.

# components 1 and 2: opposite means, correlated coordinates
mu1 = np.array([2., 2.])
cov1 = np.array([[1., 0.5], [0.5, 1.]])
mu2 = -mu1
cov2 = np.array([[1., -0.1], [-0.1, 1.]])

# component 3: diagonal covariance
mu3 = np.array([-1.5, 2.2])
cov3 = np.eye(2) * 0.8

# opposite of the log-density of the mixture, with weights 0.3, 0.3 and 0.4
log_p = mixture(
    [multi_gauss(m, c) for m, c in ((mu1, cov1), (mu2, cov2), (mu3, cov3))],
    [0.3,0.3,0.4],
)

In [None]:
# chain length and standard deviation of the random walk increments
n_samples, step_size = 20000, 0.2
# the chain is started from a standard Gaussian draw in dimension 2
samples_MH, accepted_MH = MH_monte_carlo(n_samples, log_p, np.random.randn(2), step_size = step_size)

In [None]:
fig = plt.figure(figsize=(8,8))

# Zplot[i][j] is the target density at the grid point (Xplot[i][j], Yplot[i][j]).
Zplot = np.copy(Xplot)
for i in range(nb_points):
    for j in range(nb_points):
        Zplot[i][j] = np.exp(-log_p(np.array((Xplot[i][j], Yplot[i][j]))))

# Row 0 of Zplot holds the smallest ordinate (-grid_lim), so it has to be drawn
# at the bottom of the image. With origin='upper' the density would be flipped
# vertically with respect to the samples plotted on top of it.
plt.imshow(Zplot, alpha = 0.9, extent = grid_plot, cmap='Blues', origin='lower')
plt.plot(samples_MH[:,0], samples_MH[:,1], '.', color='orange', alpha = 0.1, label = 'RW Metropolis-Hastings samples')
plt.legend();

In [None]:
# Trace plot: each coordinate of the chain against the iteration index.
fig = plt.figure(figsize=(6,6))
for coord, rank in enumerate(('1st', '2nd')):
    plt.plot(samples_MH[:,coord], alpha = 0.5, label = 'RW Metropolis-Hastings trajectory, ' + rank + ' coordinate')
plt.legend();

#### <font color=darkorange> Independent Metropolis-Hastings algorithm </font>

We again consider a target distribution which is a mixture of two Gaussian distributions, one centered at $a$ and the other one centered at $-a$:
$$
\pi(x)=\frac{1}{2}\left(\phi(x-a)+\phi(x+a)\right)=\frac{1}{2} \frac{\rme^{-(x-a)^2/2}}{\sqrt{2\pi}}+\frac{1}{2} \frac{\rme^{-(x+a)^2/2}}{\sqrt{2\pi}},
$$
where $\phi$ is the density of the standard normal distribution.

To target this distribution, we sample according to a Metropolis-Hastings algorithm with independent proposal. When the chain is at the state $X_k$, we propose a candidate $Y_{k+1}$ according to $Y_{k+1}=Z_k$ where $Z_k\sim {\mathcal N}(\theta,\sigma^2)$ and then we accept $X_{k+1}=Y_{k+1}$ with probability $\alpha(X_k,Y_{k+1})$, where $\alpha(x,y)=\frac{\pi(y)q(x)}{\pi(x)q(y)} \wedge 1=\frac{\pi(y)/q(y)}{\pi(x)/q(x)} \wedge 1$ and $q$ is the density of ${\mathcal N}(\theta,\sigma^2)$. Otherwise, $X_{k+1}=X_{k}$.

#### Question 4

- Write the independent MH loop with target distribution $\pi$.
- Display the trajectory of the Markov chain.
- Display the histogram of the Markov chain along with the target density.

In [None]:
def MH_independent(n_samples, log_prob, initial_state, step_size = 0.1):
    """
    Metropolis-Hastings sampler with an independent, centered Gaussian proposal.

    Inputs
    ----------
    n_samples: number of samples to return
    log_prob: opposite of the log-density to sample from
    initial_state: initial sample (not included in the returned samples)
    step_size: standard deviation of the proposal, which is a centered Gaussian
        distribution that does not depend on the current state
    
    Outputs
    -------
    samples: state of the chain after each of the n_samples iterations
    accepted: boolean array, True where the proposed move has been accepted
    """

    def neg_log_weight(state):
        # Opposite of log(pi(state)/q(state)), up to an additive constant,
        # where log q(state) = -|state|^2/(2*step_size^2) + constant.
        return log_prob(state) - 0.5*np.dot(state, state)/step_size**2

    current = np.array(initial_state)
    
    samples  = []
    accepted = []

    # one standard Gaussian noise vector per iteration
    size = (n_samples,) + current.shape[:1]
    
    # random variable to sample proposed moves
    epsilon = st.norm(0, 1).rvs(size)

    # The weight of the current state only changes when a move is accepted.
    current_value = neg_log_weight(current)
    
    for noise in tqdm(epsilon):
        
        # the candidate does not depend on the current state
        q_new = step_size*noise
        new_value = neg_log_weight(q_new)
        
        # Log acceptance ratio log[pi(y) q(x) / (pi(x) q(y))] with x the current
        # state and y the candidate, i.e.
        #   (log_prob(x) - log_prob(y)) + 0.5*(|y|^2 - |x|^2)/step_size^2.
        # The proposal term must enter with this sign: with the opposite one
        # the chain does not target pi.
        if np.log(np.random.rand()) < current_value - new_value:
            current, current_value = q_new, new_value
            accepted.append(True)
        else:
            accepted.append(False)
        
        samples.append(current)

    return (np.array(samples),np.array(accepted),)

In [None]:
# chain length and standard deviation of the independent Gaussian proposal
n_samples, step_size = 20000, 1
# the chain is started from a standard Gaussian draw in dimension 2
samples_ind, accepted_ind = MH_independent(n_samples, log_p, np.random.randn(2), step_size = step_size)

In [None]:
fig = plt.figure(figsize=(8,8))

# Zplot[i][j] is the target density at the grid point (Xplot[i][j], Yplot[i][j]).
Zplot = np.copy(Xplot)
for i in range(nb_points):
    for j in range(nb_points):
        Zplot[i][j] = np.exp(-log_p(np.array((Xplot[i][j], Yplot[i][j]))))

# Row 0 of Zplot holds the smallest ordinate (-grid_lim), so it has to be drawn
# at the bottom of the image. With origin='upper' the density would be flipped
# vertically with respect to the samples plotted on top of it.
plt.imshow(Zplot, alpha = 0.9, extent = grid_plot, cmap='Blues', origin='lower')
# these samples come from the independent sampler, not from the random walk one
plt.plot(samples_ind[:,0], samples_ind[:,1], '.', color='orange', alpha = 0.1, label = 'Independent Metropolis-Hastings samples')
plt.legend();

In [None]:
# Trace plot of the independent sampler: each coordinate against the iteration index.
# The labels name the independent sampler, which produced samples_ind.
fig = plt.figure(figsize=(6,6))
plt.plot(samples_ind[:,0], alpha = 0.5, label = 'Independent Metropolis-Hastings trajectory, 1st coordinate')
plt.plot(samples_ind[:,1], alpha = 0.5, label = 'Independent Metropolis-Hastings trajectory, 2nd coordinate')
plt.legend();

#### <font color=darkorange> Metropolis Adjusted Langevin algorithm (MALA) </font>


``Objective target density:`` $\pi$.

At each iteration $k\geqslant 0$, generate $Z_{k+1} = X_k + \frac{\sigma}{2}\nabla\log\pi(X_k) + \sigma \varepsilon_{k+1}$, where $\varepsilon_{k+1}\sim\mathcal{N}(0, I_d)$.

Set $X_{k+1} = Z_{k+1}$ with probability $\alpha(X_k,Z_{k+1})$ and  $X_{k+1} = X_k$ with probability $1-\alpha(X_k,Z_{k+1})$, where 

$$
\alpha(x,y) = 1\wedge\frac{\pi(y)}{\pi(x)}\frac{q(y,x)}{q(x,y)}\,,
$$

where $q(x,y)$ is the Gaussian pdf with mean $x + \frac{\sigma}{2}\nabla\log\pi(x)$ and variance $\sigma^2 I_d$.

#### Question 5

- Write the Metropolis Adjusted Langevin algorithm loop with target distribution $\pi$.
- Display the trajectory of the Markov chain.
- Display the histogram of the Markov chain along with the target density.

In [None]:
def MALA_monte_carlo(n_samples, log_prob, initial_state, step_size = 0.1):
    """
    Metropolis Adjusted Langevin sampler.

    Inputs
    ----------
    n_samples: number of samples to return
    log_prob: opposite of the log-density to sample from (differentiated with autograd)
    initial_state: initial sample (not included in the returned samples)
    step_size: standard deviation of the Gaussian noise of the proposed moves;
        the drift of the proposal is -0.5*step_size times the gradient of log_prob
    
    Outputs
    -------
    samples: state of the chain after each of the n_samples iterations
    accepted: boolean array, True where the proposed move has been accepted
    """
    current = np.array(initial_state)

    gradV = grad(log_prob)

    samples  = []
    accepted = []

    # one standard Gaussian noise vector per iteration
    size = (n_samples,) + current.shape[:1]
    
    # random variable to sample proposed moves
    epsilon = st.norm(0, 1).rvs(size)

    # The target value and the proposal mean at the current state only change
    # when a move is accepted: they are stored so that each iteration needs a
    # single new density evaluation and a single new gradient evaluation.
    current_log_p = log_prob(current)
    current_mean  = current - 0.5*step_size*gradV(current)
    
    for noise in tqdm(epsilon):
        
        q_new    = current_mean + step_size*noise
       
        # mean of the proposal started from the candidate (reverse move)
        new_log_p = log_prob(q_new)
        mean_y    = q_new - 0.5*step_size*gradV(q_new)
        
        # acceptance rate: each term is -log pi(.) - log q(., .) up to a constant,
        # for the forward move (current -> q_new) and the reverse move
        forward  = q_new - current_mean
        backward = current - mean_y
        old_term = current_log_p + 0.5*np.dot(forward,forward)/(step_size**2)
        new_term = new_log_p + 0.5*np.dot(backward,backward)/(step_size**2)
        
        if np.log(np.random.rand()) < old_term - new_term:
            current, current_log_p, current_mean = q_new, new_log_p, mean_y
            accepted.append(True)
        else:
            accepted.append(False)
        
        samples.append(current)

    return (np.array(samples),np.array(accepted),)

In [None]:
# Target distribution for MALA: a three-component Gaussian mixture in dimension 2.

# components 1 and 2: opposite means, correlated coordinates
mu1 = np.array([2., 2.])
cov1 = np.array([[1., 0.5], [0.5, 1.]])
mu2 = -mu1
cov2 = np.array([[1., -0.1], [-0.1, 1.]])

# component 3: diagonal covariance
mu3 = np.array([-1.5, 2.2])
cov3 = np.eye(2) * 0.8

# opposite of the log-density of the mixture, with weights 0.3, 0.3 and 0.4
log_p = mixture(
    [multi_gauss(m, c) for m, c in ((mu1, cov1), (mu2, cov2), (mu3, cov3))],
    [0.3, 0.3, 0.4],
)

In [None]:
# chain length and step size of the Langevin proposal
n_samples, step_size = 20000, 0.2
# the chain is started from a standard Gaussian draw in dimension 2
samples_Mala, accepted_Mala = MALA_monte_carlo(n_samples, log_p, np.random.randn(2), step_size = step_size)

In [None]:
fig = plt.figure(figsize=(8,8))

# Zplot is the density grid filled in by a previous plotting cell; its row 0
# holds the smallest ordinate (-grid_lim), so it has to be drawn at the bottom
# of the image. With origin='upper' the density would be flipped vertically
# with respect to the samples plotted on top of it.
plt.imshow(Zplot, alpha = 0.9, extent = grid_plot, cmap='Blues', origin='lower')
plt.plot(samples_Mala[:,0], samples_Mala[:,1], '.', color='orange', alpha = 0.1, label = 'MALA samples')
plt.legend();

In [None]:
# Trace plot: each coordinate of the MALA chain against the iteration index.
fig = plt.figure(figsize=(6,6))
for coord, rank in enumerate(('1st', '2nd')):
    plt.plot(samples_Mala[:,coord], alpha = 0.5, label = 'MALA trajectory, ' + rank + ' coordinate')
plt.legend();

#### Question 6
Compare all MH versions when the target $\pi$ is a mixture of Gaussian distributions.