# MCMC with Cython

## Performance profiling of Python baseline version

In [7]:
%matplotlib inline

import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import norm

sns.set_style('white')
sns.set_context('talk')

np.random.seed(123)

In [8]:
def sampler(data, samples=4, mu_init=.5, proposal_width=.5, plot=False, mu_prior_mu=0, mu_prior_sd=1.):
    """Baseline Metropolis sampler for the mean of unit-variance Gaussian data.

    Works directly with probabilities (not log-probabilities): the likelihood
    is the product of the per-point densities and is multiplied by the prior
    density of ``mu``.

    Parameters
    ----------
    data : array-like
        Observations, modelled as Normal(mu, 1).
    samples : int
        Number of Metropolis steps to run.
    mu_init : float
        Starting value of the chain.
    proposal_width : float
        Standard deviation of the normal proposal centred on the current value.
    plot : bool
        Accepted for signature compatibility; not used by this function.
    mu_prior_mu, mu_prior_sd : float
        Mean and standard deviation of the normal prior on ``mu``.

    Returns
    -------
    list
        The chain, ``samples + 1`` values long, starting with ``mu_init``.
    """
    chain = [mu_init]
    mu_current = mu_init
    for _ in range(samples):
        # Random-walk proposal around the current position.
        mu_proposal = norm(mu_current, proposal_width).rvs()

        # Unnormalised posterior = (product of per-point likelihoods) * prior density.
        p_current = (norm(mu_current, 1).pdf(data).prod()
                     * norm(mu_prior_mu, mu_prior_sd).pdf(mu_current))
        p_proposal = (norm(mu_proposal, 1).pdf(data).prod()
                      * norm(mu_prior_mu, mu_prior_sd).pdf(mu_proposal))

        # Metropolis rule: the proposal is symmetric, so the acceptance
        # probability is just the ratio of the two posterior values.
        accept_ratio = p_proposal / p_current
        if np.random.rand() < accept_ratio:
            mu_current = mu_proposal

        chain.append(mu_current)

    return chain

In [9]:
np.random.seed(123)
# Toy data set: 20 draws from a standard normal, seeded so the draws are repeatable.
data = np.random.randn(20)

In [24]:
%%time
np.random.seed(123)
posterior = sampler(data, samples=1500, mu_init=1.0)
posterior

CPU times: user 6.24 s, sys: 0 ns, total: 6.24 s
Wall time: 6.23 s


In [17]:
def log_sampler(data, samples=4, mu_init=.5, proposal_width=.5, plot=False, mu_prior_mu=0, mu_prior_sd=1.):
    """Metropolis sampler for the mean of unit-variance Gaussian data, in log space.

    Same algorithm as the probability-space baseline, but likelihood and
    prior are handled as log-densities: per-point log-likelihoods are summed
    and the log-prior is added, so no product of small numbers is formed.

    Parameters
    ----------
    data : array-like
        Observations, modelled as Normal(mu, 1).
    samples : int
        Number of Metropolis steps to run.
    mu_init : float
        Starting value of the chain.
    proposal_width : float
        Standard deviation of the normal proposal centred on the current value.
    plot : bool
        Accepted for signature compatibility; not used by this function.
    mu_prior_mu, mu_prior_sd : float
        Mean and standard deviation of the normal prior on ``mu``.

    Returns
    -------
    list
        The chain, ``samples + 1`` values long, starting with ``mu_init``.
    """
    mu_current = mu_init
    trace = [mu_current]
    for _step in range(samples):
        # Random-walk proposal around the current position.
        mu_proposal = norm(mu_current, proposal_width).rvs()

        # Log of the unnormalised posterior at the current and proposed positions:
        # summed per-point log-likelihood plus the log-prior of mu.
        log_p_current = (norm(mu_current, 1).logpdf(data).sum()
                         + norm(mu_prior_mu, mu_prior_sd).logpdf(mu_current))
        log_p_proposal = (norm(mu_proposal, 1).logpdf(data).sum()
                          + norm(mu_prior_mu, mu_prior_sd).logpdf(mu_proposal))

        # A ratio of densities becomes a difference of logs; exponentiate only
        # for the comparison against the uniform draw.
        log_accept_ratio = log_p_proposal - log_p_current
        if np.random.rand() < np.exp(log_accept_ratio):
            mu_current = mu_proposal

        trace.append(mu_current)

    return trace

In [25]:
%%time
np.random.seed(123)
log_posterior = log_sampler(data, samples=1500, mu_init=1.0)

CPU times: user 6.1 s, sys: 10 ms, total: 6.11 s
Wall time: 6.1 s


In [28]:
np.allclose(posterior, log_posterior)

0.0

In [29]:
norm.logpdf??

In [33]:
from numpy import pi
def norm_logpdf(mu, sigma, x):
    """Return the summed log-density of ``x`` under Normal(mu, sigma).

    This is the joint log-likelihood of all values in ``x`` (a single float),
    not an element-wise log-pdf:

        -n/2 * (log(2*pi) + 2*log(sigma)) - sum((x - mu)**2) / (2*sigma**2)

    Parameters
    ----------
    mu : float
        Mean of the normal distribution.
    sigma : float
        Standard deviation of the normal distribution.
    x : array-like or scalar
        Observations. Scalars, lists and arrays of any shape are accepted;
        every element is counted as one observation.

    Returns
    -------
    float
        Sum of the log-densities of all elements of ``x``.
    """
    # asarray lets scalars and lists through; it is a no-op for ndarrays.
    x = np.asarray(x)
    # Count every element: the sum below runs over all of them, so using
    # shape[0] would under-count for multi-dimensional input.
    n = x.size
    return - n / 2.0 * (np.log(2 * pi) + 2.0 * np.log(sigma)) - (0.5 / sigma**2) * np.sum((x - mu)**2)

In [38]:
norm_logpdf(1.0, 1.0, np.linspace(-1, 3, 1000)), norm(1.0, 1.0).logpdf(np.linspace(-1, 3, 1000)).sum()

(-1586.9398678726741, -1586.9398678726739)

In [39]:
%%timeit x=np.linspace(-1, 3, 1000)
norm_logpdf(1.0, 1.0, x)

The slowest run took 27.49 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 11.2 µs per loop


In [41]:
%%timeit x=np.linspace(-1, 3, 1000)
norm(1.0, 1.0).logpdf(x).sum()

1000 loops, best of 3: 856 µs per loop


In [52]:
def log_sampler_v2(data, samples=4, mu_init=.5, proposal_width=.5, plot=False, mu_prior_mu=0, mu_prior_sd=1.):
    """Log-space Metropolis sampler built on ``norm_logpdf`` and ``np.random.normal``.

    Parameters
    ----------
    data : numpy.ndarray
        Observations, modelled as Normal(mu, 1); passed to ``norm_logpdf``.
    samples : int
        Number of Metropolis steps to run.
    mu_init : float
        Starting value of the chain.
    proposal_width : float
        Standard deviation of the normal proposal centred on the current value.
    plot : bool
        Accepted for signature compatibility; not used by this function.
    mu_prior_mu, mu_prior_sd : float
        Mean and standard deviation of the normal prior on ``mu``.

    Returns
    -------
    list
        The chain, ``samples + 1`` values long, starting with ``mu_init``.
    """
    mu_current = mu_init
    draws = [mu_current]
    for _ in range(samples):
        # Random-walk proposal around the current position.
        mu_proposal = np.random.normal(mu_current, proposal_width)

        # Log of the unnormalised posterior: data log-likelihood plus log-prior.
        # The prior term wraps mu in a one-element array because norm_logpdf
        # is called with an array as its last argument.
        log_p_current = (norm_logpdf(mu_current, 1, data)
                         + norm_logpdf(mu_prior_mu, mu_prior_sd, np.array([mu_current])))
        log_p_proposal = (norm_logpdf(mu_proposal, 1, data)
                          + norm_logpdf(mu_prior_mu, mu_prior_sd, np.array([mu_proposal])))

        # Symmetric proposal, so the log acceptance ratio is a plain difference.
        log_accept_ratio = log_p_proposal - log_p_current
        accepted = np.random.rand() < np.exp(log_accept_ratio)
        mu_current = mu_proposal if accepted else mu_current

        draws.append(mu_current)

    return draws

In [53]:
# Re-seed before each run so both samplers start from the same global RNG state.
np.random.seed(123)
posterior_v2 = log_sampler_v2(data, samples=1500, mu_init=1.0)
np.random.seed(123)
posterior = log_sampler(data, samples=1500, mu_init=1.0)
# Element-wise comparison of the two chains (within np.allclose tolerances).
np.allclose(posterior_v2, posterior)

True

In [54]:
%%time
np.random.seed(123)
log_posterior = log_sampler_v2(data, samples=1500, mu_init=1.0)

CPU times: user 90 ms, sys: 30 ms, total: 120 ms
Wall time: 102 ms


In [55]:
%%prun
np.random.seed(123)
log_posterior = log_sampler_v2(data, samples=1500, mu_init=1.0)

 

In [57]:
%%prun
log_sampler_v2(data, samples=15000, mu_init=1.0)

 

In [59]:
%load_ext Cython

In [69]:
%%cython
# cython: profile=True
# NOTE: the directive is spelled `profile`; the previous `profiler` spelling is
# not a Cython directive name, so profiling hooks were not being generated.

import numpy as np
cimport numpy as cnp
from numpy import pi

cdef double norm_logpdf(double mu, double sigma, cnp.ndarray[double] x):
    """Summed Normal(mu, sigma) log-density of every element of ``x``.

    Only the signature is typed; the arithmetic still goes through NumPy
    calls (``np.log``, ``np.sum``).
    """
    n = x.shape[0]
    log_norm_const = np.log(2 * pi) + 2.0 * np.log(sigma)
    sq_dev_sum = np.sum((x - mu)**2)
    return - n / 2.0 * log_norm_const - (0.5 / sigma**2) * sq_dev_sum


def log_sampler_cy(data, samples=4, mu_init=.5, proposal_width=.5, plot=False, mu_prior_mu=0, mu_prior_sd=1.):
    """Log-space Metropolis sampler compiled with Cython.

    The loop body is untyped Python code; only the ``norm_logpdf`` helper it
    calls has a typed signature.

    Parameters
    ----------
    data : numpy.ndarray of float64
        Observations, modelled as Normal(mu, 1).
    samples : int
        Number of Metropolis steps to run.
    mu_init : float
        Starting value of the chain.
    proposal_width : float
        Standard deviation of the normal proposal centred on the current value.
    plot : bool
        Accepted for signature compatibility; not used by this function.
    mu_prior_mu, mu_prior_sd : float
        Mean and standard deviation of the normal prior on ``mu``.

    Returns
    -------
    list
        The chain, ``samples + 1`` values long, starting with ``mu_init``.
    """
    posterior = [mu_init]
    mu_current = mu_init
    for step in range(samples):
        # Random-walk proposal around the current position.
        mu_proposal = np.random.normal(mu_current, proposal_width)

        # Log-posterior (up to a constant) at the current position:
        # data log-likelihood, then the log-prior of mu added on top.
        log_p_current = norm_logpdf(mu_current, 1, data)
        log_p_current += norm_logpdf(mu_prior_mu, mu_prior_sd, np.array([mu_current]))

        # Same quantity at the proposed position.
        log_p_proposal = norm_logpdf(mu_proposal, 1, data)
        log_p_proposal += norm_logpdf(mu_prior_mu, mu_prior_sd, np.array([mu_proposal]))

        # Symmetric proposal: accept with probability exp(difference of log-posteriors).
        log_p_accept = log_p_proposal - log_p_current
        if np.random.rand() < np.exp(log_p_accept):
            mu_current = mu_proposal

        posterior.append(mu_current)

    return posterior

In [70]:
# Re-seed before each run so both samplers start from the same global RNG state.
np.random.seed(123)
posterior_v2 = log_sampler_v2(data, samples=15000, mu_init=1.0)
np.random.seed(123)
posterior_cy = log_sampler_cy(data, samples=15000, mu_init=1.0)
# Element-wise comparison of the pure-Python and Cython chains.
np.allclose(posterior_v2, posterior_cy)

True

In [73]:
%%prun
np.random.seed(123)
posterior_cy = log_sampler_cy(data, samples=15000, mu_init=1.0)

 

In [74]:
%%prun
np.random.seed(123)
posterior_cy = log_sampler_v2(data, samples=15000, mu_init=1.0)

 

In [75]:
%%cython
# cython: profile=True
# NOTE: the directive is spelled `profile`; the previous `profiler` spelling is
# not a Cython directive name, so profiling hooks were not being generated.

from libc.math cimport log as clog, pi as cpi
import numpy as np
cimport numpy as cnp
from numpy import pi

cdef double norm_logpdf(double mu, double sigma, cnp.ndarray[double] x):
    """Summed Normal(mu, sigma) log-density of every element of ``x``.

    The squared deviations are accumulated in a typed C loop and the
    logarithms come from libc, so no NumPy calls are made here.
    """
    cdef int n = x.shape[0]
    cdef int k
    cdef double sq_dev_sum = 0.0
    # Per-observation normalising term: log(2*pi) + 2*log(sigma).
    cdef double log_norm_const = clog(2 * cpi) + 2.0 * clog(sigma)
    for k in range(n):
        sq_dev_sum += (x[k] - mu)**2
    return - n / 2.0 * log_norm_const - (0.5 / sigma / sigma) * sq_dev_sum


def log_sampler_cy_v2(data, samples=4, mu_init=.5, proposal_width=.5, plot=False, mu_prior_mu=0, mu_prior_sd=1.):
    """Log-space Metropolis sampler using the C-loop ``norm_logpdf`` helper.

    The sampling loop itself is untyped; only the log-density helper it
    calls is typed.

    Parameters
    ----------
    data : numpy.ndarray of float64
        Observations, modelled as Normal(mu, 1).
    samples : int
        Number of Metropolis steps to run.
    mu_init : float
        Starting value of the chain.
    proposal_width : float
        Standard deviation of the normal proposal centred on the current value.
    plot : bool
        Accepted for signature compatibility; not used by this function.
    mu_prior_mu, mu_prior_sd : float
        Mean and standard deviation of the normal prior on ``mu``.

    Returns
    -------
    list
        The chain, ``samples + 1`` values long, starting with ``mu_init``.
    """
    mu_current = mu_init
    chain = [mu_current]
    for _ in range(samples):
        # Random-walk proposal around the current position.
        mu_proposal = np.random.normal(mu_current, proposal_width)

        # Data log-likelihood at the current and proposed mean.
        loglik_cur = norm_logpdf(mu_current, 1, data)
        loglik_prop = norm_logpdf(mu_proposal, 1, data)

        # Log-prior of each candidate mean (one-element arrays, since the
        # helper takes an array as its last argument).
        logprior_cur = norm_logpdf(mu_prior_mu, mu_prior_sd, np.array([mu_current]))
        logprior_prop = norm_logpdf(mu_prior_mu, mu_prior_sd, np.array([mu_proposal]))

        # Symmetric proposal: the log acceptance ratio is the difference of
        # the two log-posteriors.
        log_ratio = (loglik_prop + logprior_prop) - (loglik_cur + logprior_cur)

        accepted = np.random.rand() < np.exp(log_ratio)
        if accepted:
            mu_current = mu_proposal

        chain.append(mu_current)

    return chain

In [76]:
# Re-seed before each run so both samplers start from the same global RNG state.
np.random.seed(123)
posterior_v2 = log_sampler_v2(data, samples=15000, mu_init=1.0)
np.random.seed(123)
posterior_cy = log_sampler_cy_v2(data, samples=15000, mu_init=1.0)
# Element-wise comparison of the pure-Python chain and the typed-loop Cython chain.
np.allclose(posterior_v2, posterior_cy)

True

In [91]:
%%timeit np.random.seed(123)
posterior_cy = log_sampler_cy_v2(data, samples=15000, mu_init=1.0)

10 loops, best of 3: 124 ms per loop


In [90]:
%%timeit np.random.seed(123)
posterior_cy = log_sampler_cy(data, samples=15000, mu_init=1.0)

1 loop, best of 3: 748 ms per loop


In [141]:
%%cython -a
# cython: profile=True

DEF LOG_2_PI = 1.8378770664093453

cimport cython
from libc.math cimport log as clog, pi as cpi, exp as cexp
import numpy as np
cimport numpy as cnp
from numpy import pi

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef double norm_logpdf(const double mu, const double sigma, const double* const x, const int n):
    """Summed Normal(mu, sigma) log-density of the ``n`` doubles starting at ``x``.

    ``x`` is read as ``x[0] .. x[n-1]``, so the caller must pass a pointer to
    at least ``n`` consecutive doubles.
    """
    cdef double inv_sigma = 1.0 / sigma
    cdef double sq_dev_sum = 0.0
    cdef double dev
    cdef int k
    for k in range(n):
        dev = x[k] - mu
        sq_dev_sum += dev * dev
    # -n/2 * log(2*pi)  -  n * log(sigma)  -  sum((x - mu)^2) / (2 * sigma^2)
    return - 0.5 * n * LOG_2_PI - n * clog(sigma) - 0.5 * inv_sigma * inv_sigma * sq_dev_sum

cdef double sample_norm(double mu, double sigma):
    """Draw one value from Normal(mu, sigma) via ``np.random.normal``, as a C double."""
    cdef double draw = np.random.normal(mu, sigma)
    return draw

cdef bint accept_p(double log_p_accept):
    """Metropolis accept test: true when a uniform draw falls below exp(log_p_accept)."""
    cdef double accept_prob = cexp(log_p_accept)
    return np.random.rand() < accept_prob

@cython.boundscheck(False)
@cython.wraparound(False)
def log_sampler_cy_v3(cnp.ndarray[double] data,
                      int samples=4,
                      double mu_init=.5,
                      double proposal_width=.5,
                      double mu_prior_mu=0,
                      double mu_prior_sd=1.):
    """Fully typed log-space Metropolis sampler for the mean of Gaussian data.

    Parameters
    ----------
    data : 1-D numpy.ndarray of float64
        Observations, modelled as Normal(mu, 1). Non-contiguous (strided)
        arrays are accepted; they are copied to a contiguous buffer first.
    samples : int
        Number of Metropolis steps to run. Defaults to 4, matching the other
        samplers in this notebook.
    mu_init : float
        Starting value of the chain.
    proposal_width : float
        Standard deviation of the normal proposal centred on the current value.
    mu_prior_mu, mu_prior_sd : float
        Mean and standard deviation of the normal prior on ``mu``.

    Returns
    -------
    list
        The chain, ``samples + 1`` values long, starting with ``mu_init``.
    """
    # norm_logpdf walks a raw pointer with unit stride, so it must be handed a
    # C-contiguous buffer. ascontiguousarray returns the input unchanged when
    # it is already contiguous and copies otherwise; without this, a strided
    # view (e.g. data[::2] or data[::-1]) would be read from the wrong memory.
    cdef:
        double mu_proposal, log_likelihood_current, log_likelihood_proposal
        double log_prior_current, log_prior_proposal
        double log_p_current, log_p_proposal
        double log_p_accept
        double mu_current = mu_init
        list posterior = [mu_current]
        int i
        cnp.ndarray[double, ndim=1, mode='c'] data_c = np.ascontiguousarray(data)
        double *cdata = &data_c[0]
        int ndata = data_c.shape[0]

    for i in range(samples):
        # suggest new position
        mu_proposal = sample_norm(mu_current, proposal_width)

        # Compute likelihood by adding log probabilities of each data point
        log_likelihood_current = norm_logpdf(mu_current, 1, cdata, ndata)
        log_likelihood_proposal = norm_logpdf(mu_proposal, 1, cdata, ndata)

        # Compute prior log probability of current and proposed mu.
        # Each candidate is a single double, so its address is passed as a
        # one-element array; no scratch buffer is needed.
        log_prior_current = norm_logpdf(mu_prior_mu, mu_prior_sd, &mu_current, 1)
        log_prior_proposal = norm_logpdf(mu_prior_mu, mu_prior_sd, &mu_proposal, 1)

        log_p_current = log_likelihood_current + log_prior_current
        log_p_proposal = log_likelihood_proposal + log_prior_proposal

        # Accept proposal? (symmetric proposal, so the log acceptance ratio is
        # the difference of the two log-posteriors)
        log_p_accept = log_p_proposal - log_p_current

        if accept_p(log_p_accept):
            # Update position
            mu_current = mu_proposal

        posterior.append(mu_current)

    return posterior

In [143]:
%%prun
np.random.seed(123)
posterior_cy = log_sampler_cy_v3(data, samples=15000, mu_init=1.0)

 

In [144]:
%%timeit np.random.seed(123)
posterior_cy = log_sampler_cy_v3(data, samples=15000, mu_init=1.0)

10 loops, best of 3: 39.1 ms per loop


In [145]:
# Re-seed before each run so both samplers start from the same global RNG state.
np.random.seed(123)
posterior_v2 = log_sampler_v2(data, samples=15000, mu_init=1.0)
np.random.seed(123)
posterior_v3 = log_sampler_cy_v3(data, samples=15000, mu_init=1.0)
# Element-wise comparison of the pure-Python chain and the fully typed Cython chain.
np.allclose(posterior_v2, posterior_v3)

True