#!/usr/bin/env python3
""" Compare the speed of my implementation and PyMC's implementation of Gelman
et al.'s effective sample size. """
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from IPython import get_ipython

# Note: newer Matplotlib releases renamed this style 'seaborn-v0_8'
plt.style.use('seaborn')
ipython = get_ipython()
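
# Both ESS implementations below follow Gelman et al.: with m chains of n
# iterations each, estimate the lag-t autocorrelation as
# rho_t = 1 - V_t / (2 * s2), where V_t is the variogram at lag t and s2 the
# marginal posterior variance estimate, then take
# ESS = m * n / (1 + 2 * sum_t rho_t), truncating the sum once consecutive
# autocorrelation estimates turn negative.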
def my_ESS(x):
    """ Compute the effective sample size of estimand of interest. Vectorised implementation. """
    m_chains, n_iters = x.shape
    variogram = lambda t: ((x[:, t:] - x[:, :(n_iters - t)])**2).sum() / (m_chains * (n_iters - t))
    post_var = my_gelman_rubin(x)
    t = 1
    rho = np.ones(n_iters)
    negative_autocorr = False
    # Iterate until the sum of consecutive estimates of autocorrelation is negative
    while not negative_autocorr and (t < n_iters):
        rho[t] = 1 - variogram(t) / (2 * post_var)
        if not t % 2:
            negative_autocorr = sum(rho[t-1:t+1]) < 0
        t += 1
    return int(m_chains*n_iters / (1 + 2*rho[1:t].sum()))
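
# Quick sanity check (illustrative, not part of the timing comparison): for
# near-independent draws, e.g. my_ESS(np.random.randn(4, 1000)), the result
# should come out roughly near m_chains * n_iters = 4000.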
def my_gelman_rubin(x):
    """ Estimate the marginal posterior variance. Vectorised implementation. """
    m_chains, n_iters = x.shape
    # Between-chain variance (B / n)
    B_over_n = ((np.mean(x, axis=1) - np.mean(x))**2).sum() / (m_chains - 1)
    # Average within-chain variance
    W = ((x - x.mean(axis=1, keepdims=True))**2).sum() / (m_chains*(n_iters - 1))
    # (Over)estimate of the marginal posterior variance
    s2 = W * (n_iters - 1) / n_iters + B_over_n
    return s2
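
# The Gelman-Rubin diagnostic itself follows from the same quantities as
# R_hat = sqrt(s2 / W); a minimal sketch (illustrative only, not timed below):
def my_r_hat(x):
    """ Potential scale reduction factor, sqrt(s2 / W). Illustrative sketch. """
    m_chains, n_iters = x.shape
    # Average within-chain variance, as in my_gelman_rubin
    W = ((x - x.mean(axis=1, keepdims=True))**2).sum() / (m_chains * (n_iters - 1))
    return np.sqrt(my_gelman_rubin(x) / W)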
def ESS(x):
    """ Compute the effective sample size of estimand of interest. PyMC's implementation. """
    m_chains, n_iters = x.shape
    variogram = lambda t: (sum(sum((x[j][i] - x[j][i-t])**2 for i in range(t, n_iters)) for j in
                               range(m_chains)) / (m_chains * (n_iters - t)))
    post_var = gelman_rubin(x)
    t = 1
    rho = np.ones(n_iters)
    negative_autocorr = False
    # Iterate until the sum of consecutive estimates of autocorrelation is negative
    while not negative_autocorr and (t < n_iters):
        rho[t] = 1 - variogram(t) / (2 * post_var)
        if not t % 2:
            negative_autocorr = sum(rho[t-1:t+1]) < 0
        t += 1
    return int(m_chains * n_iters / (1 + 2*rho[1:t].sum()))
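
# ESS only differs from my_ESS in the variogram: here it is a pure-Python
# double loop over chains and iterations, which is exactly what the timing
# comparison at the bottom of the script measures.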
def gelman_rubin(x):
    """ Estimate the marginal posterior variance. PyMC's implementation. """
    m_chains, n = x.shape
    # Between-chain variance (B / n)
    B_over_n = ((np.mean(x, axis=1) - np.mean(x))**2).sum() / (m_chains - 1)
    # Average within-chain variance
    W = np.sum([(x[i] - xbar)**2 for i, xbar in enumerate(np.mean(x, 1))]) / (m_chains * (n - 1))
    # (Over)estimate of the marginal posterior variance
    s2 = W * (n - 1) / n + B_over_n
    return s2
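
# Both pairs of implementations compute the same quantities, so on any input x
# of shape (m_chains, n_iters) one would expect, up to floating-point rounding:
#   np.isclose(my_gelman_rubin(x), gelman_rubin(x)) and my_ESS(x) == ESS(x)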
if __name__ == "__main__":
    # Observed data
    data = np.zeros(2)
    # Number of iterations and number of chains to run
    iters = 10000
    runs = 4
    # Initial values, one per chain
    mu_cur = np.array([[2.5, 2.5], [2.5, -2.5], [-2.5, 2.5], [-2.5, -2.5]])
    # Array to store output in
    output = np.zeros([runs, 2, iters])
    output[:, :, 0] = mu_cur
    # Innovation size
    rw_cov = np.eye(2)
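    # Each chain is a random-walk Metropolis sampler: propose mu' ~ N(mu, rw_cov)
    # and accept with probability min(1, exp(ll_prop - ll_cur)); the proposal is
    # symmetric and the prior (implicitly) flat, so the likelihood ratio is the
    # full Metropolis-Hastings acceptance ratio.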
    for j in range(runs):
        ll_cur = norm.logpdf(data, mu_cur[j], 1).sum()
        accept = 0
        for i in range(1, iters):
            # Propose new values
            mu_prop = np.random.multivariate_normal(mu_cur[j], rw_cov)
            # Compute log-likelihood of proposed values
            ll_prop = norm.logpdf(data, mu_prop, 1).sum()
            # Accept or reject proposal
            if ll_prop - ll_cur > np.log(np.random.uniform()):
                mu_cur[j] = mu_prop.copy()
                ll_cur = ll_prop
                accept += 1
            # Record current state of chain
            output[j, :, i] = mu_cur[j]
        print("Chain {} acceptance rate was: {:.2f}%".format(j, accept / (iters - 1) * 100))
    # Plot walk around parameter space
    fig, ax = plt.subplots(1, 1)
    for j in range(runs):
        ax.plot(output[j, 0, :], output[j, 1, :])
    ax.set_aspect('equal')
    ax.set_xlabel(r'$\mu_1$')
    ax.set_ylabel(r'$\mu_2$')
    fig.tight_layout()
    # Plot trajectories of chains
    fig, ax = plt.subplots(1, 2)
    for i in range(2):
        for j in range(runs):
            ax[i].plot(np.r_[:iters], output[j, i, :])
        ax[i].set_xlabel('Iteration')
        ax[i].set_ylabel(r'$\mu_{}$'.format(i+1))
        ax[i].set_xticks((0, iters))
        ax[i].set_xticklabels((0, r'$10\,000$'))
        ax[i].set_xlim((0, iters))
    fig.tight_layout()
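    # Finally, time both implementations on the chains for the first coordinate.
    # my_ESS evaluates the variogram with vectorised NumPy operations, while ESS
    # loops over chains and iterations in Python, so my_ESS should be
    # substantially faster here.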
    # get_ipython() returns None when the script runs outside IPython, so guard
    # the timeit magics
    if ipython is not None:
        ipython.run_line_magic('timeit', 'ESS(output[:, 0, :])')
        ipython.run_line_magic('timeit', 'my_ESS(output[:, 0, :])')
    plt.show()