In [1]:
%matplotlib inline

import os
import sys
import emcee
import scipy
import pandas as pd

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from ipywidgets import interact
from functools import partial

# Put the parent directory on sys.path (once) so modules located there can be
# imported; presumably that is where tutorial_utils lives -- confirm.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
# NOTE(review): the star import hides which names come from tutorial_utils.
# Later cells use squared_exponential, log_likelihood, fit_and_predict,
# sample_from, plot_truth, plot_measurements, plot_fit_and_predict, X, y and
# x_gridded without defining them -- presumably they originate here; confirm
# and consider importing them explicitly.
from tutorial_utils import *

# Seaborn theme applied to every figure drawn below.
sns.set_style('darkgrid')

# Load the line_profiler IPython extension (the source of the %lprun magic).
%load_ext line_profiler

# Bayesian Inference

In [None]:
import emcee
# Sampler geometry: log_prob unpacks three values per position, so ndim is 3;
# the ensemble uses 200 walkers.
ndim = 3
nwalkers = 200
# ivar is not referenced by any other visible cell; it is kept so the
# module-level name and the order of random draws stay as they were.
ivar = 1. / np.random.rand(ndim)
# One row per walker, each coordinate drawn uniformly from [0, 1).
p0 = np.random.uniform(size=(nwalkers, ndim))

def log_prob(params):
    """Score one sampler position by the data log-likelihood.

    ``params`` holds three values that are exponentiated before use, in the
    order (sigma, ell, meas_noise). The first two configure
    ``squared_exponential``; the third is forwarded as ``meas_noise``.

    Returns whatever ``log_likelihood`` returns for the module-level ``X`` and
    ``y``. No explicit prior term is added.
    """
    amplitude, length_scale, noise_level = np.exp(params)
    kernel = partial(squared_exponential, sigma=amplitude, ell=length_scale)
    return log_likelihood(kernel, X, y, meas_noise=noise_level)

# Build an ensemble sampler with nwalkers walkers over ndim dimensions that
# scores positions with log_prob.
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob)
# Advance the ensemble 2000 steps starting from p0. The next cell reads element
# 0 of this result and exponentiates it, i.e. treats it as log-parameters.
sampler_output = sampler.run_mcmc(p0, 2000)

In [None]:
# The sampler works on log-parameters; exponentiate element 0 of its output to
# get values on the natural scale.
posterior_params = np.exp(sampler_output[0])
# Column order matches the unpacking order used in log_prob.
df = pd.DataFrame(data=posterior_params, columns=['sigma', 'ell', 'noise'])
# Pairwise scatter plots and per-parameter histograms of the sampled values.
sns.pairplot(data=df)

In [None]:
def draw_bayesian_sample():
    """Draw one predictive curve using a randomly chosen row of posterior_params.

    A row index is picked uniformly at random; columns 0, 1 and 2 of that row
    are used as sigma, ell and measurement noise respectively. The model is
    fit to the module-level ``X``/``y`` and evaluated on ``x_gridded``, then a
    single draw is taken from the resulting mean/covariance.

    Returns column 0 of the ``sample_from(..., size=1)`` result.
    """
    row_index = np.random.randint(0, posterior_params.shape[0])
    chosen = posterior_params[row_index]
    sigma, ell, noise = chosen[0], chosen[1], chosen[2]
    kernel = partial(squared_exponential, sigma=sigma, ell=ell)
    mean, cov = fit_and_predict(kernel, X, y, x_gridded, meas_noise=noise)
    return sample_from(mean, cov, size=1)[:, 0]

# Stack 1000 independent draws, one per row.
samples = np.array([draw_bayesian_sample() for _ in range(1000)])

# Pointwise summary across the draws (axis 0).
bayesian_mean = samples.mean(axis=0)
bayesian_sd = samples.std(axis=0)

#for sample in samples:
#    plt.plot(x_gridded, sample, color="steelblue", alpha=0.5)
        

In [None]:
plot_truth()
plot_measurements()

# Bayesian prediction: pointwise mean of the sampled curves with a band of one
# standard deviation either side.
plt.plot(x_gridded, bayesian_mean, lw=5, color='forestgreen', label="Bayesian mean")
plt.fill_between(x_gridded, bayesian_mean + bayesian_sd,
                 bayesian_mean - bayesian_sd,
                 color='forestgreen', label="Bayesian +/- 1 sd", alpha=0.3)

# MLE prediction: a single fit at the mle_* hyperparameters. The name cov_func
# is kept because generate_timings reads it as a module-level variable.
cov_func = partial(squared_exponential, ell=mle_ell, sigma=mle_sigma)
mle_mean, mle_cov = fit_and_predict(cov_func, X, y, x_gridded, meas_noise=mle_meas_noise)
# Pointwise standard deviation from the diagonal of the predictive covariance.
mle_sd = np.sqrt(np.diag(mle_cov))
plt.plot(x_gridded, mle_mean, lw=5, color='steelblue', label="MLE mean")
plt.fill_between(x_gridded, mle_mean + mle_sd,
                 mle_mean - mle_sd,
                 alpha=0.3,
                 color='steelblue', label="MLE +/- 1 sd")

# Every artist used to be labelled "prediction" and no legend was drawn, so the
# two predictors could only be told apart by guessing at the colours.
plt.legend()
plt.ylim([-1, 5])

In [None]:
# Plot the fit obtained with the mle_* hyperparameters on its own.
plot_fit_and_predict(mle_ell, mle_sigma, mle_meas_noise)

In [None]:
import timeit

def generate_timings():
    """Yield ``(n, seconds_per_call)`` for fit_and_predict at growing sizes.

    Eleven problem sizes are spaced logarithmically between 1000 and 8000
    measurements. For each size, random inputs are generated once and
    ``fit_and_predict`` is called repeatedly (using the module-level
    ``cov_func``) until at least 10 seconds have elapsed; the mean wall-clock
    time per call is reported.

    Yields:
        tuple: ``(n, mean_seconds)`` where ``n`` is the integer number of
        measurements.
    """
    # logspace, not linspace: the bounds are base-10 exponents. Rounding the
    # exponents themselves (3.0 .. 3.9) gave sizes of just 3 or 4.
    for n in np.logspace(np.log10(1000), np.log10(8000), 11):
        # A plain int is needed for the size= argument below.
        n = int(round(n))
        xs = np.random.normal(size=n)
        ys = xs + np.random.normal(size=n)
        start_time = timeit.default_timer()
        count = 0
        # The loop body always runs at least once, so count is never zero in
        # the division below.
        while timeit.default_timer() - start_time < 10.:
            count += 1
            fit_and_predict(cov_func, xs, ys, np.array([4]), meas_noise=0.1)
        yield n, (timeit.default_timer() - start_time) / count

# Run the benchmark here so this cell works on a fresh kernel; with the
# assignment commented out, sizes and times were undefined. This is slow:
# generate_timings spends at least 10 seconds on each of its 11 sizes.
sizes, times = zip(*generate_timings())
plt.plot(sizes, times, marker=".")
plt.xlabel("Number of Measurements")
plt.ylabel("Fit / Predict time [seconds]")