In [2]:
# dependencies

from itertools import product

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
from scipy.spatial.distance import cdist
from scipy.special import logsumexp

from infemus import infemus, griddy
from infemus.models import lgp_iid
from infemus.tools.metropolis_mv import eval_norm_prec

# apply seaborn's default theme once, right after the imports, so that every
# matplotlib figure drawn in the cells below shares the same look
sns.set_theme()

In [2]:
# config
# every tunable of the experiment lives in this cell; the cells below only read these names

seed = 2            # fed to np.random.default_rng in the data-generation cell

# algo settings

n_variates = 128    # number of independent replicate estimates drawn per method
n_interp = 2        # refinement factor: the evaluation grid has l * n_interp + 1 points per axis
n_samples = 64      # forwarded to the est_mlik routines as n_samples
n_burnin = 64       # forwarded to the est_mlik routines as n_burnin
bds_t1 = (0, 3.5)   # bounds for t1 on the log scale (the grid cell exponentiates a linspace)
bds_t2 = (-2, 2)    # bounds for t2 on the log scale
l = 16              # the sampling grid has l + 1 points per axis

# DGP settings

n_obs = 32          # number of simulated observations
n_dim = 1           # input dimension of x
mu = 0              # constant prior mean of the latent function
t1 = 1              # kernel parameters used to simulate the data:
t2 = 1              #   cov = t1 / t2 * exp(-dist ** 2 * t2 / n_dim)
phi = 0.25          # noise sd used by the model; the data cell simulates with phi / 2
sd_out = 1.5        # observations with sum(|x|) above this get a 10x larger noise sd

In [3]:
# DGP

def gen_inputs(n_obs, n_dim, mu, t1, t2, phi, sd_out, rng, n_curve=1000, bds_curve=(-2.5, 2.5)):
    """Simulate one regression data set and a dense draw of the latent curve.

    Parameters
    ----------
    n_obs, n_dim : int
        Shape of the standard-normal input matrix ``x``. The dense curve lives
        on a one-column grid, so ``cdist(xc, x)`` only works for ``n_dim == 1``.
    mu : scalar
        Constant prior mean of the latent function.
    t1, t2 : float
        Kernel parameters; the covariance is
        ``t1 / t2 * exp(-dist ** 2 * t2 / n_dim)``.
    phi : float
        Observation-noise sd. Points whose ``sum(|x|)`` exceeds ``sd_out`` are
        simulated with sd ``10 * phi`` instead.
    sd_out : float
        Threshold on ``sum(|x|)`` that marks a point as an outlier.
    rng : numpy random generator
        Source of all randomness (the notebook passes ``np.random.default_rng``).
    n_curve : int, optional
        Number of points of the dense grid ``xc`` (default 1000).
    bds_curve : tuple, optional
        ``(start, stop)`` of the dense grid (default ``(-2.5, 2.5)``).

    Returns
    -------
    (x, y, the) : inputs, noisy responses and latent values at ``x``.
    (xc, thec) : dense grid (column vector) and a draw of the latent curve on
        it, conditional on ``the``.
    (mean, eig_cov, phi) : prior mean vector, eigendecomposition of the prior
        covariance at ``x`` (both from ``gen_suff``) and the noise sd passed in.
    """
    x = rng.standard_normal(size=(n_obs, n_dim))
    mean, cov, eig_cov = gen_suff(x, mu, t1, t2)
    the = rng.multivariate_normal(mean, cov)

    # heteroscedastic noise: inputs far from the origin are 10x noisier
    is_outlier = np.sum(np.abs(x), 1) > sd_out
    y = rng.normal(the, np.where(is_outlier, 10 * phi, phi))

    xc = np.linspace(*bds_curve, n_curve)[:, np.newaxis]
    cov1 = t1 / t2 * np.exp(-np.square(cdist(xc, xc)) * t2 / x.shape[1])
    cov12 = t1 / t2 * np.exp(-np.square(cdist(xc, x)) * t2 / x.shape[1])

    # invert once and reuse; the explicit inverse and the left-to-right product
    # order are kept so that mu == 0 results stay the same as before
    gain = cov12 @ np.linalg.inv(cov)
    # Gaussian conditioning must be centred on the prior mean; without the
    # mu terms the curve is pulled towards 0 whenever mu != 0
    meanc = mu + gain @ (the - mean)
    covc = cov1 - gain @ cov12.T
    thec = rng.multivariate_normal(meanc, covc)
    return (x, y, the), (xc, thec), (mean, eig_cov, phi)

def gen_suff(x, mu, t1, t2, nug=1e-6):
    """Build the prior mean, covariance and its eigendecomposition at inputs ``x``.

    The covariance is ``t1 / t2 * exp(-dist ** 2 * t2 / x.shape[1])`` with
    ``nug`` added on the diagonal.

    Returns
    -------
    (mean, cov, eig_cov) : ``mean`` is ``mu`` repeated ``x.shape[0]`` times,
        ``cov`` is the nugget-regularised covariance and ``eig_cov`` is the
        result of ``np.linalg.eigh(cov)``.
    """
    mean = np.repeat(mu, x.shape[0])
    sq_dist = np.square(cdist(x, x))
    kernel = t1 / t2 * np.exp(-sq_dist * t2 / x.shape[1])
    # regularise once and hand the same matrix to eigh
    cov = kernel + nug * np.identity(len(mean))
    return mean, cov, np.linalg.eigh(cov)
 
def eval_logmargin(y, mean, cov, eig_cov, phi):
    """Evaluate ``eval_norm_prec`` for ``y`` with the noise variance folded in.

    ``phi ** 2`` is added to the eigenvalues ``eig_cov[0]``, and their
    reciprocals are passed to ``eval_norm_prec`` together with the eigenvectors
    ``eig_cov[1]``; the summed result is returned.
    ``cov`` is accepted but not used, which lets callers splat the tuple
    returned by ``gen_suff`` straight into this function.

    NOTE(review): presumably this is the Gaussian log marginal likelihood of
    ``y`` under covariance ``cov + phi**2 * I`` - confirm against
    ``infemus.tools.metropolis_mv.eval_norm_prec``.
    """
    eigvals, eigvecs = eig_cov[0], eig_cov[1]
    prec = 1 / (eigvals + phi ** 2)[np.newaxis]
    return np.sum(eval_norm_prec(y[np.newaxis], mean, eigvecs, prec))

In [10]:
# sampling

def eval_logprior(data, param, hyper):
    """Evaluate ``eval_norm_prec`` for ``param`` under the prior stored in ``hyper``.

    ``hyper`` is a 3-tuple ``(mean, cov, eig_cov)``; only the mean and the
    eigendecomposition are used (reciprocal eigenvalues ``1 / eig_cov[0]`` and
    eigenvectors ``eig_cov[1]``). ``data`` is part of the callback signature
    but is not read here.

    NOTE(review): presumably the Gaussian log prior density of the latent
    vector - confirm against ``eval_norm_prec``.
    """
    prior_mean, _cov_unused, eig = hyper
    eigvals, eigvecs = eig[0], eig[1]
    log_dens = eval_norm_prec(param[np.newaxis], prior_mean, eigvecs, 1 / eigvals[np.newaxis])
    return np.sum(log_dens)

def cond_sampler(data, hyper, rng):
    """Forward one conditional draw request to ``lgp_iid.sample_posterior``.

    ``data`` is unpacked as ``(y, phi)`` and ``hyper`` as ``(mean, cov, _)``;
    the third entry of ``hyper`` is ignored. Whatever ``sample_posterior``
    returns is passed back unchanged.
    """
    obs, noise_sd = data
    prior_mean, prior_cov, _eig_unused = hyper
    draw = lgp_iid.sample_posterior(obs, prior_mean, prior_cov, noise_sd, rng)
    return draw

def est_emus(lams, lame, y, x, mu, phi, rng, n_samples, n_burnin):
    """Run ``infemus.est_mlik`` for the sampling grid ``lams`` and evaluation grid ``lame``.

    Each row of ``lams`` / ``lame`` is splatted into ``gen_suff(x, mu, ...)``
    to build the per-grid-point hyperparameters; these are handed to
    ``infemus.est_mlik`` together with the data ``(y, phi)``, the callbacks
    ``cond_sampler`` and ``eval_logprior``, the generator and the two sample
    counts. The return value of ``infemus.est_mlik`` is passed through.
    """
    suffs = []
    for w_ in lams:
        suffs.append(gen_suff(x, mu, *w_))
    suffe = []
    for w_ in lame:
        suffe.append(gen_suff(x, mu, *w_))
    data = (y, phi)
    return infemus.est_mlik(suffs, suffe, data, cond_sampler, eval_logprior, rng, n_samples, n_burnin)

def est_griddy(lams, lame, y, x, mu, phi, rng, n_samples, n_burnin):
    """Run ``griddy.est_mlik`` on ``lams`` and spread the result onto ``lame``.

    The values returned for the sampling grid are carried over to the
    evaluation grid with nearest-neighbour ``griddata`` and then rescaled so
    that they sum to one.

    NOTE(review): ``griddata`` measures distance in the raw coordinates of
    ``lams`` / ``lame``, while the notebook's grids are log-spaced - confirm
    that nearest-neighbour in the raw scale is what is wanted.
    """
    suffs = []
    for w_ in lams:
        suffs.append(gen_suff(x, mu, *w_))
    data = (y, phi)
    us = griddy.est_mlik(suffs, data, cond_sampler, eval_logprior, rng, n_samples, n_burnin)
    ue = griddata(lams, us, lame, method='nearest')
    total = np.sum(ue)
    return ue / total

In [5]:
# generate input data
# one generator is created here and shared by every stochastic cell below

rng = np.random.default_rng(seed)
# NOTE(review): the data are simulated with noise sd phi / 2, while the ground
# truth and both estimators are run with phi - confirm this mismatch is intended
(x, y, _), (xc, thec), _ = gen_inputs(
    n_obs=n_obs,
    n_dim=n_dim,
    mu=mu,
    t1=t1,
    t2=t2,
    phi=phi / 2,
    sd_out=sd_out,
    rng=rng,
)

In [None]:
# draw input data [fig 2]
# observations as points, the dense latent draw as a black line

fig, ax = plt.subplots(figsize=(4, 3))
ax.scatter(x, y)
ax.plot(xc.flatten(), thec, color='black')
ax.set_xlabel('$x$')
ax.set_ylabel('$y$')

In [7]:
# generate grid
# lams_*: sampling grid, lame_*: n_interp-times finer evaluation grid;
# both are equally spaced on the log scale between the configured bounds

n_coarse = l + 1
n_fine = l * n_interp + 1

lams_t1 = np.exp(np.linspace(*bds_t1, num=n_coarse))
lams_t2 = np.exp(np.linspace(*bds_t2, num=n_coarse))
lame_t1 = np.exp(np.linspace(*bds_t1, num=n_fine))
lame_t2 = np.exp(np.linspace(*bds_t2, num=n_fine))

# all (t1, t2) pairs, with t1 varying slowest
lams = np.array([(a_, b_) for a_ in lams_t1 for b_ in lams_t2])
lame = np.array([(a_, b_) for a_ in lame_t1 for b_ in lame_t2])

# coordinate matrices for pcolormesh: rows follow t2, columns follow t1
t1_rec, t2_rec = np.meshgrid(lame_t1, lame_t2)

In [8]:
# evaluate ground truth
# eval_logmargin at every point of the evaluation grid, normalised to sum to one

log_u = [eval_logmargin(y, *gen_suff(x, mu, *w_), phi) for w_ in lame]
# subtract the log normaliser before exponentiating to avoid under/overflow
u = np.exp(np.asarray(log_u) - logsumexp(log_u))

# lame lists t1 slowest, so the transpose puts t2 on the rows like t1_rec / t2_rec
n_side = int(np.sqrt(len(u)))
u_rec = u.reshape(n_side, n_side).T

In [None]:
# estimate sampling distribution of respective methods
# n_variates replicate estimates per method; both loops draw from the same rng,
# so the griddy replicates depend on how much randomness the EMUS loop consumed
# NOTE(review): EMUS is run with n_burnin=0 while griddy uses the configured
# n_burnin - confirm this asymmetry is intended

u_emus = np.array([
    est_emus(lams, lame, y, x, mu, phi, rng, n_samples=n_samples, n_burnin=0)
    for _ in range(n_variates)
])
u_griddy = np.array([
    est_griddy(lams, lame, y, x, mu, phi, rng, n_samples=n_samples, n_burnin=n_burnin)
    for _ in range(n_variates)
])

In [None]:
# draw typical variates [fig 3]
# first replicate of each method as a heat map over the evaluation grid
# (left: EMUS, right: griddy)

n_emus = int(np.sqrt(len(u_emus[0])))
n_griddy = int(np.sqrt(len(u_griddy[0])))
# transpose so that rows follow t2, matching t1_rec / t2_rec
u_rec_emus = u_emus[0].reshape(n_emus, n_emus).T
u_rec_griddy = u_griddy[0].reshape(n_griddy, n_griddy).T

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(6, 3), sharex=True, sharey=True)

for ax_, u_rec_ in ((ax1, u_rec_emus), (ax2, u_rec_griddy)):
    ax_.pcolormesh(t1_rec, t2_rec, u_rec_, cmap='magma')
    ax_.set_xscale('log', base=2)
    ax_.set_yscale('log', base=2)

ax1.set_xlabel(r'$\tau_{1}$')
ax1.set_ylabel(r'$\tau_{2}$')
ax2.set_xlabel(r'$\tau_{1}$')

In [None]:
# draw profile sampling distribution [fig 4]
# black line: profile (max over the other parameter) of the ground truth;
# shaded band: 12.5-87.5 percentile range of the same profile across replicates
# (top row: EMUS, bottom row: griddy)

def _as_grid(flat):
    """Reshape one flattened replicate to a square grid (rows follow t1)."""
    side = int(np.sqrt(len(flat)))
    return np.reshape(flat, (side, side))

band = [12.5, 87.5]
palette = sns.color_palette()

u_emus_prof_t1 = [np.max(_as_grid(u_), 1) for u_ in u_emus]
u_emus_prof_t2 = [np.max(_as_grid(u_), 0) for u_ in u_emus]
u_griddy_prof_t1 = [np.max(_as_grid(u_), 1) for u_ in u_griddy]
u_griddy_prof_t2 = [np.max(_as_grid(u_), 0) for u_ in u_griddy]

# u_rec has t2 on the rows, so axis 0 profiles over t2 and axis 1 over t1
truth_t1 = np.max(u_rec, 0)
truth_t2 = np.max(u_rec, 1)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(5, 3), sharex='col', sharey=True)

top_row = (
    (ax1, lame_t1, truth_t1, u_emus_prof_t1),
    (ax2, lame_t2, truth_t2, u_emus_prof_t2),
)
for ax_, grid_, truth_, prof_ in top_row:
    ax_.plot(grid_, truth_, color='black')
    lo_, hi_ = np.percentile(prof_, band, 0)
    ax_.fill_between(grid_, lo_, hi_, color=palette[0], alpha=.5)
    ax_.set_xscale('log', base=2)
    ax_.set_yticklabels([])

bottom_row = (
    (ax3, lame_t1, truth_t1, u_griddy_prof_t1, r'$\tau_{1}$'),
    (ax4, lame_t2, truth_t2, u_griddy_prof_t2, r'$\tau_{2}$'),
)
for ax_, grid_, truth_, prof_, label_ in bottom_row:
    ax_.plot(grid_, truth_, color='black')
    lo_, hi_ = np.percentile(prof_, band, 0)
    # step='mid' draws the griddy band as a step function
    ax_.fill_between(grid_, lo_, hi_, step='mid', color=palette[1], alpha=.5)
    ax_.set_xlabel(label_)
    ax_.set_xscale('log', base=2)