In [1]:
# dependencies

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.special import expit
from xfx.glm.gaussian import sample_posterior as sample_gaussian
from xfx.glm.binomial import sample_posterior as sample_binomial
from tests.test_glm import sample_mar_fixture
from xfx.misc import plot

# apply seaborn's default theme to every figure created after this point
sns.set()

In [3]:
# spaghetti

def sample_iat(n_levels_, n_samples, n_warmup, n_acf_lags, ome):
    """Simulate one dataset and estimate IATs for four model/sampler combinations.

    A fixture is drawn with `sample_mar_fixture`, a binary response is derived
    from it, and the Gaussian and binomial samplers are each run twice: once with
    their boolean flag set to True (labelled 'collapsed') and once with it set to
    False (labelled 'vanilla'). The kept draws are packaged with
    `package_samples` and handed to `est_iat`.

    Parameters
    ----------
    n_levels_ : scalar repeated twice before being passed to the fixture generator
        and to both samplers (presumably the number of levels of each of two
        factors -- TODO confirm against the xfx API).
    n_samples : number of draws kept per sampler.
    n_warmup : number of initial draws discarded per sampler.
    n_acf_lags : forwarded unchanged to `est_iat`.
    ome : `numpy.random.Generator` shared by the fixture generator, the uniform
        draw for the binary response, and all four samplers.

    Returns
    -------
    Whatever `est_iat` returns for the concatenated packaged samples (named
    'iat'), indexed by model, algo, n_obs, run, factor and stat.
    """

    # Tag this run with the bit generator's state *before* anything is drawn.
    # Read it through the public `bit_generator.state` property rather than the
    # private pickling hook `Generator.__getstate__`: the hook is not a stable API
    # (it is believed to return None on NumPy 2.x -- confirm against the NumPy
    # docs), whereas the property yields the same dict the hook used to return.
    run = ome.bit_generator.state['state']['state']

    # The numeric arguments (1, 1e100, 1e-100, 0.9) are passed positionally as in
    # the original notebook; their meaning is defined by `sample_mar_fixture`.
    gauss_fixture = sample_mar_fixture(np.repeat(n_levels_, 2), 1, 1e100, 1e-100, 0.9, ome)[0]

    # NOTE(review): the binary response is 1 where expit(.) < u, i.e. with
    # probability 1 - expit(.), not expit(.). Left untouched so results do not
    # change -- confirm this orientation is intended.
    binom_fixture = (
        gauss_fixture[0],
        np.float64(expit(gauss_fixture[1]) < ome.uniform(size=len(gauss_fixture[1]))),
    )

    # Samplers are created in a fixed order (gaussian True/False, then binomial
    # True/False) and drained in that same order further down; since they all
    # share `ome`, reordering would change the random streams.
    gauss_collapsed_sampler, gauss_vanilla_sampler = (
        sample_gaussian(np.repeat(n_levels_, 2), *gauss_fixture, np.repeat(1, 2), np.ones(2), np.inf, 1, b, ome)
        for b in (True, False))
    binom_collapsed_sampler, binom_vanilla_sampler = (
        sample_binomial(np.repeat(n_levels_, 2), *binom_fixture, np.repeat(1, 2), np.ones(2), b, ome)
        for b in (True, False))

    # Keep the first three components of every draw and drop the warm-up draws.
    gauss_collapsed_samples, gauss_vanilla_samples, binom_collapsed_samples, binom_vanilla_samples = (
        [next(sampler)[:3] for _ in range(n_samples + n_warmup)][n_warmup:]
        for sampler in (
            gauss_collapsed_sampler, gauss_vanilla_sampler,
            binom_collapsed_sampler, binom_vanilla_sampler))

    n_obs = len(gauss_fixture[1])
    labelled_samples = (
        ('linear', 'collapsed', gauss_collapsed_samples),
        ('linear', 'vanilla', gauss_vanilla_samples),
        ('logistic', 'collapsed', binom_collapsed_samples),
        ('logistic', 'vanilla', binom_vanilla_samples),
    )
    df = pd.concat([
        package_samples(samples, mod, algo, n_obs, run)
        for mod, algo, samples in labelled_samples])

    return est_iat(df, n_acf_lags)

def package_samples(samples, model, algo, n_obs, run):
    """Reshape a list of sampler draws into a wide table of per-iteration summaries.

    Each element of `samples` is unpacked into three parts. The first is stored
    as the 'mean' statistic of factor 0. The second is stacked over iterations
    and averaged along axis 2, giving one 'mean' trace per entry of axis 1
    (numbered as factors 1, 2, ...). The third is stacked and transposed, giving
    one 'prior_prec' trace per factor.

    Returns a DataFrame with one row per (model, algo, n_obs, run, factor, stat)
    and one column per iteration (columns named 'iter').
    """

    intercepts, effects, precisions = zip(*samples)
    n_iter = len(samples)

    def _trace(values, factor, stat):
        # long-format frame holding a single trace
        return pd.DataFrame({
            'iter': np.arange(n_iter),
            'value': values,
            'factor': [factor] * n_iter,
            'stat': [stat] * n_iter,
        })

    effect_means = np.mean(effects, 2).T    # rows: factors, columns: iterations
    prior_precs = np.array(precisions).T    # rows: factors, columns: iterations

    traces = [_trace(np.array(intercepts), 0, 'mean')]
    for k in range(effect_means.shape[0]):
        traces.append(_trace(effect_means[k], k + 1, 'mean'))
        traces.append(_trace(prior_precs[k], k + 1, 'prior_prec'))

    long_df = pd.concat(traces)
    for column, label in (('model', model), ('algo', algo), ('n_obs', n_obs), ('run', run)):
        long_df[column] = label

    index_cols = ['model', 'algo', 'n_obs', 'run', 'factor', 'stat', 'iter']
    wide_df = long_df.set_index(index_cols).unstack('iter')
    return wide_df['value']

def est_acf(samples, n_lags):
    """Apply `plot.est_acf` to every row of `samples`.

    Each row's values are passed to `plot.est_acf` together with `n_lags`; the
    per-row results are expanded into columns, and the column axis is named
    'lag'.
    """

    def _row_acf(trace):
        return plot.est_acf(trace.values, n_lags)

    acf_by_row = samples.apply(_row_acf, axis=1, raw=False, result_type='expand')
    return acf_by_row.rename_axis('lag', axis='columns')

def est_iat(samples, n_acf_lags):
    """Estimate one integrated autocorrelation value per row of `samples`.

    The rows are first turned into autocorrelation estimates by `est_acf`;
    `plot.est_int_autocor` is then applied to each row of that table and the
    result is renamed to 'iat'.
    """

    acf_by_row = est_acf(samples, n_acf_lags)

    def _row_iat(acf_row):
        return plot.est_int_autocor(acf_row.values)

    iat_by_row = acf_by_row.apply(_row_iat, axis=1, raw=False, result_type='expand')
    return iat_by_row.rename('iat')

In [3]:
# config

seed = 0                            # seed of the shared random generator
n_runs = 10                         # independent repetitions of the whole experiment
n_levels = 2 ** np.arange(5, 11)    # 32, 64, ..., 1024
n_samples = 10_000                  # draws kept per sampler
n_warmup = 10                       # draws discarded per sampler
n_acf_lags = 128                    # passed to the autocorrelation estimator

# one generator is threaded through every simulation and sampler call
ome = np.random.default_rng(seed)

In [28]:
# run

# One flat pass: runs are the outer loop and level counts the inner loop, so the
# shared generator `ome` is consumed in the same order as with nested concats.
df = pd.concat([
    sample_iat(n_levels_, n_samples, n_warmup, n_acf_lags, ome)
    for _ in range(n_runs)
    for n_levels_ in n_levels
])

In [9]:
# dump

# persist the IAT estimates so the plotting cells can run without re-sampling
df.to_csv(path_or_buf='paper/output/collapsed_vs_vanilla_ess.csv')

In [2]:
# configure for export

import matplotlib

# render through the PGF backend so figures can be embedded in the LaTeX paper
matplotlib.use('pgf')

pgf_rc = dict([
    ('pgf.texsystem', 'xelatex'),
    ('font.family', 'serif'),
    ('text.usetex', False),
    ('pgf.rcfonts', False),
    ('axes.labelsize', 10),
    ('axes.titlesize', 10),
    ('font.size', 10),
])
matplotlib.rcParams.update(pgf_rc)

# reload the dumped estimates; the former index levels come back as columns
df = pd.read_csv('paper/output/collapsed_vs_vanilla_ess.csv')

In [4]:
# item 1

# NOTE(review): this cell rebinds `df` and rescales `iat`, so running it twice
# without re-reading the CSV doubles the values again.
group_cols = ['model', 'algo', 'n_obs', 'factor', 'stat']
df = df.groupby(group_cols)['iat'].mean().reset_index()   # average over runs
df['iat'] = df['iat'] * 2
df['group'] = df['factor'].astype(str) + '-' + df['stat']

g = sns.relplot(
    data=df,
    x='n_obs',
    y='iat',
    hue='algo',
    row='model',
    col='factor',
    style='stat',
    kind='line',
    aspect=4/3,
    height=1.25,
    markers=False,
    legend=False,
    lw=1,
    facet_kws=dict(margin_titles=True),
)

# decade ticks: 1e2..1e5 on x, 1e0..1e2 on y
xticks = 10 ** np.arange(2, 6)
yticks = 10 ** np.arange(3)

g.set_titles(row_template='{row_name}', col_template='factor {col_name}')
g.set(xscale='log', yscale='log')
g.set(xticks=xticks, yticks=yticks)

# label only the left column and the bottom-middle panel
top_left, bottom_left = g.axes[0, 0], g.axes[1, 0]
bottom_mid, bottom_right = g.axes[1, 1], g.axes[1, 2]
top_left.set(ylabel='IAT')
bottom_left.set(xlabel=None, ylabel='IAT')
bottom_mid.set(xlabel='number of observations')
bottom_right.set(xlabel=None)
g.fig.subplots_adjust(wspace=.1, hspace=.16)

plt.savefig('paper/plots/collapsed_vs_vanilla_ess.pdf', bbox_inches='tight')