In [None]:
# dependencies

import datetime

import numpy as np
import pandas as pd
import pystan as stan

import xfx.mvglm.multinomial
import xfx.misc.plot

In [None]:
# helper functions

def package_cgibbs_samples(samples, factor_names, response_names):
    """Turn raw cGibbs draws into labelled random-effect and precision frames.

    Args:
        samples: iterable of `(rfx, prec)` pairs, one pair per retained draw.
            `rfx` is itself iterable with one array per model term.
            NOTE(review): the stacked `prec` draws must be at least 4-D, since
            the trace is taken over axes 2 and 3 — presumably
            (draw, factor, k, k); confirm against the sampler.
        factor_names: factor labels; `'_const'` is prepended for the
            random-effect frame only.
        response_names: labels used for the response index level.

    Returns:
        Tuple `(rfx_frame, prec_frame)` as built by `package_rfx_samples` and
        `package_prec_samples`, both tagged with algo `'cGibbs'`.
    """
    rfx_draws, prec_draws = zip(*samples)
    # regroup from "one list of terms per draw" to "one stacked array per term"
    rfx_by_term = [np.array(term_draws) for term_draws in zip(*rfx_draws)]
    # trace over the trailing matrix axes, then transpose so factors are on rows
    prec_traces = np.trace(prec_draws, axis1=2, axis2=3).T
    rfx_frame = package_rfx_samples(rfx_by_term, ['_const'] + factor_names, response_names, 'cGibbs')
    prec_frame = package_prec_samples(prec_traces, factor_names, 'cGibbs')
    return rfx_frame, prec_frame

def package_nuts_samples(samples, data, factor_names, response_names):
    """Turn extracted Stan draws into labelled random-effect and precision frames.

    Args:
        samples: mapping with keys `'intercept'`, `'coefs'` and `'cov_factor'`.
            Index 0 is taken on the second axis of each array, and the last
            entry along the third axis of `'intercept'` and `'coefs'` is dropped.
            NOTE(review): presumably arrays are (draw, chain, ...) as returned
            by `extract(..., permuted=False)` — confirm.
        data: mapping holding 1-based inclusive `'lo'` / `'hi'` coefficient
            bounds, one pair per factor.
        factor_names: factor labels; `'_const'` is prepended for the
            random-effect frame only.
        response_names: labels used for the response index level.

    Returns:
        Tuple `(rfx_frame, prec_frame)` as built by `package_rfx_samples` and
        `package_prec_samples`, both tagged with algo `'Stan/NUTS'`.
    """
    first_chain = 0
    intercept_draws = samples['intercept'][:, first_chain, :-1]
    coef_draws = samples['coefs'][:, first_chain, :-1]
    # shift the 1-based inclusive bounds to 0-based inclusive bounds
    bounds = list(zip(data['lo'] - 1, data['hi'] - 1))

    per_draw = []
    for alp0_, alp_ in zip(intercept_draws, coef_draws):
        # the intercept gets a leading axis so it looks like a one-level factor
        terms = [alp0_[np.newaxis]]
        terms.extend(alp_[:, lo:(hi + 1)].T for lo, hi in bounds)
        per_draw.append(terms)
    # regroup from "one list of terms per draw" to "one stacked array per term"
    rfx_by_term = [np.array(term_draws) for term_draws in zip(*per_draw)]

    # trace of the inverse of each factor-level matrix, factors on rows after transpose
    prec_traces = np.array([
        [np.trace(np.linalg.inv(cov)) for cov in draw_covs]
        for draw_covs in samples['cov_factor'][:, first_chain]
    ]).T

    rfx_frame = package_rfx_samples(rfx_by_term, ['_const'] + factor_names, response_names, 'Stan/NUTS')
    prec_frame = package_prec_samples(prec_traces, factor_names, 'Stan/NUTS')
    return rfx_frame, prec_frame

def package_rfx_samples(rfx_samples, factor_names, response_names, algo_name):
    """Stack per-term random-effect draws into one long, labelled frame.

    Args:
        rfx_samples: sequence of arrays, one per term, each indexed as
            (draw, level, response).
        factor_names: term labels, paired positionally with `rfx_samples`.
        response_names: labels for the last axis of every array.
        algo_name: tag stored in the `'algo'` index level.

    Returns:
        DataFrame indexed by (`algo`, `factor`, `level`, `response`) with one
        column per draw.
    """
    frames = []
    for term_draws, factor_name in zip(rfx_samples, factor_names):
        level_count = term_draws.shape[1]
        for level in range(level_count):
            # rows: responses, columns: draws
            frame = pd.DataFrame(term_draws[:, level].T, index=response_names)
            frame = frame.rename_axis(index='response', columns='iter')
            frame['algo'] = algo_name
            frame['factor'] = factor_name
            frame['level'] = level
            frames.append(frame)
    stacked = pd.concat(frames).reset_index()
    return stacked.set_index(['algo', 'factor', 'level', 'response'])

def package_prec_samples(prec_samples, factor_names, algo_name):
    """Label a (factor x draw) array of precision summaries.

    Args:
        prec_samples: 2-D array-like with one row per factor and one column
            per draw.
        factor_names: row labels.
        algo_name: tag stored in the `'algo'` index level.

    Returns:
        DataFrame indexed by (`algo`, `factor`) with one column per draw.
    """
    frame = pd.DataFrame(prec_samples, index=factor_names)
    frame = frame.rename_axis(index='factor', columns='iter')
    frame['algo'] = algo_name
    return frame.reset_index().set_index(['algo', 'factor'])

def est_acf(samples, n_lags):
    """Apply `xfx.misc.plot.est_acf` to every row of `samples`.

    Args:
        samples: DataFrame whose rows are individual chains of draws.
        n_lags: forwarded unchanged to `xfx.misc.plot.est_acf`.

    Returns:
        DataFrame with the same index as `samples`; the per-row results are
        expanded into columns and the column axis is named `'lag'`.
    """
    def _row_acf(row):
        return xfx.misc.plot.est_acf(row.values, n_lags)

    expanded = samples.apply(_row_acf, axis=1, raw=False, result_type='expand')
    return expanded.rename_axis(columns='lag')

def est_ess(acfs, titer):
    """Summarise sampling efficiency from per-row autocorrelation estimates.

    Args:
        acfs: DataFrame with one autocorrelation estimate per row.
        titer: time per iteration, used to convert per-iteration figures to
            per-second figures.

    Returns:
        DataFrame indexed like `acfs` with columns `iat[iter]` (result of
        `xfx.misc.plot.est_int_autocor` per row), `iat[sec]` (= iat * titer),
        `rate[iter]` (= 1 / (2 * iat)) and `rate[sec]` (= rate[iter] / titer).
    """
    iat = acfs.apply(
        lambda row: xfx.misc.plot.est_int_autocor(row.values),
        axis=1,
        raw=False,
        result_type='expand',
    ).rename('iat')

    summary = pd.DataFrame(index=acfs.index)
    summary['iat[iter]'] = iat
    summary['iat[sec]'] = summary['iat[iter]'] * titer
    summary['rate[iter]'] = 1 / (2 * summary['iat[iter]'])
    summary['rate[sec]'] = summary['rate[iter]'] / titer
    return summary

In [None]:
# config

# survey columns used as model factors
factor_names = [
    'province_id',
    'activity',
    'age',
    'education',
    'municipality_size',
    'voting_recall',
    'gender',
]
# parties kept as their own response category; all others are pooled downstream
response_names = [
    'conservatives',
    'social_democrats',
]
# voting intentions removed from the data before modelling
exclude = [
    'abstention',
    'invalid',
]
# seed for the numpy random generator
seed = 0

In [None]:
# construct inputs

cis = pd.read_csv('paper/data/cis.csv')
# keep a single study and drop excluded or missing voting intentions; `.copy()` makes the
# column assignments below act on an independent frame (avoids SettingWithCopyWarning)
cis = cis.loc[(cis.study_id == '2019-11-10') & (~cis.voting_intention.isin(exclude)) & (~cis.voting_intention.isna())].copy()
# pool every party outside `response_names` into a single '_others' category
cis['response'] = np.where(cis.voting_intention.isin(response_names), cis.voting_intention, '_others')
cis['voting_recall'] = np.where(cis.voting_recall.isin(response_names), cis.voting_recall, '_others')
# keep only modelled columns and drop rows with any missing value
cis = cis[factor_names + ['response']].dropna()
# integer-encode every column through its pandas categorical codes
codes = cis.apply(lambda x: x.astype('category').cat.codes)
# one indicator column per response code; dtype is pinned because pandas >= 2.0
# defaults to bool where earlier versions produced uint8
response = pd.get_dummies(codes.response, dtype=np.uint8)
# keyword form: the positional axis argument of `drop` was removed in pandas 2.0
indices = codes.drop(columns='response')
# number of levels per factor; cast before adding 1 because the codes are small integer types
n_levels = np.max(indices, 0).astype(np.int64) + 1
rng = np.random.default_rng(seed)

In [None]:
# construct nuts inputs

# one row per observed combination of factor codes, one column per response code;
# combinations with no observations for a response are filled with 0
counts = codes.groupby(codes.columns.tolist()).size().unstack('response').fillna(0)
# 1-based index of each factor's first coefficient in the concatenated coefficient vector
coef_lo = np.hstack([0, np.cumsum(n_levels.iloc[:-1])]) + 1
# shift each factor's level code by that factor's offset
coef_idx = counts.index.to_frame() + coef_lo
nuts_inputs = {
    'n_strata': counts.shape[0],
    'n_parties': counts.shape[1],
    'n_factors': len(n_levels),
    'n_coefs': n_levels.sum(),
    'prior_df': counts.shape[1] - 1,
    'lo': coef_lo,
    'hi': np.cumsum(n_levels).values,
    'coef_idx': coef_idx.values,
    # `astype(int)` replaces the deprecated element-wise `applymap(int)`
    'counts': counts.astype(int).values}

In [None]:
# sample nuts

nuts_n_samples = 1000
stan_model = stan.StanModel('paper/stan/xfx_mvlogit2.stan')
# time only the sampling call; model compilation above is excluded
t0 = datetime.datetime.now()
# single chain with as many warmup iterations as retained draws; seeded from the config
# cell's `seed` instead of a separate hard-coded literal
nuts_sampler = stan_model.sampling(data=nuts_inputs, iter=2*nuts_n_samples, warmup=nuts_n_samples, chains=1, seed=seed)
t1 = datetime.datetime.now()
# wall-clock seconds of the whole run (warmup included) per retained draw
nuts_titer = (t1 - t0).total_seconds() / nuts_n_samples
# mean number of leapfrog steps per post-warmup draw of the first chain
nuts_leaps = nuts_sampler.get_sampler_params(inc_warmup=False)[0]['n_leapfrog__'].mean()
nuts_samples = nuts_sampler.extract(['intercept', 'coefs', 'cov_factor'], permuted=False)

In [None]:
# nuts iteration time: seconds per retained draw, and mean leapfrog steps per post-warmup draw

print(nuts_titer, nuts_leaps)

In [None]:
# compute nuts summaries

# label the raw draws, then estimate ACFs (64 lags) and efficiency summaries
nuts_rfx_samples, nuts_prec_samples = package_nuts_samples(nuts_samples, nuts_inputs, factor_names, response_names)
nuts_rfx_acf = est_acf(nuts_rfx_samples, 64)
nuts_prec_acf = est_acf(nuts_prec_samples, 64)
nuts_rfx_ess = est_ess(nuts_rfx_acf, nuts_titer)
nuts_prec_ess = est_ess(nuts_prec_acf, nuts_titer)

In [None]:
# construct cgibbs inputs

# one prior value per factor, each equal to the number of named responses
prior_n_tau = np.repeat(len(response_names), len(n_levels))
# positional arguments for `xfx.mvglm.multinomial.sample_posterior`
cgibbs_inputs = (
    response.values,
    n_levels.values,
    indices.values,
    None,
    prior_n_tau,
)

In [None]:
# sample cgibbs

cgibbs_n_samples = 10000
cgibbs_sampler = xfx.mvglm.multinomial.sample_posterior(*cgibbs_inputs, ome=rng)
# advance the generator once before the clock starts; this first draw is discarded
next(cgibbs_sampler)
t0 = datetime.datetime.now()
# draw twice the retained count, then discard the first half as burn-in
cgibbs_samples = [next(cgibbs_sampler) for _ in range(2 * cgibbs_n_samples)]
del cgibbs_samples[:cgibbs_n_samples]
t1 = datetime.datetime.now()
# wall-clock seconds of the whole timed run (burn-in included) per retained draw
cgibbs_titer = (t1 - t0).total_seconds() / cgibbs_n_samples

In [None]:
# cgibbs iteration time: wall-clock seconds of the timed run per retained draw

print(cgibbs_titer)

In [None]:
# compute cgibbs summaries

# label the raw draws, then estimate ACFs (256 lags) and efficiency summaries
cgibbs_rfx_samples, cgibbs_prec_samples = package_cgibbs_samples(cgibbs_samples, factor_names, response_names)
cgibbs_rfx_acf = est_acf(cgibbs_rfx_samples, 256)
cgibbs_prec_acf = est_acf(cgibbs_prec_samples, 256)
cgibbs_rfx_ess = est_ess(cgibbs_rfx_acf, cgibbs_titer)
cgibbs_prec_ess = est_ess(cgibbs_prec_acf, cgibbs_titer)

In [None]:
# nuts random effects ACFs: one row per (algo, factor, level, response), columns named 'lag'

nuts_rfx_acf

In [None]:
# nuts random effects ESS: columns iat[iter], iat[sec], rate[iter], rate[sec]

nuts_rfx_ess

In [None]:
# cgibbs random effects ACFs: one row per (algo, factor, level, response), columns named 'lag'

cgibbs_rfx_acf

In [None]:
# cgibbs random effects ESS: columns iat[iter], iat[sec], rate[iter], rate[sec]

cgibbs_rfx_ess