In [None]:
# if need be, manually add the local project root to sys.path (the module search path) and change the working directory

import os
import sys

project = '/' # change to local project root
# guard the append so that re-running this cell does not stack duplicate entries on sys.path
if project not in sys.path:
    sys.path.append(project)
# relative paths used further down are resolved against this directory
os.chdir(project)

In [None]:
# dependencies

import numpy as np
import pandas as pd

import xfx.mvglm.multinomial
import xfx.misc.plot

In [None]:
# helper functions

def package_samples(samples, factor_names, response_names):
    """Repackage raw sampler output into two labelled data frames.

    samples: sequence of (random effects, precisions) pairs, one pair per iteration.
    factor_names: list of factor labels; '_const' is prepended for the first random-effect group.
    response_names: labels passed through to package_rfx_samples.

    Returns the tuple (random-effects frame, precision-trace frame).
    """

    rfx_draws, prec_draws = zip(*samples)
    # regroup the per-iteration draws into one stacked array per random-effect group
    rfx_by_group = [np.array(group_draws) for group_draws in zip(*rfx_draws)]
    # reduce every precision matrix (axes 2 and 3 of the stacked draws) to its trace
    prec_traces = np.trace(prec_draws, axis1=2, axis2=3)
    rfx_frame = package_rfx_samples(rfx_by_group, ['_const'] + factor_names, response_names)
    prec_frame = package_prec_samples(prec_traces, factor_names)
    return rfx_frame, prec_frame

def package_rfx_samples(rfx_samples, factor_names, response_names):
    """Stack random-effect draws into one frame indexed by (factor, level, response).

    rfx_samples: one array per factor, indexed as [iteration, level, response].
    factor_names: label for each array in rfx_samples, in the same order.
    response_names: labels for the last axis of every array.

    Returns a data frame with one column per iteration (columns axis named 'iter').
    """

    frames = []
    for factor_name, draws in zip(factor_names, rfx_samples):
        for level in range(draws.shape[1]):
            # rows are responses, columns are iterations for this (factor, level) pair
            frame = (
                pd.DataFrame(draws[:, level].T, index=response_names)
                .rename_axis(index='response', columns='iter')
                .assign(factor=factor_name, level=level)
            )
            frames.append(frame)
    stacked = pd.concat(frames).reset_index()
    return stacked.set_index(['factor', 'level', 'response'])

def package_prec_samples(prec_samples, factor_names):
    """Label precision draws as a frame with one row per factor and one column per iteration.

    prec_samples: 2-D array indexed as [iteration, factor].
    factor_names: row labels, one per factor.

    Returns a data frame whose index is named 'factor' and whose columns axis is named 'iter'.
    """

    by_factor = pd.DataFrame(prec_samples.T, index=factor_names)
    return by_factor.rename_axis(index='factor', columns='iter')

In [None]:
# select predictors and responses

# responses modelled explicitly; every other voting intention is pooled into '_others' when the inputs are built
response_names = ['conservatives', 'social_democrats']
# voting intentions removed from the data before fitting
exclude = ['abstention', 'invalid']
# categorical predictors, one random-effect group each
factor_names = [
    'province_id',
    'activity',
    'age',
    'education',
    'municipality_size',
    'voting_recall',
    'gender',
]

In [None]:
# configure algorithm (hyperprior on the random effect prior precision is Wishart(prior_pseudo_tau, prior_guess_tau / prior_pseudo_tau))

# chain length: n_warmup draws are discarded, n_samples draws are kept
n_warmup = 1000
n_samples = 1000
seed = 0

# one hyperprior entry per factor: a pseudo-count equal to the number of responses ...
prior_pseudo_tau = np.full(len(factor_names), float(len(response_names)))
# ... and an identity guess (the same array object is repeated for every factor)
prior_guess_tau = [np.identity(len(response_names))] * len(factor_names)

In [None]:
# create inputs

# the relative path is resolved against the current working directory
cis = pd.read_csv('demos/data/cis.csv')
# keep a single study and drop rows whose voting intention is excluded or missing;
# .copy() detaches the selection so the column assignments below do not trigger SettingWithCopyWarning
cis = cis.loc[(cis.study_id == '2019-11-10') & (~cis.voting_intention.isin(exclude)) & cis.voting_intention.notna()].copy()
# pool every value outside response_names into a single '_others' category
cis['response'] = np.where(cis.voting_intention.isin(response_names), cis.voting_intention, '_others')
cis['voting_recall'] = np.where(cis.voting_recall.isin(response_names), cis.voting_recall, '_others')
# rows with any missing predictor are dropped, so the category codes below contain no -1 sentinel
cis = cis[factor_names + ['response']].dropna()
# integer-encode every column by its sorted category order
codes = cis.apply(lambda x: x.astype('category').cat.codes)
# one indicator column per response code; pandas >= 2.0 defaults to bool indicators,
# so request the numeric dtype that earlier versions produced
n_true = pd.get_dummies(codes.response, dtype=np.uint8)
# keyword form: the positional axis argument of DataFrame.drop was removed in pandas 2.0
indices = codes.drop(columns='response')
# number of levels per factor = largest code + 1
n_levels = indices.max(axis=0).astype(np.int64) + 1
rng = np.random.default_rng(seed)

In [None]:
# sample

# build the posterior sampler; it is consumed one draw at a time with next()
sampler = xfx.mvglm.multinomial.sample_posterior(
    n_true.values,
    n_levels,
    indices.values,
    prior_n_tau=prior_pseudo_tau,
    prior_est_tau=prior_guess_tau,
    ome=rng,
)
# draw warmup + retained iterations in one pass, then keep only the post-warmup tail
samples = [next(sampler) for _ in range(n_warmup + n_samples)][n_warmup:]

In [None]:
# reformat samples for plotting: random effects become a frame indexed by (factor, level, response),
# precision traces a frame indexed by factor; both have one column per retained iteration

rfx_samples, prec_samples = package_samples(samples, factor_names, response_names)

In [None]:
# random effects samples (preview of the first five rows and columns)

rfx_samples.iloc[:5, :5]

In [None]:
# prior precision trace samples (preview of the first five rows and columns)

prec_samples.iloc[:5, :5]

In [None]:
# plot traces for 'age' random effects, grid by level, color by response
# NOTE(review): grid/color roles are inferred from the argument order — confirm against xfx.misc.plot.plot_traces

xfx.misc.plot.plot_traces(rfx_samples.loc['age'], 'iter', 'level', 'response')

In [None]:
# plot marginals for 'education' random effects, grid by level, color by response

xfx.misc.plot.plot_marginals(rfx_samples.loc['education'], 'level', 'response')

In [None]:
# plot ACFs for 'voting_recall' random effects, grid by level, color by response

xfx.misc.plot.plot_acf(rfx_samples.loc['voting_recall'], 'level', 'response')