In [4]:
# dependencies

import datetime
import re

import numpy as np
import pandas as pd
import stan as stan
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.special import softmax

import xfx.mvglm2.multinomial
from xfx.misc import plot

# apply seaborn's default theme to every figure drawn below
sns.set_theme()

In [5]:
# spaghetti

def format_vb_output(block_names, block_samples, n_samples):
    """Reshape flat, per-element draws into one array per parameter.

    Parameters
    ----------
    block_names : sequence of str
        Flattened element names. A name is either a bare identifier
        (treated as a scalar) or ``base[i,j,...]`` with 1-based,
        comma-separated integer indices.
    block_samples : sequence of array-like
        Draws for each element, aligned with ``block_names``. Each entry
        must hold ``n_samples`` values.
    n_samples : int
        Number of draws stored per element.

    Returns
    -------
    dict of str -> numpy.ndarray
        One array per base name, shaped ``(n_samples, *dims)`` where ``dims``
        is the largest index seen along each axis. Scalars come back with
        shape ``(n_samples,)``. Positions that never appear in
        ``block_names`` are left uninitialised.
    """
    # raw string: '\[' in a plain literal is an invalid escape sequence
    index_pattern = re.compile(r'\[(.+)\]')

    # group (0-based position, draws) pairs under their base parameter name
    grouped = {}
    for name, draws in zip(block_names, block_samples):
        base = name.split('[')[0]
        found = index_pattern.search(name)
        position = tuple(int(i) - 1 for i in found.group(1).split(',')) if found else ()
        grouped.setdefault(base, []).append((position, draws))

    arrays = {}
    for base, entries in grouped.items():
        positions = np.array([pos for pos, _ in entries], dtype=np.int64).reshape(len(entries), -1)
        dims = tuple(int(d) for d in positions.max(axis=0) + 1)
        filled = np.empty(dims + (n_samples,))
        for pos, draws in entries:
            filled[pos] = draws
        # draws were written along the trailing axis; expose them as the leading one
        arrays[base] = np.moveaxis(filled, -1, 0)
    return arrays

def process_census(results_raw, participation_raw, election, parties):
    """Cross per-province party results with participation for one election.

    ``results_raw`` needs the columns ``election``, ``province_id``,
    ``province`` and one column per entry of ``parties``; missing party
    values are read as 0 and a ``'~others'`` column is set to one minus the
    row sum (so the party columns are presumably vote shares — confirm
    against the input file). ``participation_raw`` needs ``election``,
    ``province_id``, ``assigned`` and ``abstention``.

    Returns a Series indexed by ``(province_id, voting_recall)``, sorted by
    index: each party column scaled by ``assigned``, plus the raw
    ``abstention`` value.
    """
    keys = ['election', 'province_id']

    shares = (
        results_raw
        .set_index(keys)
        .drop(['province'], axis=1)
        .loc[election, parties]
        .fillna(0)
    )
    shares.loc[:, '~others'] = 1 - shares.sum(axis=1)

    turnout = participation_raw.set_index(keys).loc[election]

    # scale every province's row by that province's `assigned` figure
    census = shares.mul(turnout.assigned, axis='index')
    census['abstention'] = turnout.abstention
    census.columns.name = 'voting_recall'

    return census.stack().sort_index()

def process_surveys(surveys_raw, election, parties, covariates):
    """Select and recode the survey responses attached to one election.

    Parameters
    ----------
    surveys_raw : pandas.DataFrame
        Needs the columns ``study_id``, ``questionary_id``, ``election``,
        ``voting_intention`` and every name in ``covariates``.
    election : object
        Value of the ``election`` column to keep.
    parties : sequence of str
        Party labels kept as-is; any other label except ``'abstention'`` is
        recoded to ``'~others'``.
    covariates : sequence of str
        Columns retained alongside ``voting_intention``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(study_id, questionary_id)`` with the columns
        ``covariates + ['voting_intention']``. Rows with missing values or
        with an ``'invalid'``/``'blank'`` vote are removed.

    Notes
    -----
    ``voting_recall`` is filtered and recoded only when it is one of the
    requested ``covariates``; otherwise it is not part of the selected
    frame and touching it would raise ``AttributeError``.
    """
    columns = list(covariates) + ['voting_intention']
    vote_columns = ['voting_intention']
    if 'voting_recall' in columns:
        vote_columns.append('voting_recall')

    indexed = surveys_raw.set_index(['study_id', 'questionary_id'])
    surveys = indexed.loc[indexed.election == election, columns].dropna()

    for column in vote_columns:
        surveys = surveys.loc[~surveys[column].isin(['invalid', 'blank'])]

    # explicit copy: the recoding below must write to an independent frame,
    # not to a slice of `surveys_raw` (avoids SettingWithCopyWarning)
    surveys = surveys.copy()

    kept_labels = list(parties) + ['abstention']
    for column in vote_columns:
        surveys.loc[~surveys[column].isin(kept_labels), column] = '~others'

    return surveys

def estimate_vote(prob, census):
    """Weight per-cell outcome probabilities by census counts and normalise.

    Parameters
    ----------
    prob : pandas.DataFrame
        One row per census cell, one column per outcome; must contain an
        ``'abstention'`` column.
    census : pandas.Series
        Cell weights. The entry labelled ``0`` is dropped before weighting
        (presumably a placeholder/aggregate province — TODO confirm); the
        remaining index is aligned with the rows of ``prob``.

    Returns
    -------
    pandas.Series
        Weighted totals per outcome, excluding ``'abstention'``, divided by
        their sum so that they add up to one.
    """
    weights = census.drop(0)
    weighted = (prob.T * weights).T
    # `columns=` instead of a positional axis: pandas 2.0 made `axis` keyword-only
    counts = weighted.drop(columns='abstention').sum(axis=0)
    return counts / counts.sum()

In [6]:
# config

# seeded generator: an unseeded default_rng() gives different draws on every kernel restart
ome = np.random.default_rng(0)
# number of draws retained per algorithm
n_samples = 10000
# election date -> party labels kept as separate outcomes for that election
elections = {
    '2019-04-28': ['conservatives', 'social_democrats'],
    '2019-11-10': ['conservatives', 'social_democrats'],
}
# categorical predictors used as factors
factors = ['province_id', 'voting_recall']

In [7]:
# import data

results_raw = pd.read_csv('paper/output/election_votes.csv')
participation_raw = pd.read_csv('paper/output/election_participation.csv')
surveys_raw = pd.read_csv('paper/output/election_cis.csv')

# surveys are taken from the last configured election, the census from the first
election_dates = list(elections.keys())
election_parties = list(elections.values())
surveys = process_surveys(surveys_raw, election_dates[-1], election_parties[-1], factors)
census = process_census(results_raw, participation_raw, election_dates[0], election_parties[0])

In [8]:
# construct xfx inputs

# split the survey frame into the response column and the remaining columns
response = surveys.voting_intention
levels = surveys.drop('voting_intention', axis=1)

# indicator columns, one per distinct voting intention
xfx_response = pd.get_dummies(response)
# replace every level label by its integer category code, column by column
xfx_indices = levels.apply(lambda col: col.astype('category').cat.codes).astype('int64')
# codes start at 0, so the level count of each column is its largest code plus one
n_levels = xfx_indices.max(axis=0).astype(np.int64) + 1

In [9]:
# sample xfx

# NOTE(review): the output recorded for this cell is a TypeError about unpacking the
# numpy Generator; confirm that this call matches the signature of the installed
# `xfx.mvglm2.multinomial.sample_posterior` (in particular what it expects for `ome`).
xfx_sampler = xfx.mvglm2.multinomial.sample_posterior(
    n_levels.values,
    xfx_indices.values,
    xfx_response.values,
    ome=ome,
)

# burn-in: advance the sampler by n_samples steps without keeping those draws in memory
for _ in range(n_samples):
    next(xfx_sampler)

# keep the following n_samples draws; the third component of every draw is discarded
xfx_draws = [next(xfx_sampler) for _ in range(n_samples)]
xfx_alp0, xfx_alp, _ = zip(*xfx_draws)

TypeError: cannot unpack non-iterable numpy.random._generator.Generator object

In [12]:
# construct stan inputs

# counts of each voting intention within every observed combination of the factors
stan_response = (
    surveys
    .groupby(factors)
    .voting_intention
    .value_counts()
    .unstack('voting_intention')
    .fillna(0)
    .astype('int64')
)
stan_levels = stan_response.index.to_frame()

# cumulative level counts shifted to 1-based indexing; presumably the position of each
# factor's first coefficient in the stacked coefficient vector of the Stan model
# (confirm against paper/stan/xfx_mvlogit2.stan)
coef_offsets = np.hstack([0, np.cumsum(n_levels[:-1])]) + 1
stan_indices = stan_levels.apply(lambda col: col.astype('category').cat.codes).astype('int64') + coef_offsets

stan_data = dict(
    n_strata=stan_response.shape[0],
    n_parties=stan_response.shape[1],
    n_factors=len(n_levels),
    n_coefs=n_levels.sum(),
    prior_df=stan_response.shape[1] - 1,
    lo=coef_offsets,
    hi=np.cumsum(n_levels).values,
    coef_idx=stan_indices.values,
    counts=stan_response.values,
)

# NOTE(review): `StanModel` (and `.vb` used later) match the PyStan 2 API, and the log
# recorded for this cell is emitted by `pystan`; confirm that the name `stan` imported
# at the top of the notebook resolves to that package.
stan_model = stan.StanModel('paper/stan/xfx_mvlogit2.stan')

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_cf96aaaa55bb5e41476a50e31d238c87 NOW.


In [13]:
# sample stan

# settings forwarded to the model's variational fit; one output draw per retained sample
vb_settings = dict(
    data=stan_data,
    iter=10000,
    tol_rel_obj=1e-10,
    output_samples=n_samples,
    seed=0,
    init='random',
)
raw_stan_samples = stan_model.vb(**vb_settings)



In [24]:
# generate predictive

def _cell_probabilities(coef_samples, design):
    """Contract axis 1 of the draws with the design columns, softmax over outcomes.

    The result has the design rows on axis 1 and the outcomes on axis 2.
    """
    scores = np.tensordot(coef_samples, design, (1, 1))
    return np.swapaxes(softmax(scores, 1), 1, 2)


def _vote_share_draws(cell_probs, cells, outcomes, weights):
    """Apply estimate_vote to every draw and collect the results row-wise."""
    shares = np.array([
        estimate_vote(pd.DataFrame(draw, cells, outcomes), weights)
        for draw in cell_probs
    ])
    return pd.DataFrame(shares, columns=outcomes.drop('abstention'))


# Stan: put the intercept in front of the coefficients, then swap the last two axes
stan_arrays = format_vb_output(raw_stan_samples['sampler_param_names'], raw_stan_samples['sampler_params'], n_samples)
stan_stacked = np.dstack((stan_arrays['intercept'][:, :, np.newaxis], stan_arrays['coefs']))
stan_samples = np.swapaxes(stan_stacked, 1, 2)

# xfx: stack intercept and factor blocks per draw, then append a slice of zeros along
# the last axis (presumably the reference outcome's coefficients — TODO confirm)
xfx_stacked = np.array([np.vstack([alp0_] + alp_) for alp0_, alp_ in zip(xfx_alp0, xfx_alp)])
xfx_samples = np.dstack((xfx_stacked, np.zeros(xfx_stacked.shape[:2] + (1,))))

# indicator design over the census cells (label 0 removed), with a constant column first
levels_plus = census.drop(0).index.to_frame()
indices_plus = (
    levels_plus
    .apply(lambda col: col.astype('category').cat.codes)
    .astype('int64')
    .assign(_constant=0)
)[['_constant'] + factors]
dummies_plus = pd.get_dummies(indices_plus, columns=indices_plus.columns).sort_index()

xfx_probs = _cell_probabilities(xfx_samples, dummies_plus)
stan_probs = _cell_probabilities(stan_samples, dummies_plus)

xfx_predictions = _vote_share_draws(xfx_probs, levels_plus.index, stan_response.columns, census)
stan_predictions = _vote_share_draws(stan_probs, levels_plus.index, stan_response.columns, census)

In [25]:
# melt to tidy

def _melt_predictions(predictions, algo):
    """Return `predictions` in long form (iter, party, value) tagged with `algo`.

    The '~others' column is left out; `predictions` must already carry an
    'iter' column.
    """
    # `columns=` instead of a positional axis: pandas 2.0 made `axis` keyword-only
    tidy = predictions.drop(columns='~others').melt('iter', var_name='party')
    tidy['algo'] = algo
    return tidy


# the row position of each draw doubles as its iteration id
xfx_predictions['iter'] = xfx_predictions.index
stan_predictions['iter'] = stan_predictions.index

xfx_predictions_long = _melt_predictions(xfx_predictions, 'cGibbs')
stan_predictions_long = _melt_predictions(stan_predictions, 'Stan/ADVI')
predictions_long = pd.concat([xfx_predictions_long, stan_predictions_long]).reset_index(drop=True)

In [28]:
# item 3

# one row per (iter, algo) with one column per party
df = predictions_long.pivot(index=['iter', 'algo'], columns='party').value.reset_index()

# the labels must match the `algo` values written when the predictions were melted;
# a label that is absent from the data ('Gibbs') leaves that algorithm out of the hue mapping
algo_order = ['cGibbs', 'Stan/ADVI']

# NOTE(review): this draws one marker per draw and algorithm; the output recorded for this
# cell shows a pgf/pdflatex export of the figure running out of TeX memory.
g = sns.relplot(
    data=df,
    x='conservatives',
    y='social_democrats',
    hue='algo',
    col='algo',
    legend=False,
    hue_order=algo_order,
    col_order=algo_order,
    s=1,
    facet_kws={'sharex': True, 'sharey': True, 'margin_titles': True},
    height=3,
)
g.set(xlabel='Conservative vote')
g.axes[0, 0].set_ylabel('Labour vote')
g.set_titles(col_template='{col_name}')

<seaborn.axisgrid.FacetGrid at 0x7f7fe07d5e80>

RuntimeError: The command
    pdflatex -interaction=nonstopmode -halt-on-error figure.tex
failed and generated the following output:
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex)
 restricted \write18 enabled.
entering extended mode
(./figure.tex
LaTeX2e <2020-02-02> patch level 2
L3 programming layer <2020-02-14>
(/usr/share/texlive/texmf-dist/tex/latex/hyperref/hyperref.sty
(/usr/share/texlive/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty)
(/usr/share/texlive/texmf-dist/tex/generic/iftex/iftex.sty)
(/usr/share/texlive/texmf-dist/tex/latex/pdftexcmds/pdftexcmds.sty
(/usr/share/texlive/texmf-dist/tex/generic/infwarerr/infwarerr.sty))
(/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty)
(/usr/share/texlive/texmf-dist/tex/generic/kvsetkeys/kvsetkeys.sty)
(/usr/share/texlive/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty)
(/usr/share/texlive/texmf-dist/tex/generic/pdfescape/pdfescape.sty)
(/usr/share/texlive/texmf-dist/tex/latex/hycolor/hycolor.sty)
(/usr/share/texlive/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty)
(/usr/share/texlive/texmf-dist/tex/latex/auxhook/auxhook.sty)
(/usr/share/texlive/texmf-dist/tex/latex/kvoptions/kvoptions.sty)
(/usr/share/texlive/texmf-dist/tex/latex/hyperref/pd1enc.def)
(/usr/share/texlive/texmf-dist/tex/generic/intcalc/intcalc.sty)
(/usr/share/texlive/texmf-dist/tex/generic/etexcmds/etexcmds.sty)
(/usr/share/texlive/texmf-dist/tex/latex/url/url.sty)
(/usr/share/texlive/texmf-dist/tex/generic/bitset/bitset.sty
(/usr/share/texlive/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty))
(/usr/share/texlive/texmf-dist/tex/generic/atbegshi/atbegshi.sty)

Package hyperref Warning: Option `pdfpagelabels' is turned off
(hyperref)                because \thepage is undefined.

) (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hpdftex.def
(/usr/share/texlive/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty
(/usr/share/texlive/texmf-dist/tex/latex/atveryend/atveryend.sty)
(/usr/share/texlive/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty)))
(/usr/share/texlive/texmf-dist/tex/latex/base/minimal.cls
Document Class: minimal 2001/05/25 Standard LaTeX minimal class
) (/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty
(/usr/share/texlive/texmf-dist/tex/generic/iftex/ifvtex.sty))
(/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty
(/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty
(/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex
(/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common-lists.t
ex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def
(/usr/share/texlive/texmf-dist/tex/latex/ms/everyshi.sty))
(/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex
(/usr/share/texlive/texmf-dist/tex/generic/pgf/pgf.revision.tex)))
(/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics/graphicx.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics/graphics.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics/trig.sty)
(/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/graphics.cfg)
(/usr/share/texlive/texmf-dist/tex/latex/graphics-def/pdftex.def)))
(/usr/share/texlive/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty
(/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex
(/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex
(/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeysfiltered.code.t
ex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def
(/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.de
f)))
(/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.
tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.
tex)) (/usr/share/texlive/texmf-dist/tex/latex/xcolor/xcolor.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/color.cfg))
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code
.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonomet
ric.code.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.cod
e.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison
.code.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.
tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code
.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.
tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerari
thmetics.code.tex)))
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex))
(/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfint.code.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.te
x)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.
code.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code
.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.te
x)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.c
ode.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformation
s.code.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex
)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.t
ex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing
.code.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.te
x)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex
)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex

(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.
tex))
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.te
x)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.c
ode.tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.
tex)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorerdf.code.tex))
)
(/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex
) (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex
)
(/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65
.sty)
(/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18
.sty)) (/usr/share/texlive/texmf-dist/tex/latex/l3backend/l3backend-pdfmode.def
)
No file figure.aux.
(/usr/share/texlive/texmf-dist/tex/latex/hyperref/nameref.sty
(/usr/share/texlive/texmf-dist/tex/latex/refcount/refcount.sty)
(/usr/share/texlive/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty))
*geometry* driver: auto-detecting
*geometry* detected driver: pdftex
ABD: EveryShipout initializing macros
(/usr/share/texlive/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
[Loading MPS to PDF converter (version 2006.09.02).]
) (/usr/share/texlive/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg))
(./figure.pgf
Runaway definition?
->
! TeX capacity exceeded, sorry [main memory size=5000000].
\pgf@sys@bp ...rrentprotocol {\the \pgfutil@toks@ 
                                                  }}
l.151558 \pgfusepath{stroke,fill}
                                 %
!  ==> Fatal error occurred, no output PDF file produced!
Transcript written on figure.log.

and the following error:


In [17]:
# mean_xfx_probs = pd.DataFrame(xfx_probs.var(0), columns=stan_response.columns)
# mean_stan_probs = pd.DataFrame(stan_probs.var(0), columns=stan_response.columns)
# mean_xfx_probs['algo'] = 'Gibbs'
# mean_stan_probs['algo'] = 'Stan/ADVI'
# df = pd.concat([mean_xfx_probs, mean_stan_probs]).melt(id_vars=['algo'], var_name='party')

# g = sns.displot(data=df, x='value', hue='algo', col='algo', row='party', kind='kde', legend=False, hue_order=['Gibbs', 'Stan/ADVI'], col_order=['Gibbs', 'Stan/ADVI'], facet_kws={'sharex': True, 'sharey': True, 'margin_titles': True}, height=2, aspect=2)

In [30]:
# dump

# persist the tidy predictions so the export cells can reload them from disk
predictions_long.to_csv(
    path_or_buf='paper/output/election_predictions.csv',
    index=False,
)

In [5]:
# configure for export

import matplotlib

# switch to the pgf backend and set the rc parameters used for the exported figure
matplotlib.use('pgf')
export_rc = {
    'pgf.texsystem': 'xelatex',
    'font.family': 'serif',
    'text.usetex': False,
    'pgf.rcfonts': False,
}
# the same 10pt size for axis labels, titles and the base font
for size_key in ('axes.labelsize', 'axes.titlesize', 'font.size'):
    export_rc[size_key] = 10
matplotlib.rcParams.update(export_rc)

# reload the predictions written by the dump cell
predictions_long = pd.read_csv('paper/output/election_predictions.csv')

In [6]:
# item 3

# facet labels must match the `algo` values stored in predictions_long; listing a label
# that is absent from the data ('Gibbs') yields an empty panel for that column
algo_order = ['cGibbs', 'Stan/ADVI']

# the panel background is set to the colour at the low end of the 'flare' colormap
panel_style = {'axes.facecolor': sns.color_palette('flare', as_cmap=True)(1 / 40), 'axes.grid': False}
with sns.axes_style(panel_style):
    # one row per (iter, algo) with one column per party
    df = predictions_long.pivot(index=['iter', 'algo'], columns='party').value.reset_index()
    g = sns.displot(
        data=df,
        x='conservatives',
        y='social_democrats',
        col='algo',
        col_order=algo_order,
        kind='kde',
        aspect=1,
        height=2.5,
        legend=False,
        fill=True,
        thresh=0,
        levels=20,
        cmap='flare',
        linewidth=0,
        bw_adjust=1.5,
        facet_kws={'sharex': True, 'sharey': True, 'margin_titles': True},
    )
    g.set(xlabel='Conservative vote')
    g.axes[0, 0].set_ylabel('Labour vote')
    g.set_titles(col_template='{col_name}')
    # save through the grid so the file is written from this figure explicitly
    g.savefig('paper/plots/elections_predictive.pdf', bbox_inches='tight')

In [3]:
# debugging aid: show the interpreter's module search path
# (`sys` is not imported anywhere else in the notebook, so import it here)
import sys

sys.path

['/home/timsf/Documents/research/hierarch/xfx_remote',
 '/home/timsf/Documents/research/hierarch/xfx_remote/paper',
 '/home/timsf/.vscode/extensions/ms-toolsai.jupyter-2021.3.684299474/pythonFiles',
 '/home/timsf/.vscode/extensions/ms-toolsai.jupyter-2021.3.684299474/pythonFiles/lib/python',
 '/usr/lib/python38.zip',
 '/usr/lib/python3.8',
 '/usr/lib/python3.8/lib-dynload',
 '',
 '/home/timsf/.virtualenvs/xfx_remote-OqQ78Mkr/lib/python3.8/site-packages',
 '/home/timsf/.virtualenvs/xfx_remote-OqQ78Mkr/lib/python3.8/site-packages/IPython/extensions',
 '/home/timsf/.ipython']