# Election models
> Tools for election modeling, including plots specifically designed for that purpose

In [1]:
#| default_exp election_models

In [2]:
#| hide
from nbdev.showdoc import *
from fastcore.test import *

In [3]:
#| exporti
import json, os, inspect
import itertools as it
from collections import defaultdict

import numpy as np
import pandas as pd
import datetime as dt

from typing import List, Tuple, Dict, Union, Optional

import altair as alt
import scipy.stats as sps

from salk_toolkit.utils import *
from salk_toolkit.io import extract_column_meta, read_json
from salk_toolkit.plots import stk_plot, register_stk_cont_version

import streamlit as st

# Machinery

In [4]:
#| export

def dhondt(pvotes, n_mandates, dh_power=1.0, pmand=None):
    """Hand out mandates with the highest-quotient (d'Hondt) rule, vectorized over draws.

    Args:
        pvotes: 2-d array indexed as (draw, party) holding each party's votes.
        n_mandates: number of mandates to hand out; a scalar or one value per draw.
        dh_power: exponent applied to the divisors (1.0 gives the classic 1, 2, 3, ... series).
        pmand: mandates each party already holds, same shape as `pvotes`; they are
            added to the divisors. Defaults to all zeros.

    Returns:
        Float array of shape (draws, parties) with the mandates awarded to each party.
    """
    per_draw = np.array(n_mandates)
    n_quotients = int(per_draw.max())  # quotients computed per party: enough for the largest request
    if pmand is None:
        pmand = np.zeros_like(pvotes)  # no previously handed out mandates

    # Quotient table of shape (draw, party, divisor index)
    divisors = np.arange(1, n_quotients + 1, 1)[None, None, :]
    quotients = pvotes[:, :, None] / (pmand[:, :, None] + divisors) ** dh_power

    # Rank all quotients of a draw together; integer division of the flat index
    # by the number of quotients per party recovers the owning party
    flat = quotients.reshape((quotients.shape[0], -1))
    winners = np.argsort(-flat, axis=1) // n_quotients

    # Only the first `n_mandates` ranked quotients of each draw win a mandate
    limit = np.ones(pvotes.shape[0]) * per_draw
    awarded = np.arange(winners.shape[1])[None, :] < limit[:, None]

    # Row 0 of the lookup is all zeros (no mandate), row k+1 is the one-hot vector of party k
    n_parties = pvotes.shape[-1]
    lookup = np.vstack([np.zeros((1, n_parties)), np.eye(n_parties)])
    return lookup[(winners + 1) * awarded].sum(axis=1)

def simulate_election(support, nmandates, threshold=0.0, ed_threshold=0.0, quotas=True, first_quota_coef=1.0, dh_power=1.0, body_size=None, **kwargs):
    """Vectorized basic election simulation: thresholds, district quotas and d'Hondt.

    Args:
        support: array of shape (draws, districts, parties) with party support.
        nmandates: 1-d array with the number of mandates of each district.
        threshold: share of a draw's total support a party must exceed to take part.
        ed_threshold: share of a district's support a party must exceed in that district.
        quotas: if truthy, use district quotas followed by a compensation round
            (Estonian system); otherwise run a separate d'Hondt election in
            every district (Croatian system).
        first_quota_coef: remainder, as a fraction of a quota, from which a party
            gets one more district mandate.
        dh_power: exponent of the d'Hondt divisors, passed on to `dhondt`.
        body_size: total number of seats; defaults to the sum of `nmandates`.
            Only used when `quotas` is truthy.
        **kwargs: accepted and ignored.

    Returns:
        With quotas: array of shape (draws, districts + 1, parties), where the
        last entry along axis 1 holds the compensation mandates.
        Without quotas: array of shape (draws, districts, parties).
    """
    # The small constant added to the totals guards against division by zero
    district_totals = support.sum(axis=-1) + 1e-3

    # Remove parties below the national threshold
    national_share = support.sum(axis=1) / (support.sum(axis=(1, 2)) + 1e-3)[:, None]
    eligible = (national_share > threshold)[:, None, :] * support

    # Remove parties below the electoral-district specific threshold
    district_share = support / district_totals[:, :, None]
    eligible = (district_share > ed_threshold) * eligible

    if not quotas:
        # Separate election in each district (Croatian system)
        per_district = [
            dhondt(eligible[:, i, :], nmandates[i], dh_power)
            for i in range(support.shape[1])
        ]
        return np.stack(per_district, axis=1)

    # Districts with quotas, then country-level compensation (Estonian system)
    quota_size = district_totals / nmandates[None, :]
    full_quotas, remainder = np.divmod(eligible / quota_size[:, :, None], 1.0)
    dmandates = full_quotas + (remainder >= first_quota_coef)

    # Votes and district mandates of each party across all districts
    pvotes = eligible.sum(axis=1)
    pmand = dmandates.sum(axis=1)

    # Seats left over after the district round are handed out with d'Hondt,
    # with the divisors starting after the mandates each party already holds
    if body_size is None:
        body_size = sum(nmandates)
    remaining_mand = body_size - pmand.sum(axis=1)
    comp_mandates = dhondt(pvotes, remaining_mand, dh_power, pmand)

    # District results followed by the compensation results
    return np.concatenate([dmandates, comp_mandates[:, None, :]], axis=1)

In [5]:
#| export

# Basic wrapper around simulate elections that goes from dataframe to dataframe
def simulate_election_e2e(sdf, parties, mandates_dict, ed_col='electoral_district', **kwargs):
    """Run `simulate_election` on a data frame and return the result as a data frame.

    Args:
        sdf: data frame with a 'draw' column, the `ed_col` column and one
            support column per party.
        parties: party column names; names missing from `sdf` are skipped.
        mandates_dict: maps each district to its number of mandates.
        ed_col: name of the electoral district column.
        **kwargs: forwarded to `simulate_election`.

    Returns:
        Long-form data frame with columns 'mandates', 'draw', `ed_col` and 'party'.
        Draw ids keep the values and dtype they have in `sdf`. When the simulation
        returns a compensation round it is reported as district 'Compensation'.

    Raises:
        ValueError: if some (draw, district) combination is missing from `sdf`.
    """
    # Convert data frame to a numpy tensor for fast vectorized processing
    parties = [p for p in parties if p in sdf.columns]

    # observed=True keeps unobserved categorical combinations out of the result
    ed_df = sdf.groupby(['draw', ed_col], observed=True)[parties].sum()

    # Take the labels from the grouped index so they are in the same (sorted)
    # order as the rows that get reshaped into the tensor below
    draws = list(ed_df.index.get_level_values('draw').unique())
    districts = list(ed_df.index.get_level_values(ed_col).unique())
    if len(ed_df) != len(draws) * len(districts):
        raise ValueError(f"Every draw needs data for every value of '{ed_col}'")

    support = ed_df.to_numpy().reshape((len(draws), len(districts), len(parties)))
    nmandates = np.array([mandates_dict[d] for d in districts])

    edt = simulate_election(support, nmandates, **kwargs)

    if edt.shape[1] > support.shape[1]:
        districts = districts + ['Compensation']

    # Shape it back into a data frame; the label columns follow the C-order
    # flattening of the (draw, district, party) tensor
    n_draws, n_rows, n_parties = edt.shape
    return pd.DataFrame({
        'mandates': edt.reshape(-1).astype('int'),
        'draw': np.repeat(draws, n_rows * n_parties),
        ed_col: np.tile(np.repeat(districts, n_parties), n_draws),
        'party': np.tile(parties, n_draws * n_rows),
    })

In [7]:
# Test simulation
# Smoke test: run the end-to-end simulation on the sample data and check
# that every draw hands out exactly `body_size` mandates.
from salk_toolkit.io import read_annotated_data, extract_column_meta

df, meta = read_annotated_data('../samples/mrp.parquet')
cmeta = extract_column_meta(meta)
edm = cmeta['electoral_district']

# The electoral system is spelled out here instead of being read from the metadata
#electoral_system = edm['electoral_system']
# NOTE: "max_comp" is not a named parameter of simulate_election; it is
# absorbed by its **kwargs
electoral_system = {
    "body_size": 101,
    "threshold": 0.05,
    "first_quota_coef": 0.75,
    "dh_power": 0.9,
    "max_comp": 20
}

sdf = simulate_election_e2e(df,cmeta['party_preference']['categories'],edm['mandates'],**electoral_system)
# Mandates summed over districts and parties must equal the size of the body in every draw
assert (sdf.groupby('draw')['mandates'].sum() == electoral_system['body_size']).all()

  ed_df = sdf.groupby(['draw',ed_col])[parties].sum()


In [8]:
# Total mandates of each party in each draw, dropping parties without any mandate
adf = sdf.groupby(['draw','party'])['mandates'].sum().reset_index()
adf = adf[adf['mandates']>0]
# For every party, count in how many draws each mandate total occurs
ddf = adf.groupby('party')['mandates'].value_counts().rename('count').reset_index()

# Individual parties
# One bar chart row per party; the chart is the last expression so the notebook displays it
alt.Chart(
        ddf,
        #title=var
    ).mark_bar(opacity=0.5, stroke='black', strokeWidth=0.5).encode(
        alt.X('mandates:Q', title="Mandates", axis=alt.Axis(format='.2s'),scale=alt.Scale(domainMin=0)),
        alt.Y('count:Q', title=None, axis=None),
        alt.Row('party:N', title=None),
        #color=alt.Color('party:N', legend=None, scale=erakonnad_palett),
        tooltip=[alt.Tooltip('mandates:Q', format=',d')]
    ).properties(height=60)

In [9]:
# Distribution of the combined mandates of an example coalition
coalition = ['Isamaa','EKRE']
adf = sdf.groupby(['draw','party'])['mandates'].sum().reset_index()
adf = adf[(adf['mandates']>0) & adf['party'].isin(coalition)]
# Sum the coalition parties within each draw, then count how often each total occurs
ddf = adf.groupby('draw')['mandates'].sum().value_counts().rename('count').reset_index()

# Mandate count at which the reference line is drawn
n = 51

k_plot = alt.Chart(ddf).mark_bar(opacity=0.8, color='grey').encode(
    x=alt.X('mandates:Q', title='Mandates'),
    y=alt.Y('count:Q', title=None, axis=None),
).properties(height=200)

# Dashed vertical line at n mandates
rule=alt.Chart(pd.DataFrame({'x': [n]})).mark_rule(color='red', size=1.25, strokeDash=[5, 2]).encode(x='x')

# Layer the bars and the rule; the last expression is what the notebook displays
(k_plot+rule).configure_view(strokeWidth=0)

In [10]:
#| export

def simulate_election_pp(data, mandates, electoral_system, cat_col, value_col, factor_col, cat_order, factor_order):
    """Run `simulate_election` on long-form data from the pp framework.

    Args:
        data: long-form data frame with a 'draw' column plus the `cat_col`,
            `factor_col` and `value_col` columns.
        mandates: maps each value in `factor_order` to its number of mandates.
        electoral_system: keyword arguments forwarded to `simulate_election`.
        cat_col: column holding the categories (parties).
        value_col: column holding the support values.
        factor_col: column holding the factor (electoral district).
        cat_order: categories to simulate, in output order.
        factor_order: factor values to simulate, in output order.

    Returns:
        Long-form data frame with columns 'mandates', 'draw', `factor_col` and
        `cat_col`. Draw ids keep the values and dtype they have in `data`. When
        the simulation returns a compensation round it is reported under the
        factor value 'Compensation'.
    """
    # Reshape input to (draws,electoral_districts,parties)
    draws = data.draw.unique()
    factor_order = list(factor_order)
    pdf = data.pivot(index=['draw', factor_col], columns=cat_col, values=value_col).reset_index()
    ded = pd.DataFrame(list(it.product(draws, factor_order)), columns=['draw', factor_col])

    # Left merge keeps every (draw, factor) cell of the grid, and reindex adds
    # categories absent from the data, so whatever is missing is filled with 0
    grid = ded.merge(pdf, on=['draw', factor_col], how='left')
    sdata = grid.reindex(columns=cat_order).fillna(0).to_numpy().reshape(
        (len(draws), len(factor_order), len(cat_order)))

    # Run the actual electoral simulation
    nmandates = np.array([mandates[d] for d in factor_order])
    edt = simulate_election(sdata, nmandates, **electoral_system)
    if edt.shape[1] > sdata.shape[1]:
        factor_order = factor_order + ['Compensation']

    # Shape it back into a data frame; the label columns follow the C-order
    # flattening of the (draw, factor, category) tensor
    n_draws, n_rows, n_cats = edt.shape
    return pd.DataFrame({
        'mandates': edt.reshape(-1),
        'draw': np.repeat(draws, n_rows * n_cats),
        factor_col: np.tile(np.repeat(factor_order, n_cats), n_draws),
        cat_col: np.tile(cat_order, n_draws * n_rows),
    })

In [11]:
#| export

# This fits into the pp framework as: f0['col']=party_pref, factor=electoral_district, hence the as_is and hidden flags
@stk_plot('coalition_applet', data_format='longform', draws=True, requires_factor=True, agg_fn='sum', factor_meta=['mandates','electoral_system'], as_is=True, n_facets=(2,2))#, hidden=True)
def coalition_applet(data, mandates, electoral_system, value_col='value', facets=[], width=None, alt_properties={}, outer_factors=[], translate=None):
    """Streamlit applet: per-party mandate distributions plus a coalition simulator.

    Renders straight into the streamlit page and returns None.

    Args:
        data: long-form data passed on to `simulate_election_pp`.
        mandates: maps each electoral district to its number of mandates.
        electoral_system: keyword arguments for the election simulation.
        value_col: column of `data` holding the support values.
        facets: two facet descriptions; the 'col', 'order' and 'colors' entries of
            the first (party) and the 'col' and 'order' entries of the second
            (electoral district) are used.
        width: not used by this plot.
        alt_properties: not used by this plot.
        outer_factors: must be empty.
        translate: optional function applied to the user-facing texts.

    Raises:
        Exception: if `outer_factors` is not empty.
    """
    # NOTE: the list/dict defaults in the signature are never mutated here
    f0, f1 = facets[0], facets[1]
    tf = translate if translate else (lambda s: s)

    if outer_factors: raise Exception("This plot does not work with extra factors")

    sdf = simulate_election_pp(data, mandates, electoral_system, f0['col'], value_col, f1['col'], f0['order'], f1['order'])

    # Aggregate to total mandate counts, leaving out parties without mandates
    adf = sdf.groupby(['draw', f0['col']])['mandates'].sum().reset_index()
    adf = adf[adf['mandates'] > 0]

    coalition = st.multiselect(tf('Select the coalition:'),
        f0["order"],
        help=tf('Choose the parties whose coalition to model'))

    st.markdown("""___""")

    col1, col2 = st.columns((9, 9), gap='large')
    col1.markdown(tf('**Party mandate distributions**'))

    # Individual parties plot
    ddf = adf.groupby(f0['col'])['mandates'].value_counts().rename('count').reset_index()
    p_plot = alt.Chart(
            ddf,
        ).mark_bar(opacity=0.5, stroke='black', strokeWidth=0, size=20).encode(
            alt.X('mandates:Q', title="Mandates", axis=alt.Axis(tickMinStep=1), scale=alt.Scale(domainMin=0)),
            alt.Y('count:Q', title=None, axis=None),
            alt.Row(f'{f0["col"]}:N', title=None),
            color=alt.Color(f'{f0["col"]}:N', legend=None, scale=f0["colors"]),
            tooltip=[alt.Tooltip('mandates:Q', format=',d')]
        ).properties(height=60)
    col1.altair_chart(p_plot, use_container_width=True)

    total_mandates = sum(mandates.values())

    col2.markdown(tf('**Coalition simulation**'))
    n = col2.number_input(tf('Choose mandate cutoff:'), min_value=0, max_value=total_mandates, value=(total_mandates//2) + 1, step=1, help='...')

    if len(coalition) > 0:
        # Coalition plot
        acdf = adf[adf[f0['col']].isin(coalition)]
        cdf = acdf.groupby('draw')['mandates'].sum().value_counts().rename('count').reset_index()

        mi, ma = min(cdf['mandates'].min(), n), max(cdf['mandates'].max(), n)
        tick_count = (ma-mi+1) # This is the only way to enforce integer ticks as tickMinStep seems to not do it sometimes
        k_plot = alt.Chart(cdf).mark_bar(color='#ff2b2b', size=20).encode(
            x=alt.X('mandates:Q', title='Mandates', scale=alt.Scale(round=True), axis=alt.Axis(tickMinStep=1, tickCount=tick_count)),
            y=alt.Y('count:Q', title=None, stack=None, axis=None),
        ).properties(height=200, width=300)
        rule = alt.Chart(pd.DataFrame({'x': [n]})).mark_rule(color='red', size=1.25, strokeDash=[5, 2]).encode(x='x')
        col2.altair_chart((k_plot+rule).configure_view(strokeWidth=0), use_container_width=True)

        # The probability is the share of draws reaching the cutoff. `cdf` has one
        # row per distinct total, so averaging over its rows would ignore how many
        # draws each total stands for. The per-draw totals are taken from the
        # unfiltered simulation so draws with zero coalition mandates count too.
        coalition_totals = sdf[sdf[f0['col']].isin(coalition)].groupby('draw')['mandates'].sum()
        prob = (coalition_totals >= n).mean()
        col2.write(tf("Probability of at least  **{0:.0f}** mandates: **{1:.1%}**").format(n, prob))
        # TODO: also report the median and a 90% interval of the coalition's mandate count

    return None

register_stk_cont_version('coalition_applet')

<function salk_toolkit.plots.register_stk_cont_version.<locals>.cont(*args, **kwargs)>

In [12]:
#| export

# This fits into the pp framework as: f0['col']=party_pref, factor=electoral_district, hence the as_is and hidden flags
@stk_plot('mandate_plot', data_format='longform', draws=True, requires_factor=True, agg_fn='sum', n_facets=(2,2), factor_meta=['mandates','electoral_system'], as_is=True)#, hidden=True)
def mandate_plot(data, mandates, electoral_system, value_col='value', facets=[], width=None, alt_properties={}, outer_factors=[]):
    """Plot, per district and party, the probability of winning at least k mandates.

    Args:
        data: long-form data passed on to `simulate_election_pp`.
        mandates: maps each electoral district to its number of mandates.
        electoral_system: keyword arguments for the election simulation.
        value_col: column of `data` holding the support values.
        facets: two facet descriptions; the 'col', 'order' and 'colors' entries of
            the first (party) and the 'col' and 'order' entries of the second
            (electoral district) are used.
        width: total width shared by the party columns; when None every facet
            gets the minimum width of 50.
        alt_properties: extra properties applied to each facet chart.
        outer_factors: must be empty.

    Returns:
        A faceted Altair chart with districts as rows and parties as columns.

    Raises:
        Exception: if `outer_factors` is not empty.
    """
    # NOTE: the list/dict defaults in the signature are never mutated here
    f0, f1 = facets[0], facets[1]

    if outer_factors: raise Exception("This plot does not work with extra factors")

    df = simulate_election_pp(data, mandates, electoral_system, f0['col'], value_col, f1['col'], f0['order'], f1['order'])

    # Name of the probability column built below. It is kept separate from
    # `value_col`, which names the input column and need not be 'percent'.
    prob_col = 'percent'

    # Shape it into % values for each vote count: column k holds the indicator
    # "at least k mandates", whose mean over draws is the probability
    maxv = df['mandates'].max()
    tv = np.arange(1,maxv+1,dtype='int')[None,:]
    dfv = df['mandates'].to_numpy()[:,None]
    dfm = pd.DataFrame((dfv>=tv).astype('int'),columns=tv[0], index=df.index)
    dfm['draw'],dfm[f0['col']], dfm[f1['col']] = df['draw'], df[f0['col']], df[f1['col']]
    res = dfm.groupby([f0['col'],f1['col']],observed=True)[tv[0]].mean().reset_index().melt(
        id_vars=[f0['col'],f1['col']], var_name='mandates', value_name=prob_col)

    # Remove parties who have no chance of even one elector
    eliminate = (res.groupby(f0['col'],observed=True)[prob_col].sum() < 0.2)
    el_cols = [i for i,v in eliminate.items() if v]
    res = res[~res[f0['col']].isin(el_cols)]
    cat_order = list(eliminate[~eliminate].index)

    # Split the width between the remaining parties, never going below 50 per
    # facet; the guards cover the width=None default and an empty party list
    f_width = max(50, width/max(1, len(cat_order))) if width is not None else 50

    plot = alt.Chart(data=res).mark_bar().encode(
        x=alt.X('mandates',title=None),
        y=alt.Y(prob_col,title=None,axis=alt.Axis(format='%')),
        color=alt.Color(f'{f0["col"]}:N', scale=f0["colors"], legend=None),
        tooltip=[
            alt.Tooltip(f0['col'], title='party'),
            alt.Tooltip(f1['col']),
            alt.Tooltip('mandates'),
            alt.Tooltip(prob_col, format='.1%', title='probability'),
            ]
    ).properties(
        width=f_width,
        height=f_width//2,
        **alt_properties
    ).facet(
        row=alt.Row(
            f'{f1["col"]}:N',
            sort=f1["order"]+['Compensation'],
            title=None,
            header=alt.Header(labelOrient='top')
            ),
        column=alt.Column(
            f'{f0["col"]}:N',
            sort=cat_order,
            title=None,
            header=alt.Header(labelFontWeight='bold')
            ),
    )
    return plot

register_stk_cont_version('mandate_plot')

<function salk_toolkit.plots.register_stk_cont_version.<locals>.cont(*args, **kwargs)>

In [13]:
# Test the plot - categorical party_preference version
# Renders mandate_plot from the bootstrap sample with electoral_district as the factor
from salk_toolkit.pp import e2e_plot
e2e_plot({
    'res_col' : 'party_preference',
    'factor_cols': ['electoral_district'],
    'internal_facet': True,
    'plot': 'mandate_plot',
    #'filter': { 'party_preference': ['Keskerakond', 'EKRE',
    #   'Reformierakond', 'Isamaa', 'SDE', 'Rohelised', 'Eesti 200',
    #   'Parempoolsed'] },
}, '../samples/m_bootstrap.parquet', width=800, lazy=False)

[{'col': 'party_preference', 'order': ['Keskerakond', 'EKRE', 'Reformierakond', 'Isamaa', 'SDE', 'Rohelised', 'Eesti 200', 'Parempoolsed', 'Other', 'None of the parties', 'No opinion'], 'colors': Scale({
  domain: ['Keskerakond', 'EKRE', 'Reformierakond', 'Isamaa', 'SDE', 'Rohelised', 'Eesti 200', 'Parempoolsed', 'Other', 'None of the parties', 'No opinion'],
  range: ['#007557', '#8B4513', '#FFE200', '#009BDF', '#E10600', '#88AF47', '#31758A', 'orange', 'lightgrey', 'grey', 'lightgrey']
})}, {'col': 'electoral_district', 'order': ['Haabersti, Põhja-Tallinn ja Kristiine', 'Harju- ja Raplamaa', 'Hiiu-, Lääne- ja Saaremaa', 'Ida-Virumaa', 'Järva- ja Viljandimaa', 'Jõgeva- ja Tartumaa', 'Kesklinn, Lasnamäe ja Pirita', 'Lääne-Virumaa', 'Mustamäe ja Nõmme', 'Pärnumaa', 'Tartu linn', 'Võru-, Valga- ja Põlvamaa'], 'colors': Undefined}]


In [18]:
# Test the plot - spread out (MRP) version
# Uses the 'mandate_plot-cont' variant on the party_preference_dist column of the MRP sample
from salk_toolkit.pp import e2e_plot
e2e_plot({
    'res_col' : 'party_preference_dist',
    'factor_cols': ['electoral_district'],
    'internal_facet': True,
    'plot': 'mandate_plot-cont',
    #'agg_fn': 'sum'
}, '../samples/mrp.parquet', width=800)

[{'col': 'party_preference', 'order': ['Keskerakond', 'EKRE', 'Reformierakond', 'Isamaa', 'SDE', 'Rohelised', 'Eesti 200', 'Parempoolsed', 'Other', 'None of the parties', 'No opinion'], 'colors': Scale({
  domain: ['Keskerakond', 'EKRE', 'Reformierakond', 'Isamaa', 'SDE', 'Rohelised', 'Eesti 200', 'Parempoolsed', 'Other', 'None of the parties', 'No opinion'],
  range: ['#007557', '#8B4513', '#FFE200', '#009BDF', '#E10600', '#88AF47', '#31758A', 'orange', 'lightgrey', 'grey', 'lightgrey']
})}, {'col': 'electoral_district', 'order': ['Haabersti, Põhja-Tallinn ja Kristiine', 'Harju- ja Raplamaa', 'Hiiu-, Lääne- ja Saaremaa', 'Ida-Virumaa', 'Järva- ja Viljandimaa', 'Jõgeva- ja Tartumaa', 'Kesklinn, Lasnamäe ja Pirita', 'Lääne-Virumaa', 'Mustamäe ja Nõmme', 'Pärnumaa', 'Tartu linn', 'Võru-, Valga- ja Põlvamaa'], 'colors': Undefined}]


In [None]:
#| hide
# Run the nbdev export for this notebook
import nbdev; nbdev.nbdev_export()