In [1]:
import arviz as az
import bokeh.plotting as bkp  
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

from bokeh.io import output_notebook, show
from bokeh.layouts import column, gridplot
from bokeh.models import GeoJSONDataSource, HoverTool, LinearColorMapper
from bokeh.models.annotations import Title
from bokeh.models.widgets import Select
from bokeh.palettes import brewer
from bokeh.transform import linear_cmap

output_notebook()

# Seven evenly spaced bin edges: 15, 25, ..., 75.
BINS = np.linspace(15.0, 75.0, 7)

# Name of the bokeh "brewer" palette associated with each party family.
_PALETTE_NAMES = {
    "farleft": "Reds",
    "left": "PuRd",
    "green": "Greens",
    "center": "Oranges",
    "right": "Blues",
    "farright": "Purples",
    "other": "Greys",
}
# One 7-colour palette per party, stored in reverse of bokeh's order.
COLORS = {
    party: np.array(brewer[palette][7][::-1])
    for party, palette in _PALETTE_NAMES.items()
}

PARTIES = ["farleft", "left", "green", "center", "right", "farright", "other"]
# Aggregated column names: every party except "other", suffixed with "_agg".
PARTIES_AGG = [f"{party}_agg" for party in PARTIES if party != "other"]

In [2]:
# Select the "arviz-darkgrid" plotting style through ArviZ's style module.
az.style.use("arviz-darkgrid")


def get_top_n(df: pd.DataFrame, nlargest: int = 3) -> pd.DataFrame:
    """Return, for every row, the labels of the columns holding its largest values.

    Parameters
    ----------
    df : pd.DataFrame
        Numeric frame; each row is ranked independently across its columns.
    nlargest : int, default 3
        Number of top-ranked column labels to keep per row.

    Returns
    -------
    pd.DataFrame
        Frame with the same index as ``df`` and columns ``top1`` ...
        ``top{nlargest}`` containing column labels of ``df``, ordered from the
        largest to the smallest value of each row.
    """
    # Negating the values makes argsort rank each row in decreasing order.
    order = np.argsort(-df.to_numpy(), axis=1)[:, :nlargest]

    # Fancy-index a plain array of labels rather than the Index itself:
    # multi-dimensional indexing on a pandas Index was deprecated in pandas 1.0
    # and raises in pandas 2.
    labels = df.columns.to_numpy()[order]

    return pd.DataFrame(
        labels,
        columns=[f"top{i}" for i in range(1, nlargest + 1)],
        index=df.index,
    )

In [3]:
# Election results per district; the first spreadsheet column becomes the index.
d = pd.read_excel("../data/results_by_districts_paris.xlsx", index_col=0)
# A party that wasn't there won 0 ballots: fill the gaps, then store counts as ints.
ballots = d[PARTIES].fillna(0)
d[PARTIES] = ballots.astype(int)
# Integer code for every row plus the sorted unique arrondissements.
district_id, districts = d["arrondissement"].factorize(sort=True)

# Reload the pickled model and trace.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open("trace_dos_effects.pkl", "rb") as f:
    data = pickle.load(f)
m_dos_effects = data["model"]
trace_dos_effects = data["trace"]
idata = az.from_pymc3(trace=trace_dos_effects)
post = idata.posterior

In [4]:
# Seeded generator so that "Restart & Run All" always selects the same district
# and election; change SEED to look at a different draw.
SEED = 42
rng = np.random.RandomState(SEED)

# Random district position; the upper bound follows the number of districts
# found in the data instead of a hard-coded 20.
district = rng.randint(0, len(districts))

# Pick one election date at random and flag the rows of `d` that belong to it.
election = rng.choice(d.date.unique())
election_mask = (d.date == election).to_numpy()

In [5]:
# Keep only the posterior entries whose first p_district dimension matches the
# rows of the selected election.
election_rows = election_mask.nonzero()[0]
election_est = post.sel(p_district_dim_0=election_rows)
# Rescale by 100 (presumably proportions -> percentages; confirm against the model).
election_est["p_district"] = 100 * election_est["p_district"]

# Flatten (chain, draw) into one sample axis and the two p_district dimensions
# into one column axis, then move to pandas with a fresh integer row index.
stacked = election_est["p_district"].stack(
    samples=("chain", "draw"),
    districts=("p_district_dim_0", "p_district_dim_1"),
)
preds = stacked.to_pandas().reset_index(drop=True)

# Relabel the two column levels with district and party names.
# NOTE(review): assumes the election's rows are ordered like `districts` -- confirm.
preds.columns = (
    preds.columns
    .set_levels([districts, PARTIES])
    .rename(["districts", "parties"])
)

In [12]:
# One small summary frame per district, keyed by district label.
win_summary_by_district = {}
for dis in districts:
    df = preds[dis]
    winners = get_top_n(df, 1)

    # Share of posterior samples in which each party comes first, as a whole
    # percentage; value_counts sorts it from most to least frequent winner.
    probs = pd.DataFrame(winners.top1.value_counts(normalize=True).multiply(100).round().astype(int))
    probs.index.name = "winner"
    probs.columns = ["odds"]
    probs["low"], probs["high"], probs["mean"] = np.nan, np.nan, np.nan

    for party in probs.index:
        probs.loc[party, "low"], probs.loc[party, "high"] = az.hpd(df[party])
        probs.loc[party, "mean"] = df[party].mean()
        # assign means to color bins:
        # np.digitize returns len(BINS) for values at or above the last edge,
        # which is one past the end of the palette, so clamp to the last colour.
        palette = COLORS[party]
        bins_idx = min(np.digitize(probs.loc[party, "mean"], BINS), len(palette) - 1)
        probs.loc[party, "color"] = palette[bins_idx]
    win_summary_by_district[dis] = probs.reset_index()

# Stack the per-district frames; the inner index level is the winner's rank.
win_summary = pd.concat(win_summary_by_district).reset_index(level=1)
# keep only most probable winner:
win_summary = win_summary[win_summary.level_1 == 0].drop("level_1", axis=1)

Unnamed: 0,winner,odds,low,high,mean,color
1,right,100,38.935032,41.958096,40.505807,#6baed6
2,left,100,37.28983,39.965571,38.648445,#df65b0
3,left,100,41.84752,44.018305,42.888641,#df65b0
4,left,100,38.439121,40.81145,39.609831,#df65b0
5,left,100,40.809962,42.422721,41.60897,#df65b0


In [13]:
# Read the district polygons, order them by their "code" column and index them
# with the district labels so they line up with `win_summary`.
shapes_raw = gpd.read_file("../data/paris_shape.json")
paris_shape = (
    shapes_raw
    .sort_values("code")
    .reset_index(drop=True)
    .set_index(districts)
)

# Attach the winner summary column-wise, then expose the district label as a column.
paris_shape = pd.concat([paris_shape, win_summary], axis=1).reset_index()

# Display tweaks: title-cased winner names and one-decimal statistics.
paris_shape["winner"] = paris_shape["winner"].str.title()
stat_cols = ["low", "high", "mean"]
paris_shape[stat_cols] = paris_shape[stat_cols].round(1)

# Serialise to GeoJSON for bokeh.
geosource = GeoJSONDataSource(geojson=paris_shape.to_json())

Unnamed: 0,index,code,nom,geometry,winner,odds,low,high,mean,color
0,1,75101,Paris 1er Arrondissement,"POLYGON ((2.32576 48.86955, 2.32787 48.86986, ...",Right,100,38.9,42.0,40.5,#6baed6
1,2,75102,Paris 2e Arrondissement,"POLYGON ((2.35084 48.86334, 2.32787 48.86986, ...",Left,100,37.3,40.0,38.6,#df65b0
2,3,75103,Paris 3e Arrondissement,"POLYGON ((2.35009 48.86195, 2.35084 48.86334, ...",Left,100,41.8,44.0,42.9,#df65b0
3,4,75104,Paris 4e Arrondissement,"POLYGON ((2.34456 48.85399, 2.35009 48.86195, ...",Left,100,38.4,40.8,39.6,#df65b0
4,5,75105,Paris 5e Arrondissement,"POLYGON ((2.34456 48.85399, 2.36432 48.84617, ...",Left,100,40.8,42.4,41.6,#df65b0


In [14]:
# The title carries the date of the election selected earlier in the notebook.
plot_title = f'Predictions by district for {pd.to_datetime(election).date()} election'
p = bkp.figure(
    title=plot_title,
    title_location="above",
    align="center",
    plot_height=350,
    plot_width=600,
    sizing_mode="scale_both",
    tools="",
    toolbar_location=None,
)

# Map-like look: no axes, grid lines or outline, on a uniform light-grey canvas.
p.axis.visible = False
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.outline_line_color = None
p.background_fill_color = '#e6e6e6'
p.border_fill_color = '#e6e6e6'

p.title.align = "center"
p.title.text_font_size = "1.2rem"

# One patch per district, filled with the colour stored in the "color" field.
district_shapes = p.patches(
    'xs',
    'ys',
    source=geosource,
    fill_color="color",
    fill_alpha=1,
    line_color="gray",
    line_width=0.75,
)

# Hover box showing the winner, its odds and its vote-share summary.
hover_rows = [
    ("", 'Paris, District @index'),
    ("Winner", '@winner, with @odds% chance'),
    ("Mean", "@mean{1.1} %"),
    ("94% HPD", '[@low{1.1} - @high{1.1}] % of votes'),
]
p.add_tools(HoverTool(renderers=[district_shapes], tooltips=hover_rows))
show(p)