In [6]:
%load_ext lab_black
%matplotlib inline

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pymc3 as pm
import scipy as sp
import seaborn as sns
import theano.tensor as tt
import warnings

from scipy.special import expit as logistic
from scipy.special import softmax
from typing import Set

# Hide every FutureWarning for the rest of the session.
# NOTE(review): this also hides pandas/matplotlib deprecation notices emitted by later cells.
warnings.simplefilter(action="ignore", category=FutureWarning)

# Global plotting defaults: seaborn "notebook" context with larger fonts and a 12x5 figure,
# then two matplotlib style sheets layered on top.
sns.set(context="notebook", font_scale=1.2, rc={"figure.figsize": (12, 5)})
plt.style.use(["seaborn-colorblind", "seaborn-darkgrid"])

# NOTE(review): RANDOM_SEED is not referenced in the cells visible here -- presumably it is
# passed to a sampler further down; confirm.
RANDOM_SEED = 8927
# Seeds NumPy's global random generator (with a literal that differs from RANDOM_SEED).
np.random.seed(286)

# Party-family names. The first six match the keys of AFFILIATIONS; "other" is the
# catch-all column added by attribute_parties.
PARTIES = ["farleft", "left", "green", "center", "right", "farright", "other"]
# Same names with an "_int" suffix (not used in the cells visible here).
PARTIES_INT = [f"{p}_int" for p in PARTIES]

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [3]:
# Load the first-round results, one row per arrondissement/sector.
d = pd.read_excel("data/election_results_1st_round/munic2014-ardmnt.xlsx")
d["date"] = d["Date de l'export"].dt.normalize()  # only interested in the date

# Commune labels look like "Lyon secteur 1". Split from the right into exactly three parts
# (city, the word in between, sector number) so that multi-word city names stay intact.
# This replaces tuple-unpacking of the `.str` accessor, which is deprecated (the
# FutureWarning is silenced at the top of the notebook) and unsupported by recent pandas.
commune_parts = d["Libellé de la commune"].str.rsplit(n=2, expand=True)
d["ville"] = commune_parts[0]
d["arrondissement"] = commune_parts[2].astype(int)

d = d.sort_values(["ville", "arrondissement"])
d.head()

Unnamed: 0,Date de l'export,Code du département,Type de scrutin,Libellé du département,Code de la commune,Libellé de la commune,Inscrits,Abstentions,% Abs/Ins,Votants,...,Liste.10,Sièges / Elu.10,Sièges Secteur.10,Sièges CC.10,Voix.10,% Voix/Ins.10,% Voix/Exp.10,date,ville,arrondissement
0,2014-03-25 12:52:00,69,LI2,RHONE,123SR01,Lyon secteur 1,16482,6936,42.08,9546,...,,,,,,,,2014-03-25,Lyon,1
1,2014-03-25 12:52:00,69,LI2,RHONE,123SR02,Lyon secteur 2,16863,6658,39.48,10205,...,,,,,,,,2014-03-25,Lyon,2
2,2014-03-25 12:52:00,69,LI2,RHONE,123SR03,Lyon secteur 3,52133,22494,43.15,29639,...,,,,,,,,2014-03-25,Lyon,3
3,2014-03-25 12:52:00,69,LI2,RHONE,123SR04,Lyon secteur 4,22557,9096,40.32,13461,...,,,,,,,,2014-03-25,Lyon,4
4,2014-03-25 12:52:00,69,LI2,RHONE,123SR05,Lyon secteur 5,28373,11724,41.32,16649,...,,,,,,,,2014-03-25,Lyon,5


In [4]:
# Keep the identifying columns plus, for every list, its nuance code followed by its votes.
nuance_cols = d.filter(like="Code Nuance").columns
# startswith (not "contains") so that the "% Voix/..." percentage columns are left out
vote_cols = d.columns[d.columns.str.startswith("Voix")]

subset = ["date", "ville", "arrondissement", "Exprimés"]
for i, j in zip(nuance_cols, vote_cols):
    # interleave: Code Nuance, Voix, Code Nuance.1, Voix.1, ...
    subset.extend((i, j))

d = d[subset]
d.head()

Unnamed: 0,date,ville,arrondissement,Exprimés,Code Nuance,Voix,Code Nuance.1,Voix.1,Code Nuance.2,Voix.2,...,Code Nuance.6,Voix.6,Code Nuance.7,Voix.7,Code Nuance.8,Voix.8,Code Nuance.9,Voix.9,Code Nuance.10,Voix.10
0,2014-03-25,Lyon,1,9433,LEXG,86,LFG,3156,LSOC,2447,...,LFN,583.0,,,,,,,,
1,2014-03-25,Lyon,2,10055,LFG,487,LSOC,2737,LVEC,609,...,,,,,,,,,,
2,2014-03-25,Lyon,3,29134,LFG,1579,LSOC,11256,LVEC,2854,...,LFN,3603.0,,,,,,,,
3,2014-03-25,Lyon,4,13199,LEXG,123,LFG,1323,LSOC,4522,...,LDIV,375.0,LUD,3493.0,LFN,1131.0,,,,
4,2014-03-25,Lyon,5,16405,LEXG,154,LFG,752,LSOC,5954,...,LFN,1857.0,,,,,,,,


In [20]:
def extract_nuances(nuances_df: pd.DataFrame) -> Set[str]:
    """
    Extract the nuances competing in this election.

    Every cell of ``nuances_df`` is inspected; each distinct non-null value is
    returned once.

    Parameters
    ----------
    nuances_df : pd.DataFrame
        Frame whose cells hold nuance labels (missing cells are None/NaN).

    Returns
    -------
    Set[str]
        The distinct non-null labels found anywhere in the frame; an empty set
        when the frame is empty or entirely null.
    """
    # Flatten all cells into one 1-D array instead of looping row by row in Python.
    labels = nuances_df.to_numpy().ravel()
    # Drop the empty cells, then let the set remove duplicates.
    return set(labels[pd.notnull(labels)])


def format_results(df: pd.DataFrame, nuances_set: Set[str]) -> pd.DataFrame:
    """
    Reshape the raw results into one vote column per nuance.

    The raw frame stores each list as a pair of columns: a label column whose name
    contains "Code Nuance" and a vote column whose name contains "Voix". Pairs are
    matched by position. For every row, the votes of each label are written under a
    column named after that label; a label that appears several times in one row has
    its votes added together.

    Parameters
    ----------
    df : pd.DataFrame
        Raw results with the columns "date", "ville", "arrondissement", "Exprimés"
        and the label/vote column pairs. Any column whose name contains "Voix" is
        treated as a vote column, so percentage columns such as "% Voix/Ins" must
        already have been removed.
    nuances_set : Set[str]
        Every label that may appear in the "Code Nuance" columns.

    Returns
    -------
    pd.DataFrame
        The four identifying columns followed by one column per nuance, in sorted
        order. A nuance absent from a row is NaN in that row.

    Raises
    ------
    KeyError
        If a row contains a label that is not in ``nuances_set``.
    """
    # The column labels do not depend on the row: resolve them once, not per row.
    nuance_cols = list(df.filter(like="Code Nuance").columns)
    score_cols = list(df.filter(like="Voix").columns)

    # A set of strings iterates in a hash-dependent order that changes between kernel
    # sessions; sorting makes the output column order reproducible.
    ordered_nuances = sorted(nuances_set)
    votes = {nuance: [] for nuance in ordered_nuances}

    # each line is an arrondissement:
    for _, line in df.iterrows():
        row_totals = {}  # nuance -> votes accumulated in this line

        # iterate over the (label, votes) pairs of the line:
        for n, s in zip(nuance_cols, score_cols):
            name = line[n]
            if pd.isnull(name):
                continue  # unused slot
            if name not in votes:
                raise KeyError(f"nuance {name!r} is not in nuances_set")
            score = line[s]
            if name in row_totals:
                # nuance already seen in this line: add up its lists
                row_totals[name] += score
            else:
                row_totals[name] = score

        # a nuance never seen in the line is not competing in this arrondissement
        for nuance in ordered_nuances:
            votes[nuance].append(row_totals.get(nuance, np.nan))

    res = {
        "date": df["date"].values,
        "ville": df["ville"].values,
        "arrondissement": df["arrondissement"].values,
        "Exprimés": df["Exprimés"].values,
    }
    res.update(votes)

    return pd.DataFrame(data=res)

In [21]:
# Collect every nuance label that appears in any "Code Nuance*" column of d.
nuances_set = extract_nuances(d.filter(like="Code Nuance"))
# Reshape d to one vote column per nuance (NaN where a nuance is absent from a row).
d_ = format_results(d, nuances_set)

Unnamed: 0,date,ville,arrondissement,Exprimés,LEXG,LFN,LDVD,LPG,LUG,LUDI,LUD,LDIV,LVEC,LUMP,LDVG,LSOC,LFG
0,2014-03-25,Lyon,1,9433,86.0,583.0,,,,,1804.0,293.0,1064.0,,,2447.0,3156.0
1,2014-03-25,Lyon,2,10055,,1159.0,,,,,4738.0,325.0,609.0,,,2737.0,487.0
2,2014-03-25,Lyon,3,29134,,3603.0,,,,,8161.0,1681.0,2854.0,,,11256.0,1579.0
3,2014-03-25,Lyon,4,13199,123.0,1131.0,,,,,3493.0,630.0,1567.0,,410.0,4522.0,1323.0
4,2014-03-25,Lyon,5,16405,154.0,1857.0,,,,,5850.0,498.0,1340.0,,,5954.0,752.0
5,2014-03-25,Lyon,6,17920,,1867.0,,,,,8971.0,610.0,1110.0,,,4801.0,561.0
6,2014-03-25,Lyon,7,19902,227.0,2597.0,,,,,4746.0,900.0,2165.0,,,7724.0,1543.0
7,2014-03-25,Lyon,8,18543,317.0,3421.0,,,,,4303.0,586.0,1435.0,,,7473.0,1008.0
8,2014-03-25,Lyon,9,12220,199.0,1684.0,,,,,2701.0,448.0,921.0,,,5581.0,686.0
9,2014-03-25,Marseille,1,23480,84.0,3526.0,,,6331.0,,9063.0,594.0,,,1774.0,,2108.0


In [None]:
# Party family -> every column label (candidate name, list code or list title) counted
# as belonging to that family. Labels missing from this mapping end up in "other".
AFFILIATIONS = {
    "farleft": [
        "arthaud",
        "besancenot",
        "buffet",
        "gluckstein",
        "hue",
        "laguiller",
        "mélenchon",
        "poutou",
        "LFG",
        "FG",
        "FI",
        "BC-FG",
        "LXG",
        "LEXG",
        "EXG",
        "BC-EXG",
        "LCOP",
        "LPC",
        "LCOM",
        "COM",
        "BC-COM",
        "LUTTE OUVRIERE ET LCR",
        "LUTTE OUVRIERE",
        "BOUGE L'EUROPE",
        "PARTI DES TRAVAILL.",
        "PCF",
    ],
    "left": [
        "hamon",
        "hollande",
        "jospin",
        "royal",
        "LUG",
        "LSOC",
        "LPS",
        "SOC",
        "BC-SOC",
        "BC-UG",
        "LGA",
        "CONSTRUISONS NOTRE EUROPE",
        "EUROPE SOLIDAIRE",
    ],
    "green": [
        "bové",
        "cohn-bendit",
        "joly",
        "mamere",
        "voynet",
        "LVEC",
        "LVE",
        "LVEG",
        "LEC",
        "VEC",
        "BC-VEC",
        "LECO",
        "ECO",
        "L'ECOLOGIE, LES VERTS",
        "ECOLOGIE  CHOIX DE LA VIE",
        "UNION DES ECOLOG.",
        "GENERATION ECOLOGIE",
    ],
    "center": [
        "balladur",
        "bayrou",
        "macron",
        "REM",
        "LUC",
        "LCMD",
        "MDM",
        "LUDF",
        "UDF",
        "UDFD",
        "AVEC L'EUROPE",
        "ENERGIE RADICALE",
        "AUTRE EUROPE",
    ],
    "right": [
        "chirac",
        "fillon",
        "sarkozy",
        "LR",
        "LUMP",
        "LUD",
        "LMAJ",
        "UDI",
        "BC-UDI",
        "UMP",
        "BC-UMP",
        "BC-UD",
        "LDR",
        "L'UNION POUR L'EUROPE",
        "UDF-RPR",
        "RPR",
        "M-NC",
        "M",
    ],
    "farright": [
        "le pen",
        "LFN",
        "FN",
        "FRN",
        "BC-FN",
        "FRONT NATIONAL",
    ],
}

In [None]:
def attribute_parties(election: str, df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse candidate / list columns into one column per party family.

    For each family in ``AFFILIATIONS``, the columns of ``df`` whose label belongs to
    that family are merged into a single column named after the family:

    * if one of the labels "mélenchon", "FI", "LFG", "FG" or "BC-FG" is present, only
      those columns are kept for the family (the family's other columns are not added
      to it and therefore fall into "other");
    * otherwise all matching columns are summed (NaN counts as 0 in the sum);
    * a single matching column is simply renamed.

    Columns that belong to no family are dropped, and an "other" column is added as
    100 minus the row sum of the family columns.
    NOTE(review): the constant 100 implies the inputs are percentage shares -- confirm
    against the caller.

    Parameters
    ----------
    election : str
        Identifier of the election, used as the "election" index level.
    df : pd.DataFrame
        One column per candidate / list label. The frame is not modified.

    Returns
    -------
    pd.DataFrame
        Family columns in the order of ``AFFILIATIONS`` followed by "other", indexed
        by (original index, election) and sorted by index.
    """
    # Labels that are kept on their own for their family when present
    # (original note: "take only LFI for farleft, starting in 2012").
    exclusive_labels = ("mélenchon", "FI", "LFG", "FG", "BC-FG")

    df = df.copy()  # work on a copy so the caller's frame is left untouched

    for party, labels in AFFILIATIONS.items():
        known = set(labels)
        # which candidates/lists represent this party in this election?
        present = [col for col in df.columns if col in known]
        exclusive = [col for col in present if col in exclusive_labels]
        selected = exclusive or present

        if len(selected) >= 2:
            # sum the candidates of the same family, then drop the originals
            df[party] = df[selected].sum(axis=1)
            df = df.drop(columns=selected)
        elif len(selected) == 1:
            # rename the column of the only candidate of this party
            df = df.rename(columns={selected[0]: party})

    # which parties are present in this election? (AFFILIATIONS order, deterministic)
    competing = [party for party in AFFILIATIONS if party in df.columns]
    shares = df[competing]
    # catches every party that is not covered by a family
    shares = shares.assign(other=100 - shares.sum(axis=1))
    shares = pd.concat([shares], keys=[election], names=["election"])

    return shares.swaplevel().sort_index()

In [None]:
# Label the frame with the file name minus its extension and collapse it to party families.
# NOTE(review): `f` and `results` are not defined in any cell visible here (the frame built
# above is `d_`) -- presumably they come from a file loop elsewhere; confirm this cell still
# runs after Restart & Run All.
results = attribute_parties(f.split('.')[0], results)

In [None]:
# https://opendata.paris.fr/explore/dataset/bureaux-de-votes/table/
# https://fr.wikipedia.org/wiki/%C3%89lections_municipales_de_2008_%C3%A0_Paris#R%C3%A9sultats_par_arrondissement