# Parsing Election Results 

Original datafile from http://enr.elections.ca/National.aspx?lang=e

In [30]:
import pandas as pd
import numpy as np

In [64]:
# Tab-separated file: header=1 takes the column names from the second line,
# skipfooter=3 drops the last three lines (skipfooter needs engine='python').
df = pd.read_csv('../data/final_results.txt', engine='python', sep='\t', header=1, index_col=None,
                 skipfooter=3)

In [65]:
# Show the column labels that were read from the file's header line.
df.columns

Index([u'Electoral district number - Numéro de la circonscription', u'Electoral district name', u'Nom de la circonscription', u'Type of results*', u'Type de résultats**', u'Surname - Nom de famille', u'Middle name(s) - Autre(s) prénom(s)', u'Given name - Prénom', u'Political affiliation', u'Appartenance politique', u'Votes obtained - Votes obtenus', u'% Votes obtained - Votes obtenus %', u'Rejected ballots - Bulletins rejetés***', u'Total number of ballots cast - Nombre total de votes déposés'], dtype='object')

In [66]:
# Rough rows-per-district figure; 338 is presumably the number of
# electoral districts in the file - TODO confirm.
len(df)/338

5

In [67]:
# Re-label the district-number column as 'FEDNUM': pop() removes the
# original column and returns its values, which are stored under the new name.
df['FEDNUM'] = df.pop('Electoral district number - Numéro de la circonscription')

In [68]:
# Short codes for the parties that get their own column in the parsed table.
# The Bloc key is spelled with escaped UTF-8 bytes for the accented letters.
party_dict = {
    'Bloc Qu\xc3\xa9b\xc3\xa9cois': 'blc',
    'Liberal': 'lib',
    'Green Party': 'grn',
    'Conservative': 'con',
    'NDP-New Democratic Party': 'ndp',
}

In [69]:
# Every affiliation that has no explicit short code is grouped under 'oth'.
for affiliation in set(df['Political affiliation']):
    party_dict.setdefault(affiliation, 'oth')

In [70]:
def party(p):
    """Return the short code stored in ``party_dict`` for affiliation ``p``.

    Raises KeyError when ``p`` is not a key of ``party_dict``.
    """
    code = party_dict[p]
    return code

In [71]:
# Translate each row's affiliation into its short party code.
df['party_short'] = df['Political affiliation'].map(party)

In [98]:
# Output layout: the riding name, one vote-share column per party code,
# then the winning party code and the sum of the shares.
cols = [
    'riding_name',
    'con', 'lib', 'ndp', 'grn', 'blc', 'oth',
    'elected', 'check_sum',
]
# One empty row per distinct FEDNUM. The labels come from a set, so the
# row order is whatever order the set happens to iterate in.
df_parsed = pd.DataFrame(index=set(df['FEDNUM']), columns=cols)
df_parsed.index.name = 'FEDNUM'

In [99]:
# Fill df_parsed from the per-candidate rows of df: record each riding's name
# once and add every candidate's vote share to the column of their party code.
#
# .loc replaces the original .ix / chained indexing: .ix was deprecated in
# pandas 0.20 and removed in 1.0, and .loc does the same label-based lookup here.
pct_col = '% Votes obtained - Votes obtenus %'
rows = zip(df['FEDNUM'], df['Electoral district name'], df['party_short'], df[pct_col])
for fednum, riding_name, short, pct in rows:
    # The riding name is still the empty (non-string) placeholder until the
    # first row for this riding is seen.
    if not isinstance(df_parsed.loc[fednum, 'riding_name'], str):
        df_parsed.loc[fednum, 'riding_name'] = riding_name

    # An empty cell starts the running total; afterwards shares are summed,
    # so several candidates mapped to the same code (e.g. 'oth') add up.
    if np.isnan(df_parsed.loc[fednum, short]):
        df_parsed.loc[fednum, short] = pct
    else:
        df_parsed.loc[fednum, short] += pct

In [100]:
# Distinct short party codes (the values of party_dict, duplicates removed).
parties = set(party_dict.values())

In [101]:
# For every riding, sum the recorded vote shares into 'check_sum' and store
# the party code with the largest share in 'elected'.
for fednum in df_parsed.index:
    best_share = 0
    check_sum = 0
    # Reset per riding: otherwise a riding with no positive share would reuse
    # the previous riding's winner (or raise NameError on the first riding).
    elected = None
    for p in parties:
        share = df_parsed.loc[fednum, p]
        if np.isnan(share):
            # No candidate was recorded under this code in this riding.
            continue
        check_sum += share
        if share > best_share:
            best_share = share
            elected = p
    # .loc replaces .ix, which was deprecated in pandas 0.20 and removed in 1.0.
    df_parsed.loc[fednum, 'elected'] = elected
    df_parsed.loc[fednum, 'check_sum'] = check_sum
    if elected == 'oth':
        # Flag ridings won by a party outside the five named codes.
        print(elected)

In [106]:
# Preview the first rows of the per-riding table.
df_parsed.head()

Unnamed: 0_level_0,riding_name,con,lib,ndp,grn,blc,oth,elected,check_sum
FEDNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
24064,Rosemont--La Petite-Patrie,4.3,20.7,49.2,3.1,21.0,1.7,ndp,100
24065,Marc-Aurèle-Fortin,11.8,41.6,23.3,1.9,21.4,,lib,100
24066,Saint-Hyacinthe--Bagot,16.8,27.5,28.5,2.3,24.4,0.5,ndp,100
24067,Saint-Jean,10.8,33.2,29.1,2.1,24.8,,lib,100
24068,Saint-Laurent,19.5,61.6,11.5,2.4,4.7,0.3,lib,100


In [104]:
# Distinct per-riding totals of the vote shares. They are expected to sit near
# 100; small deviations presumably come from rounding in the source
# percentages - verify.
set(df_parsed['check_sum'])

{99.800000000000011,
 99.899999999999977,
 99.899999999999991,
 99.900000000000006,
 99.999999999999986,
 100.0,
 100.00000000000001,
 100.09999999999999,
 100.10000000000001,
 100.10000000000002,
 100.19999999999999}

In [107]:
# Write the per-riding table (indexed by FEDNUM) next to the input data.
df_parsed.to_csv('../data/final_results_parsed.csv')