In [1]:
import os

import pandas as pd
import numpy as np

# Consolidate Multiple Files

### 1) Elezioni Camera e Senato 2018

In [2]:
# Build `parlamento2018`: one long table with the list votes of both the
# Camera and the Senato 2018 elections, read from every file found under
# raw/camera_2018 and raw/senato_2018.
data_dir = r'raw'
paths = ['camera_2018', 'senato_2018']

# Columns kept from every raw file ('Elezione' is added below, not read).
keep_cols = ['Ente', 'Candidato', 'Liste/Gruppi', 'Voti lista', 'Elezione']

final_dfs = []

for p in paths:

    print(p)

    fullpath = os.path.join(data_dir, p)
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the consolidated table reproducible across runs and machines.
    files = sorted(os.listdir(fullpath))

    temp_files = []

    for f in files:

        temp = pd.read_csv(os.path.join(fullpath, f), sep=";")

        # Forward fill for multi-party candidates.
        # DataFrame.ffill() replaces the deprecated fillna(method='ffill');
        # like the original call it fills missing cells in *every* column
        # with the last valid value above them.
        temp = temp.ffill()
        # Tag each row with the election (folder name) it came from.
        temp['Elezione'] = p
        temp = temp[keep_cols]

        temp_files.append(temp)

    # Concatenate the files of this election
    temp_elezioni = pd.concat(temp_files)

    final_dfs.append(temp_elezioni)

# Stack both elections into a single frame
parlamento2018 = pd.concat(final_dfs)

camera_2018
senato_2018


### 2) Regionali 2015

In [3]:
# Build `regionali2015`: list and candidate votes of the 2015 regional
# election, read from every file found under raw/regionali_2015.
data_dir = r'raw'
paths = ['regionali_2015']

# Columns kept from every raw file.
cols = ['Ente', 'Candidato', 'Liste/Gruppi', 'Voti lista', 'Voti candidato']

final_dfs = []

for p in paths:

    print(p)

    fullpath = os.path.join(data_dir, p)
    # Sorted so the row order does not depend on the filesystem listing order.
    files = sorted(os.listdir(fullpath))

    temp_files = []

    for f in files:

        temp = pd.read_csv(os.path.join(fullpath, f), sep=";")
        # Only the candidate name is forward-filled here (Series.ffill()
        # replaces the deprecated fillna(method='ffill')).
        temp['Candidato'] = temp['Candidato'].ffill()
        temp = temp[cols]
        temp_files.append(temp)

    # Concatenate the files of this election
    temp_elezioni = pd.concat(temp_files)

    # Collect per-path results so that adding more entries to `paths`
    # does not silently keep only the last one.
    final_dfs.append(temp_elezioni)

regionali2015 = pd.concat(final_dfs)

regionali_2015


### 3) Europee 2019

In [4]:
# Build `europee2019`: list votes of the 2019 European election, read from
# every file found under raw/europee_2019.
data_dir = r'raw'
paths = ['europee_2019']

final_dfs = []

for p in paths:

    print(p)

    fullpath = os.path.join(data_dir, p)
    # Sorted so the row order does not depend on the filesystem listing order.
    files = sorted(os.listdir(fullpath))

    temp_files = []

    for f in files:

        # reset_index() moves whatever read_csv used as index into regular
        # columns, giving the five columns renamed on the next line.
        # NOTE(review): presumably the rows carry more fields than the header
        # declares, so the leading field(s) land in the index - confirm.
        temp = pd.read_csv(os.path.join(fullpath, f), sep=";").reset_index()
        temp.columns = ['Ente', 'Liste/Gruppi', 'Voti', '1', '2']
        # Keyword form: passing the axis positionally (drop(labels, 1)) is
        # rejected by pandas >= 2.0. Also avoids inplace mutation.
        temp = temp.drop(columns=['1', '2'])
        temp_files.append(temp)

    # Collect per-path results; the original concatenated `temp_files` after
    # the loop, which would keep only the last path if `paths` grew.
    final_dfs.append(pd.concat(temp_files))

europee2019 = pd.concat(final_dfs)

europee_2019


# Analytics

### Functions

In [8]:
def get_comuni_campania_geo():
    """Download the coordinates of the municipalities of Campania.

    One page per province (Avellino, Benevento, Salerno, Napoli, Caserta) is
    fetched from dossier.net with ``pd.read_html``; the first table of each
    page is used, its first row is discarded and its three columns are named
    ``Ente``, ``Latitudine`` and ``Longitudine``. Municipality names are
    upper-cased.

    Returns
    -------
    pandas.DataFrame
        The five provincial tables stacked in the order listed above, with
        columns ``['Ente', 'Latitudine', 'Longitudine']``. The index of each
        provincial table is kept as-is, so index labels repeat across
        provinces.

    Notes
    -----
    Requires network access; any error raised by ``pd.read_html`` propagates.
    """
    base_url = 'http://www.dossier.net/utilities/coordinate-geografiche/provincia-{}.htm'
    provinces = ['avellino', 'benevento', 'salerno', 'napoli', 'caserta']

    frames = []

    for province in provinces:

        # First table on the page, without its first row
        # (presumably the header row parsed as data - TODO confirm).
        # .copy() so the assignments below act on an independent frame
        # rather than on a slice of the parsed table.
        geo = pd.read_html(base_url.format(province))[0].iloc[1:].copy()
        geo.columns = ['Ente', 'Latitudine', 'Longitudine']
        # Vectorised upper-casing; unlike a Python-level x.upper() loop it
        # leaves missing names as missing instead of raising.
        geo['Ente'] = geo['Ente'].str.upper()

        frames.append(geo)

    comuni_campania = pd.concat(frames)

    return comuni_campania


def generate_pivot(df, columns, values, geo):
    """Pivot votes per municipality and attach vote shares and coordinates.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format results; must contain an ``'Ente'`` column plus the
        columns named by ``columns`` and ``values``.
    columns : str
        Column of ``df`` whose distinct values become the pivot columns.
    values : str
        Column of ``df`` holding the vote counts to be summed.
    geo : pandas.DataFrame
        Table with an ``'Ente'`` column; its remaining columns are appended
        to the result through a left join.

    Returns
    -------
    pandas.DataFrame
        One row per ``Ente``. For every pivot column ``c`` the frame holds
        ``c`` (summed votes, 0 where the combination is absent) immediately
        followed by ``'<c>_PERC'`` (``c`` divided by the row total), then
        the columns coming from ``geo``. The row total itself is not part
        of the output. Rows whose total is 0 get NaN shares.
    """
    # aggfunc='sum' is the string form of the original np.sum; recent pandas
    # emits a FutureWarning when the numpy callable is passed.
    votes = pd.pivot_table(df,
                           index='Ente',
                           columns=columns,
                           values=values,
                           aggfunc='sum'
                           ).fillna(0)

    totals = votes.sum(axis=1)

    # All shares computed in one vectorised division instead of inserting
    # one column at a time (which fragments wide frames).
    shares = votes.div(totals, axis=0)
    shares.columns = ['{}_PERC'.format(c) for c in votes.columns]

    # Interleave: each votes column is followed by its share column.
    ordered_cols = []
    for votes_col, share_col in zip(votes.columns, shares.columns):
        ordered_cols.append(votes_col)
        ordered_cols.append(share_col)

    pivot = pd.concat([votes, shares], axis=1)[ordered_cols].reset_index()

    # Left join: municipalities without coordinates are kept, with NaN geo.
    pivot = pd.merge(pivot, geo, on='Ente', how='left')

    return pivot

### Analysis

In [6]:
# Coordinates used to geo-locate every pivot below. Despite the variable
# name, get_comuni_campania_geo() only loads the provinces of Campania.
geo_comuni_italiani =  get_comuni_campania_geo()

In [9]:
# Votes and vote shares per municipality and list, Camera + Senato 2018.
pivot_parlamento = generate_pivot(
    df=parlamento2018,
    columns='Liste/Gruppi',
    values='Voti lista',
    geo=geo_comuni_italiani,
)

In [11]:
# Votes and vote shares per municipality and list, Regionali 2015.
pivot_regionali = generate_pivot(
    df=regionali2015,
    columns='Liste/Gruppi',
    values='Voti lista',
    geo=geo_comuni_italiani,
)

In [12]:
# Votes and vote shares per municipality and list, Europee 2019
# (this dataset stores the counts in 'Voti' rather than 'Voti lista').
pivot_europee = generate_pivot(
    df=europee2019,
    columns='Liste/Gruppi',
    values='Voti',
    geo=geo_comuni_italiani,
)

### Output

In [18]:
# Write the three consolidated tables and their pivots to one workbook,
# one sheet each.

# ExcelWriter does not create missing folders; make sure 'output' exists so
# the export also works on a fresh checkout.
os.makedirs('output', exist_ok=True)

# (sheet name, frame) pairs in the order the sheets are written.
# NOTE: 'regionale2015' is spelled differently from the variable
# (regionali2015); kept unchanged so existing consumers of the workbook
# still find the sheet.
sheets = [
    ('parlamento2018', parlamento2018),
    ('regionale2015', regionali2015),
    ('europee2019', europee2019),
    ('pivot_parlamento', pivot_parlamento),
    ('pivot_regionali', pivot_regionali),
    ('pivot_europee', pivot_europee),
]

with pd.ExcelWriter('output/dati_elettorali.xlsx') as writer:
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index = False)

# Heatmaps

In [19]:
import os
import folium
from folium.plugins import HeatMap

print(folium.__version__)

0.9.1


In [20]:
# Subset of the parliament pivot for a single list, with numeric coordinates
# and a score used to colour the map markers.
cols = ['Ente', 'POTERE AL POPOLO!', 'POTERE AL POPOLO!_PERC', 'Latitudine', 'Longitudine']

# Rows with any missing value (e.g. municipalities without coordinates after
# the left join) are dropped. The explicit .copy() makes `sub` an independent
# frame, so adding columns below cannot be treated as a write to a view of
# pivot_parlamento.
sub = pivot_parlamento[cols].dropna().copy()
sub['Latitudine'] = sub['Latitudine'].astype(float)
sub['Longitudine'] = sub['Longitudine'].astype(float)

# Score = absolute votes weighted by the vote share. The original cell also
# assigned two earlier values to 'Score' that were immediately overwritten by
# this one; those dead assignments are removed.
sub['Score'] = sub['POTERE AL POPOLO!'] * sub['POTERE AL POPOLO!_PERC']
sub.head()

Unnamed: 0,Ente,POTERE AL POPOLO!,POTERE AL POPOLO!_PERC,Latitudine,Longitudine,Score
4,ACERNO,17.0,0.006273,40.738759,15.057815,0.106642
5,ACERRA,615.0,0.012554,40.951722,14.377204,7.720453
6,AFRAGOLA,477.0,0.009241,40.921037,14.307217,4.407939
7,AGEROLA,73.0,0.009327,40.638759,14.53987,0.680848
8,AGROPOLI,254.0,0.013168,40.350583,14.989517,3.344704


In [21]:
# Independent copy of `sub` with short, generic column names (assigned
# positionally, so `sub` must have exactly these six columns in this order).
sub_map = sub.copy(deep=True)
sub_map.columns = [
    'Ente',
    'Voti',
    'Percentuale',
    'Latitudine',
    'Longitudine',
    'Score',
]

In [22]:
# Map centred on the mean coordinates of the selected municipalities.
m = folium.Map([sub['Latitudine'].mean(),
                sub['Longitudine'].mean()],
               zoom_start=8)

# Copy with short column names so rows can be read as attributes below.
sub_map = sub.copy()
sub_map.columns = ['Ente', 'Voti', 'Percentuale', 'Latitudine', 'Longitudine', 'Score']

# Threshold between green and red markers. Computed once here instead of on
# every loop iteration (the value does not change inside the loop).
mean_score = sub['Score'].mean()

for p in sub_map.itertuples():

    # One-row table rendered as HTML for the marker popup.
    pop = pd.DataFrame({'Comune': [p.Ente], 'Voti': [p.Voti], 'Percentuale': [p.Percentuale], 'Score': [p.Score] })
    html = pop.to_html(classes='table table-striped table-hover table-condensed table-responsive')

    # Green when the score is strictly above the mean, red otherwise; the two
    # original branches differed only in this colour.
    marker_color = 'green' if p.Score > mean_score else 'red'

    folium.CircleMarker(
        location=[p.Latitudine, p.Longitudine],
        radius=5,
        fill=True,
        popup=folium.Popup(html),
        color=marker_color
    ).add_to(m)
        
    

In [307]:
# test = sub[sub['Score'] > sub['Score'].mean()].copy()

# test['ratio_tot'] = test['Score'] / test['Score'].sum()

# budget = 1000

# test['budget_share'] = test['ratio_tot'] * budget

In [309]:
# test.sort_values(by = 'budget_share', ascending = False).head(20)

Unnamed: 0,Ente,POTERE AL POPOLO!,POTERE AL POPOLO!_PERC,Latitudine,Longitudine,Score,ratio_tot,budget_share
295,NAPOLI,9898.0,0.02588,40.839997,14.252871,256.161912,0.279798,279.798226
397,SALERNO,2246.0,0.016796,40.677957,14.765912,37.722776,0.041203,41.203494
354,PORTICI,1272.0,0.023597,40.814051,14.339019,30.015472,0.032785,32.785029
139,CAVA DE' TIRRENI,1263.0,0.022876,40.70034,14.68595,28.892232,0.031558,31.558147
358,POZZUOLI,1446.0,0.019739,40.823745,14.121622,28.54259,0.031176,31.176244
545,VILLAMAINA,170.0,0.15625,40.970481,15.090944,26.5625,0.029013,29.013448
38,AVELLINO,1220.0,0.021588,40.915168,14.795488,26.336837,0.028767,28.766963
447,SANT'ANDREA DI CONZA,178.0,0.116875,40.845963,15.370648,20.803677,0.022723,22.723253
112,CASERTA,1221.0,0.016193,41.075415,14.332195,19.771903,0.021596,21.596276
247,MARANO DI NAPOLI,981.0,0.018262,40.898143,14.194782,17.914723,0.019568,19.567732
