# Errores de codificación a nivel de mesa electoral

Busca mesas con resultados sospechosos en municipios con más de 100.000 votos (el umbral que aplica realmente el código de este cuaderno).

In [5]:
# Reload the autoreload extension and switch it to mode 2, so that edits to
# imported modules (such as common_functions) are picked up without
# restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [48]:
import glob
import pandas as pd
from common_functions import get_parties, get_election_results, process_election, find_suspicious, find_suspicious_2

Estos archivos provienen de http://www.infoelectoral.mir.es/infoelectoral/min/areaDescarga.html. Para acceder a ellos hay que ir al final de la página, hasta la sección *Extracción de datos*, y luego elegir los que nos interesen. Los que empiezan por *02* son elecciones municipales, *04* generales, etc. Dentro de cada .zip hay un archivo DOCX / RTF con instrucciones sobre los distintos códigos.

In [7]:
# Gather the downloaded election archives in alphabetical order and print each
# one next to its position, which is the index later used to pick a file.
datafiles = glob.glob("/home/chema/Dropbox/data/elecciones/*.zip")
datafiles.sort()
for numbered_path in enumerate(datafiles):
    print(numbered_path)

(0, '/home/chema/Dropbox/data/elecciones/02201512_MESA.zip')
(1, '/home/chema/Dropbox/data/elecciones/02201606_MESA.zip')
(2, '/home/chema/Dropbox/data/elecciones/04201105_MESA.zip')
(3, '/home/chema/Dropbox/data/elecciones/04201505_MESA.zip')


Vamos a buscar municipios que tuvieran más de 100.000 votos (suma de los votos de todas las candidaturas).

In [20]:
# Position in `datafiles` of the archive to analyse.
file_idx = 2
# Minimum number of votes (summed over all rows of a town) for the town to be
# included in the analysis. Named here so the cut-off is visible and tunable.
MIN_TOWN_VOTES = 100000

# Both filters are passed as None; presumably this loads every province and
# town contained in the archive (verify against process_election).
eldata = process_election(datafiles[file_idx], filter_prov_code = None, filter_town_code = None)

# Total votes per (province, town), largest first, keeping only the towns above
# the threshold. Built in a single chain so that re-running the cell always
# starts from `eldata` instead of from a previously filtered frame.
town_totals = (
    eldata.groupby(['prov_code', 'town_code'])
    .agg({'votes': 'sum'})
    .sort_values('votes', ascending = False)
    .reset_index()
    .loc[lambda totals: totals['votes'] > MIN_TOWN_VOTES, :]
)
print(town_totals.shape)

(20, 3)


In [31]:
# Lookup table with the town names. The CPRO and CMUN columns are read as text
# so that the codes are not parsed as numbers.
names_csv = "/home/chema/Dropbox/data/elecciones/11codmun.csv"
code_dtypes = {'CPRO': str, 'CMUN': str}
town_names = pd.read_csv(names_csv, dtype = code_dtypes)
# Rename the columns so that the code columns carry the same names as in
# `town_totals`.
town_names.columns = ['prov_code', 'town_code', 'dc', 'name']
# No key is given explicitly, so the join uses every column name that both
# frames have in common.
town_totals = town_totals.merge(town_names)
display(town_totals)

Unnamed: 0,prov_code,town_code,votes,dc,name
0,28,79,1480082,6,Madrid
1,8,19,578876,3,Barcelona
2,46,250,388595,8,Valencia
3,41,91,327372,7,Sevilla
4,50,297,306935,3,Zaragoza
5,29,67,225845,2,Málaga
6,30,30,198146,8,Murcia
7,48,20,165285,9,Bilbao
8,47,186,164165,8,Valladolid
9,14,21,158779,4,Córdoba


In [42]:
# First method: for every selected town, take its rows from `eldata`, run
# `find_suspicious` on them and report how many ballot boxes were flagged.
for town in town_totals.itertuples():
    print(f"Processing {town.name}...")
    in_town = (eldata['prov_code'] == town.prov_code) & (eldata['town_code'] == town.town_code)
    df = eldata.loc[in_town, :]
    suspicious_df, total_boxes = find_suspicious(df)
    print(f"{suspicious_df.shape[0]} suspicious ballot boxes out of a total of {total_boxes}")

Processing Madrid...
12 suspicious ballot boxes out of a total of 3347
Processing Barcelona...
2 suspicious ballot boxes out of a total of 1232
Processing Valencia...
1 suspicious ballot boxes out of a total of 920
Processing Sevilla...
1 suspicious ballot boxes out of a total of 938
Processing Zaragoza...
0 suspicious ballot boxes out of a total of 971
Processing Málaga...
1 suspicious ballot boxes out of a total of 715
Processing Murcia...
1 suspicious ballot boxes out of a total of 501
Processing Bilbao...
1 suspicious ballot boxes out of a total of 407
Processing Valladolid...
2 suspicious ballot boxes out of a total of 433
Processing Córdoba...
2 suspicious ballot boxes out of a total of 413
Processing Palmas de Gran Canaria, Las...
3 suspicious ballot boxes out of a total of 539
Processing Gijón...
1 suspicious ballot boxes out of a total of 375
Processing Alicante/Alacant...
2 suspicious ballot boxes out of a total of 467
Processing Vigo...
0 suspicious ballot boxes out of a tot

In [49]:
# Second method: same per-town loop as before, but using `find_suspicious_2`.
# NOTE(review): in the recorded output the "suspicious" count equals the total
# for every town (e.g. 3347 out of 3347), so the returned frame presumably
# holds every ballot box rather than only the flagged ones. Verify against
# find_suspicious_2 before trusting the printed count.
for town in town_totals.itertuples(index = False):
    print(f"Processing {town.name}...")
    same_province = eldata['prov_code'] == town.prov_code
    same_town = eldata['town_code'] == town.town_code
    df = eldata.loc[same_province & same_town, :]
    suspicious_df, total_boxes = find_suspicious_2(df)
    print(f"{suspicious_df.shape[0]} suspicious ballot boxes out of a total of {total_boxes}")

Processing Madrid...
     dist_code section_code table_code  bad_counts
1735        11          022          A           5
507         04          053          U           5
435         03          097          A           5
393         03          070          U           5
2869        17          026          U           5
3347 suspicious ballot boxes out of a total of 3347
Processing Barcelona...
    dist_code section_code table_code  bad_counts
336        03          046          U           5
133        02          054          A           5
583        05          094          B           5
119        02          045          U           5
415        04          002          B           5
1232 suspicious ballot boxes out of a total of 1232
Processing Valencia...
    dist_code section_code table_code  bad_counts
371        12          042          B           7
379        13          005          A           7
133        06          004          U           7
132        06         

In [51]:
# Ballot box to inspect. District, section and table codes are only unique
# within one municipality, so the town must be part of the selection as well;
# otherwise rows of same-coded tables in other towns could be mixed in.
town_name = 'Madrid'
dist_code = '11'
section_code = '022'
table_code = 'A'

# Take the province/town codes from `town_totals` rather than hard-coding them,
# so they are guaranteed to use the same representation as in `eldata`.
# Raises IndexError if `town_name` is not among the selected towns.
town = town_totals.loc[town_totals['name'] == town_name, :].iloc[0]

box_mask = (
    (eldata['prov_code'] == town['prov_code'])
    & (eldata['town_code'] == town['town_code'])
    & (eldata['dist_code'] == dist_code)
    & (eldata['section_code'] == section_code)
    & (eldata['table_code'] == table_code)
)

# Rows of that single ballot box, ordered from most to fewest votes.
summary = eldata.loc[box_mask, :].sort_values('votes', ascending = False)
display(summary)

Unnamed: 0,dist_code,party_code,party_name,prov_code,section_code,table_code,town_code,votes
389191,11,123076,PARTIDO POPULAR,28,22,A,79,158
389192,11,123186,PARTIDO SOCIALISTA OBRERO ESPAÑOL,28,22,A,79,99
389181,11,122515,IZQUIERDA UNIDA-LOS VERDES,28,22,A,79,56
389197,11,123780,UNION PROGRESO Y DEMOCRACIA,28,22,A,79,37
389175,11,121182,CIUDADANOS EN BLANCO,28,22,A,79,4
389198,11,123960,FAMILIA Y VIDA,28,22,A,79,2
389185,11,122895,PARTIDO COMUNISTA DE LOS PUEBLOS DE ESPAÑA,28,22,A,79,2
389177,11,121446,ECOLO VERDES,28,22,A,79,2
389182,11,122620,LA FALANGE,28,22,A,79,2
389193,11,123438,POR UN MUNDO MAS JUSTO,28,22,A,79,1
