In [42]:
import json
import os

import pandas as pd
import requests

In [3]:
BASE_URL = "https://resultados.gob.ar/backend-difu/scope/data/getScopeData/{desk_code}/1"

In [4]:
desk_code = "0201600047X"

In [8]:
# Fetch the sample desk. The explicit timeout keeps the cell from hanging
# forever if the results server stops answering.
response = requests.get(BASE_URL.format(desk_code=desk_code), timeout=30)

In [9]:
response.ok

True

In [10]:
content = response.json()

In [11]:
content['id']

{'idAmbito': {'level': 8.0,
  'codigo': '0201600047X',
  'scopeId': {'timestamp': 0.0,
   'counter': 5019782.0,
   'randomValue1': 0.0,
   'randomValue2': 12016.0}},
 'eleccionId': 1.0,
 'envio': 37.0}

In [12]:
content

{'id': {'idAmbito': {'level': 8.0,
   'codigo': '0201600047X',
   'scopeId': {'timestamp': 0.0,
    'counter': 5019782.0,
    'randomValue1': 0.0,
    'randomValue2': 12016.0}},
  'eleccionId': 1.0,
  'envio': 37.0},
 'date': 'Oct 22, 2023, 9:28:35 PM',
 'fathers': [{'level': 7.0,
   'name': 'COLEGIO SAN ROQUE',
   'codigo': '020160015529009',
   'scopeId': {'timestamp': 0.0,
    'counter': 10783217.0,
    'randomValue1': 183.0,
    'randomValue2': 23217.0}},
  {'level': 6.0,
   'name': '00155',
   'codigo': '0201600155',
   'scopeId': {'timestamp': 0.0,
    'counter': 2735636.0,
    'randomValue1': 0.0,
    'randomValue2': 120.0}},
  {'level': 5.0,
   'name': 'Campana',
   'codigo': '02016016',
   'scopeId': {'timestamp': 0.0,
    'counter': 3382949.0,
    'randomValue1': 0.0,
    'randomValue2': 1.0}},
  {'level': 4.0,
   'name': 'Campana',
   'codigo': '02016',
   'scopeId': {'timestamp': 0.0,
    'counter': 20164.0,
    'randomValue1': 0.0,
    'randomValue2': 0.0}},
  {'level': 3.

In [23]:
content.keys()

dict_keys(['id', 'date', 'fathers', 'cargos', 'showCargos', 'census', 'pollingCensus', 'numStation', 'pollingNumStation', 'pollingPercCensus', 'pollingPercStation', 'electores', 'sobres', 'nulos', 'percNulos', 'recurridos', 'percRecurridos', 'blancos', 'percBlancos', 'comando', 'percComando', 'impugnados', 'percImpugnados', 'totalVotos', 'afirmativos', 'percAfirmativos', 'abstencion', 'percAbstencion', 'valid', 'percValid', 'recImpCom', 'percRecImpCom', 'participation', 'isCircus', 'umbral', 'calcPerc', 'imagen', 'hayEmpates', 'partidos'])

In [261]:
def get_geo_info(content: dict) -> dict:
    """Map every ancestor scope of a desk to a ``lvl_<n>`` entry.

    Each element of ``content['fathers']`` contributes one item: the key is
    ``lvl_`` followed by the element's ``level`` converted with ``int``, and
    the value is the element's ``name``. Items follow the order of
    ``content['fathers']``; if a level repeats, the last name wins.
    """
    return {
        f"lvl_{int(father['level'])}": father['name']
        for father in content['fathers']
    }

# Top-level fields of a desk response that are copied verbatim into each record.
INTERESTING_KEYS = [
    'census', 'electores', 'sobres', 'nulos', 'recurridos', 'blancos',
    'comando', 'impugnados', 'totalVotos', 'afirmativos', 'abstencion', 'valid',
]


def get_desk_data(content: dict) -> dict:
    """Return the items of ``content`` whose key is listed in ``INTERESTING_KEYS``.

    Items keep the order in which they appear in ``content``. Keys listed in
    ``INTERESTING_KEYS`` but absent from ``content`` are simply left out.
    """
    selected = {}
    for field in content:
        if field in INTERESTING_KEYS:
            selected[field] = content[field]
    return selected

# Short column labels, keyed by the party name used to look them up.
ACRONYMS = {
    'UNION POR LA PATRIA': "UXP",
    'JUNTOS POR EL CAMBIO': "JXC",
    'LA LIBERTAD AVANZA': "LLA",
    'HACEMOS POR NUESTRO PAIS': "HXNP",
    'FRENTE DE IZQUIERDA Y DE TRABAJADORES - UNIDAD': "FIT"
}


def get_votes_per_party(content: dict) -> dict:
    """Return the ``votos`` of every entry in ``content['partidos']`` keyed by acronym.

    The entry's ``name`` is translated through ``ACRONYMS``; a name that is
    not in that table raises ``KeyError``.
    """
    return {
        ACRONYMS[party['name']]: party['votos']
        for party in content['partidos']
    }


def process_desk_response(content: dict) -> dict:
    """Flatten one desk response into a single record.

    The record is the merge, in this order, of ``get_geo_info``,
    ``get_desk_data`` and ``get_votes_per_party`` applied to ``content``;
    on a key collision the later source overrides the earlier one.
    """
    record = {}
    record.update(get_geo_info(content))
    record.update(get_desk_data(content))
    record.update(get_votes_per_party(content))
    return record



def request_desk_info(desk_code, timeout=30):
    """Download one desk's results, save the raw JSON and return a flat record.

    Parameters
    ----------
    desk_code : str
        Desk identifier substituted into ``BASE_URL``; also used as the name
        of the saved file (``../data/raw/mesas/<desk_code>.json``).
    timeout : float, optional
        Seconds passed to ``requests.get`` as ``timeout``. Without a timeout
        a stalled connection would block the scraping loop indefinitely.

    Returns
    -------
    tuple
        ``(True, record)`` where ``record`` is the output of
        ``process_desk_response`` when the response is OK, otherwise
        ``(False, response.reason)``.

    Raises
    ------
    requests.exceptions.RequestException
        Network failures (timeouts included) are not caught here, so callers
        cannot confuse them with a "desk not found" answer.
    """
    response = requests.get(BASE_URL.format(desk_code=desk_code), timeout=timeout)
    if not response.ok:
        return False, response.reason

    # Parse the body once and reuse it for both the raw dump and the record.
    content = response.json()
    with open(f'../data/raw/mesas/{desk_code}.json', 'w') as file:
        json.dump(content, file)
    return True, process_desk_response(content)

def load_desk_into_df(desk_raw_dir="../data/raw/mesas"):
    """Build a DataFrame with one row per raw desk file stored on disk.

    Parameters
    ----------
    desk_raw_dir : str, optional
        Directory holding the ``<desk_code>.json`` files. Defaults to the
        directory this notebook writes its raw downloads to.

    Returns
    -------
    pandas.DataFrame
        One row per file whose name ends in ``X.json``: the output of
        ``process_desk_response`` for that file plus an ``id_mesa`` column
        holding the file name without its ``.json`` extension. Rows are
        ordered by file name.
    """
    suffix = ".json"
    records = []
    # os.listdir returns names in arbitrary order; sorting makes the row
    # order reproducible across runs and machines.
    for desk_file in sorted(os.listdir(desk_raw_dir)):
        if not desk_file.endswith("X.json"):
            continue
        with open(os.path.join(desk_raw_dir, desk_file), "r") as file:
            desk_json = json.load(file)
        # The file is closed before the (pure) transformation runs.
        record = process_desk_response(desk_json)
        # Drop only the trailing extension, not every ".json" occurrence.
        record["id_mesa"] = desk_file[: -len(suffix)]
        records.append(record)
    return pd.DataFrame(records)
            
    

In [262]:
df = load_desk_into_df()

In [39]:
process_desk_response(content)

{'lvl_7': 'COLEGIO SAN ROQUE',
 'lvl_6': '00155',
 'lvl_5': 'Campana',
 'lvl_4': 'Campana',
 'lvl_3': 'Sección Primera',
 'lvl_2': 'Buenos Aires',
 'lvl_1': 'Argentina',
 'census': 350.0,
 'electores': 253.0,
 'sobres': 253.0,
 'nulos': 3.0,
 'recurridos': 0.0,
 'blancos': 8.0,
 'comando': 0.0,
 'impugnados': 1.0,
 'totalVotos': 253.0,
 'afirmativos': 241.0,
 'abstencion': 97.0,
 'valid': 249.0,
 'UXP': 93.0,
 'JXC': 76.0,
 'LLA': 57.0,
 'HXNP': 9.0,
 'FIT': 6.0}

In [29]:
get_desk_data(content)

{'census': 350.0,
 'electores': 253.0,
 'sobres': 253.0,
 'nulos': 3.0,
 'recurridos': 0.0,
 'blancos': 8.0,
 'comando': 0.0,
 'impugnados': 1.0,
 'totalVotos': 253.0,
 'afirmativos': 241.0,
 'abstencion': 97.0,
 'valid': 249.0}

In [34]:
get_votes_per_party(content)

{'UXP': 93.0, 'JXC': 76.0, 'LLA': 57.0, 'HXNP': 9.0, 'FIT': 6.0}

In [57]:
# Brute-force scrape: build every candidate desk code
# "<province:2><district:3><desk:5>X" and request it until the server
# stops answering OK.
records = []
broke = False  # assigned here but never read in this cell
for province in range(1, 25):
    province_id = str(province).zfill(2)
    for district in range(1, 999):
        district_id = str(district).zfill(3)
        for desk in range(1, 99999):
            desk_id = str(desk).zfill(5)
            desk_code = f'{province_id}{district_id}{desk_id}X'
            ok, record = request_desk_info(desk_code)
            if ok:
                records.append(record)
            else:
                # First failed request ends the scan of this district; the
                # failing code and the reason returned are printed.
                print(desk_code, record)
                break
        # desk_id still holds the last code tried by the inner loop. If that
        # was the very first desk, nothing was found in this district, so
        # stop scanning districts and move on to the next province.
        if desk_id == '00001':
            break
        
            

0100100516X Not Found
0100200001X Not Found
0200100047X Not Found
0200200031X Not Found
0200301308X Not Found
0200400848X Not Found
0200500062X Not Found
0200600178X Not Found
0200700727X Not Found
0200800122X Not Found
0200900095X Not Found
0201000078X Not Found
0201100788X Not Found
0201200236X Not Found
0201300097X Not Found
0201400115X Not Found
0201500079X Not Found
0201600249X Not Found
0201700150X Not Found
0201800040X Not Found
0201900063X Not Found
0202000033X Not Found
0202100042X Not Found
0202200087X Not Found
0202300026X Not Found
0202400066X Not Found
0202500048X Not Found
0202600061X Not Found
0202700152X Not Found
0202800107X Not Found
0202900132X Not Found
0203000108X Not Found
0203100182X Not Found
0203200047X Not Found
0203300076X Not Found
0203400168X Not Found
0203500566X Not Found
0203600688X Not Found
0203700097X Not Found
0203800963X Not Found
0203900115X Not Found
0204000030X Not Found
0204100049X Not Found
0204200051X Not Found
0204300012X Not Found
0204400031

In [263]:
df.shape

(104450, 25)

In [264]:
df[ACRONYMS.values()].sum().sort_values(ascending=False) / df.totalVotos.sum()

UXP     0.355953
LLA     0.290895
JXC     0.231277
HXNP    0.065847
FIT     0.026194
dtype: float64

In [267]:
df[ACRONYMS.values()].sum().sort_values(ascending=False)

UXP     9640838.0
LLA     7878772.0
JXC     6264045.0
HXNP    1783438.0
FIT      709442.0
dtype: float64

In [265]:
df.census.sum()

35387828.0

In [266]:
df.totalVotos.sum()

27084619.0

In [256]:
(df.electores != df.sobres).mean()

0.046624793262014706

In [150]:
df.groupby("lvl_2").lvl_1.count()

lvl_2
Buenos Aires                                             38074
Catamarca                                                 1058
Chaco                                                     2947
Chubut                                                    1416
Ciudad Autónoma de Buenos Aires                           7360
Corrientes                                                2760
Córdoba                                                   9097
Entre Ríos                                                 934
Formosa                                                   1484
Jujuy                                                     1825
La Pampa                                                   901
La Rioja                                                   955
Mendoza                                                   4357
Misiones                                                  2855
Neuquén                                                   1156
Río Negro                                        

In [119]:
df.to_csv("../data/first_iteration.csv", index = False)
df.head()

Unnamed: 0,lvl_7,lvl_6,lvl_5,lvl_4,lvl_2,lvl_1,census,electores,sobres,nulos,...,totalVotos,afirmativos,abstencion,valid,UXP,JXC,LLA,FIT,HXNP,lvl_3
0,ESC. Nº26 HIPOLITO YRIGOYEN,1,Comuna 1,Comuna 01,Ciudad Autónoma de Buenos Aires,Argentina,345.0,233.0,233.0,3.0,...,233.0,222.0,112.0,230.0,96.0,65.0,44.0,13.0,4.0,
1,ESC. Nº26 HIPOLITO YRIGOYEN,1,Comuna 1,Comuna 01,Ciudad Autónoma de Buenos Aires,Argentina,339.0,238.0,238.0,1.0,...,238.0,236.0,101.0,237.0,102.0,86.0,38.0,6.0,4.0,
2,ESC. Nº26 HIPOLITO YRIGOYEN,1,Comuna 1,Comuna 01,Ciudad Autónoma de Buenos Aires,Argentina,342.0,234.0,234.0,4.0,...,234.0,227.0,108.0,230.0,97.0,84.0,35.0,6.0,5.0,
3,ESC. Nº26 HIPOLITO YRIGOYEN,1,Comuna 1,Comuna 01,Ciudad Autónoma de Buenos Aires,Argentina,345.0,255.0,255.0,0.0,...,255.0,252.0,90.0,255.0,118.0,77.0,41.0,12.0,4.0,
4,ESC. Nº26 HIPOLITO YRIGOYEN,1,Comuna 1,Comuna 01,Ciudad Autónoma de Buenos Aires,Argentina,351.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,


In [66]:
import os


In [84]:
# Desk codes already saved on disk (file names without ".json"). Stored as a
# set because the scraping loop tests membership once per candidate code, and
# a list lookup would scan tens of thousands of entries each time.
already_scrapped = {desk.replace('.json', '') for desk in os.listdir("../data/raw/mesas")}

In [85]:
len(already_scrapped)

79811

In [246]:
# Resume scraping for province code 24, starting at district 5 and desk 510.
# Unlike the first scraping cell, the desk counter is NOT reset per district:
# it keeps increasing across districts. Relies on `records`,
# `already_scrapped` and `request_desk_info` defined in other cells.
for province in list(range(24, 25)):
    province_id = str(province).zfill(2)
    desk = 510
    for district in range(5, 999):
        ok = True
        # Remember where this district started to detect "no progress" below.
        first_desk = desk
        district_id = str(district).zfill(3)
        print(f"{province_id=} {district_id=} {desk=}")
        while ok:
            desk_id = str(desk).zfill(5)
            desk_code = f'{province_id}{district_id}{desk_id}X'
            # Skip codes that already have a file on disk without requesting them.
            if desk_code in already_scrapped:
                desk += 1
                continue
            ok, record = request_desk_info(desk_code)
            if ok:
                records.append(record)
                desk += 1
            else:
                # On failure `record` holds the reason returned by the request.
                print(desk_code, record)
                break

        # The counter did not advance in this district (nothing downloaded or
        # skipped), so stop scanning further districts.
        if first_desk == desk:
            break

province_id='24' district_id='005' desk=510
2400500531X Not Found
province_id='24' district_id='006' desk=531
2400600531X Not Found


In [233]:
# Saved desk codes (file names without ".json") that start with '24'.
# NOTE(review): the variable is called `salta`, but the filter is only the
# '24' prefix; confirm that this prefix is the intended province.
salta = [
    code
    for code in (name.replace('.json', '') for name in os.listdir("../data/raw/mesas"))
    if code.startswith('24')
]

In [234]:
salta.sort()

In [235]:
salta[-10:]

['2400200483X',
 '2400200484X',
 '2400200485X',
 '2400200486X',
 '2400200487X',
 '2400200488X',
 '2400200489X',
 '2400200490X',
 '2400200491X',
 '2400200492X']