In [1]:
import time
import json
from typing import List, Dict, Any
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.remote.webelement import WebElement

In [179]:
def senator_data_from(senator_row: WebElement) -> Dict[str, Any]:
    """Turn one row of the senators statistics table into a plain dict.

    The first cell is expected to hold an ``<a>`` wrapping an ``<img>``: the
    anchor's ``href`` becomes ``senator_link`` and the image's ``src`` becomes
    ``image_url``. Cells 1 through 8 are read as text, in column order.

    Args:
        senator_row: The ``<tr>`` element to read.

    Returns:
        A dict with the keys ``senator_link``, ``image_url``, ``senator_name``,
        ``current_status``, ``bloc``, ``province``, ``affirmative_votes``,
        ``negative_votes``, ``abstention_votes`` and ``missing_votes``. The
        vote counters are kept as the raw cell text (strings).

    Raises:
        IndexError: If the row has fewer than nine ``<td>`` cells.
    """
    columns = senator_row.find_elements(By.TAG_NAME, "td")
    profile_anchor = columns[0].find_element(By.TAG_NAME, "a")

    record: Dict[str, Any] = {
        "senator_link": profile_anchor.get_attribute("href"),
        "image_url": profile_anchor.find_element(By.TAG_NAME, "img").get_attribute("src"),
    }

    # Field names listed in table-column order; column 0 was handled above.
    text_fields = (
        "senator_name",
        "current_status",
        "bloc",
        "province",
        "affirmative_votes",
        "negative_votes",
        "abstention_votes",
        "missing_votes",
    )
    for column_index, field_name in enumerate(text_fields, start=1):
        record[field_name] = columns[column_index].text

    return record

In [180]:
def vote_from(vote_row: WebElement) -> Dict[str, Any]:
    """Turn one row of a senator's votes table into a plain dict.

    Cells are read positionally: 0 = date, 1 = record number, 2 = record
    title (which may also contain a hidden block with a link to the record),
    3 = the vote itself.

    Args:
        vote_row: The ``<tr>`` element to read.

    Returns:
        A dict with the keys ``date``, ``record_number``, ``record_title``,
        ``record_link`` and ``vote``. ``record_link`` is ``None`` when the
        title cell contains no matching hidden block or no anchor inside it.

    Raises:
        IndexError: If the row has fewer than four ``<td>`` cells.
    """
    # Local import: keeps this cell self-contained without touching the
    # notebook's import cell.
    from selenium.common.exceptions import NoSuchElementException

    cells = vote_row.find_elements(By.TAG_NAME, "td")
    date = cells[0].text
    record_number = cells[1].text
    record_title_column = cells[2]
    # The visible text of the cell ends with the toggle label; drop it.
    record_title = record_title_column.text.removesuffix("Ver Expedientes").strip()

    try:
        # A bare "expedientesOcultos" is a CSS *type* selector (it would only
        # match an <expedientesOcultos> tag), so it could never match.
        # NOTE(review): the page markup is not visible here, so both the class
        # and the id form are accepted -- confirm which one the site uses.
        hidden_link = record_title_column.find_element(
            By.CSS_SELECTOR, ".expedientesOcultos, #expedientesOcultos"
        )
        record_link = hidden_link.find_element(By.TAG_NAME, "a").get_attribute("href")
    except NoSuchElementException:
        # A missing link is expected for some rows; anything else (including
        # KeyboardInterrupt) should propagate instead of being swallowed.
        record_link = None
    vote = cells[3].text

    return {
        "date": date,
        "record_number": record_number,
        "record_title": record_title,
        "record_link": record_link,
        "vote": vote
    }

In [181]:
def votes_from_senator(senator: Dict[str, Any], driver: uc.Chrome) -> List[Dict[str, Any]]:
    """Collect every vote listed on a senator's detail page.

    Navigates ``driver`` to ``senator["senator_link"]``, reads the expected
    number of rows from the last space-separated token of the ``tabla_info``
    element, switches the table to 100 rows per page and walks the pages with
    the ``tabla_next`` button until that many rows have been collected.

    Args:
        senator: A senator dict; ``senator_link`` and ``senator_name`` are read.
        driver: The browser session used for navigation.

    Returns:
        One dict per vote row, as produced by ``vote_from``. An empty list
        when the page reports zero rows.

    Raises:
        ValueError: If the last token of the info text is not an integer.
        Exception: If a page shows no rows, if the first row of a page is
            identical to the previous page's (pagination did not advance), or
            if more rows were collected than the page announced.
    """
    driver.get(senator["senator_link"])
    votes = []

    table_info = driver.find_element(By.ID, "tabla_info")
    total_rows = int(table_info.text.split(" ")[-1])

    if total_rows == 0:
        # Nothing to paginate. Without this guard the loop below would fail on
        # the empty (or placeholder-only) table body and abort the whole run.
        print(f"\tProcessed {len(votes)} votes for {senator['senator_name']} (known total: {total_rows})")
        return votes

    table_length_select = Select(driver.find_element(By.NAME, "tabla_length"))
    table_length_select.select_by_value("100")

    first_row = None
    while True:
        table_body = driver.find_element(By.ID, "tabla").find_element(By.TAG_NAME, "tbody")
        page_rows = table_body.find_elements(By.TAG_NAME, "tr")
        if not page_rows:
            raise Exception(
                f"No rows found for {senator['senator_name']} after {len(votes)} of {total_rows} votes"
            )

        # The first row's markup acts as a page fingerprint: if it did not
        # change after clicking "next", the table never advanced.
        new_first_row = page_rows[0].get_attribute("innerHTML")
        if first_row != new_first_row:
            first_row = new_first_row
        else:
            print(f"First row repeated: {first_row}")
            raise Exception("Could not advance to next page. First row repeated")

        for row in page_rows:
            votes.append(vote_from(row))

        if len(votes) < total_rows:
            next_page_button = driver.find_element(By.ID, "tabla_next")
            next_page_button.click()
        elif len(votes) == total_rows:
            print(f"\tProcessed {len(votes)} votes for {senator['senator_name']} (known total: {total_rows})")
            break
        else:
            # Overshoot means the announced total and the table disagree; dump
            # what was collected to help diagnose before failing.
            print(f"\n\n{votes}")
            raise Exception(f"Processed {len(votes)} votes for {senator['senator_name']} instead {total_rows}")
    return votes

In [182]:
def senators_data_at(year: int, driver: uc.Chrome) -> List[Dict[str, Any]]:
    """Scrape every senator listed for ``year`` together with their votes.

    Opens the statistics page, selects ``year`` in the ``periodo`` dropdown,
    submits the search, expands the results table to show all rows, converts
    each row with ``senator_data_from`` and then visits each senator's page
    via ``votes_from_senator`` to attach a ``votes`` list to the dict.

    Args:
        year: The period to select; matched against the dropdown's visible text.
        driver: The browser session used for navigation.

    Returns:
        One dict per senator, each extended with a ``votes`` key.

    Raises:
        Exception: If collecting the votes of any senator fails; the message
            names the senator and includes the underlying error text.
    """
    print("============================================================")
    print(f"Processing year {year}")
    driver.get("https://www.senado.gob.ar/votaciones/estadisticasSena")

    Select(driver.find_element(By.ID, "periodo")).select_by_visible_text(str(year))
    driver.find_element(By.XPATH, "//input[@type='image' and @title='Realizar Búsqueda']").click()

    Select(driver.find_element(By.NAME, "tabla_length")).select_by_value("-1")  # "Todos"

    senator_rows = (
        driver.find_element(By.ID, "tabla")
        .find_element(By.TAG_NAME, "tbody")
        .find_elements(By.TAG_NAME, "tr")
    )
    print(f"> {len(senator_rows)} senators found\n")

    # Extract everything from the row elements before navigating away.
    senators_data = []
    for senator_row in senator_rows:
        senators_data.append(senator_data_from(senator_row))

    for index, senator in enumerate(senators_data):
        print(f"{index + 1}) Processing senator {senator['senator_name']} for year {year}")
        try:
            senator_votes = votes_from_senator(senator, driver)
        except Exception as e:
            raise Exception(f"Error processing data related to {senator['senator_name']}: {e}")
        # `senator` is the same dict object stored in the list.
        senator["votes"] = senator_votes
    return senators_data

In [183]:
# Scrape every year from 2005 through 2024 with a single browser session and
# persist the result as JSON.
options = uc.ChromeOptions()
driver = uc.Chrome(options=options, browser_executable_path="/usr/bin/chromium-browser")
try:
    senators_data_by_year = {year: senators_data_at(year, driver) for year in range(2005, 2025)}
finally:
    # Always release the browser, even when scraping fails part-way through;
    # otherwise the Chrome process is left running.
    driver.quit()

# The integer year keys are written as strings by json.dump.
with open("senators_data.json", "w", encoding="utf-8") as file:
    json.dump(senators_data_by_year, file, ensure_ascii=False, indent=4)

Processing year 2005
> 88 senators found

1) Processing senator CAFIERO, ANTONIO FRANCISCO for year 2005
	Processed 211 votes for CAFIERO, ANTONIO FRANCISCO (known total: 211)
2) Processing senator CONTI, DIANA BEATRIZ for year 2005
	Processed 211 votes for CONTI, DIANA BEATRIZ (known total: 211)
3) Processing senator FERNÁNDEZ DE KIRCHNER, CRISTINA E. for year 2005
	Processed 226 votes for FERNÁNDEZ DE KIRCHNER, CRISTINA E. (known total: 226)
4) Processing senator GONZÁLEZ DE DUHALDE, HILDA BEATRIZ for year 2005
	Processed 15 votes for GONZÁLEZ DE DUHALDE, HILDA BEATRIZ (known total: 15)
5) Processing senator LEGUIZAMÓN, MARÍA LAURA for year 2005
	Processed 226 votes for LEGUIZAMÓN, MARÍA LAURA (known total: 226)
6) Processing senator MULLER, MABEL HILDA for year 2005
	Processed 211 votes for MULLER, MABEL HILDA (known total: 211)
7) Processing senator PAMPURO, JOSÉ JUAN BAUTISTA for year 2005
	Processed 15 votes for PAMPURO, JOSÉ JUAN BAUTISTA (known total: 15)
8) Processing senator 

In [185]:
# Gather the distinct bloc names seen across all scraped years.
blocs = set()
for year in range(2005, 2025):
    for senator in senators_data_by_year[year]:
        blocs |= {senator["bloc"]}

In [186]:
# Display the set of distinct bloc names as the cell output.
blocs

{'',
 'ALIANZA COALICIÓN CÍVICA',
 'AVANZAR SAN LUIS',
 'CAMBIO FEDERAL',
 'CONCERTACIÓN PLURAL',
 'DESPIERTA CHUBUT',
 'ESPERANZA FEDERAL',
 'FEDERALISMO SANTAFESINO',
 'FRENTE CIVICO DE LA PROVINCIA DE CORDOBA',
 'FRENTE CÍVICO JUJEÑO',
 'FRENTE CÍVICO POR SANTIAGO',
 'FRENTE CÍVICO Y SOCIAL DE CATAMARCA',
 'FRENTE DE TODOS',
 'FRENTE NACIONAL Y POPULAR',
 'FRENTE PARA LA VICTORIA - PJ',
 'FRENTE PRO',
 'FRENTE RENOVADOR DE LA CONCORDIA SOCIAL',
 'FUERZA REPUBLICANA',
 'GEN',
 'HAY FUTURO ARGENTINA',
 'JUNTOS SOMOS RÍO NEGRO',
 'JUSTICIALISTA',
 'JUSTICIALISTA 8 DE OCTUBRE',
 'JUSTICIALISTA PARA EL DIALOGO DE LOS ARGENTINOS',
 'JUSTICIALISTA SAN LUIS',
 'LA LIBERTAD AVANZA',
 'LEALTAD Y DIGNIDAD JUSTICIALISTA',
 'LIBERTAD, TRABAJO Y PROGRESO',
 'MISIONES',
 'MOVIMIENTO NEUQUINO',
 'MOVIMIENTO POPULAR FUEGUINO',
 'MOVIMIENTO POPULAR NEUQUINO',
 'NUEVO ENCUENTRO',
 'PARES',
 'PARTIDO DE LA VICTORIA',
 'PARTIDO JUSTICIALISTA LA PAMPA',
 'PARTIDO LIBERAL DE CORRIENTES',
 'PARTIDO RENOVAD