In [1]:
import os
import json
import time
from typing import List, Dict, Tuple
from datetime import datetime

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    TimeoutException,
    ElementClickInterceptedException,
    StaleElementReferenceException,
)

URL = "https://pesqele-divulgacao.tse.jus.br/app/pesquisa/listar.xhtml"

ID_ELEICAO_LABEL = "formPesquisa:eleicoes_label"
ID_ELEICAO_PANEL = "formPesquisa:eleicoes_panel"

ID_BTN_PESQUISAR = "formPesquisa:idBtnPesquisar"

ID_TBODY = "formPesquisa:tabelaPesquisas_data"
ID_PAGINATOR = "formPesquisa:tabelaPesquisas_paginator_bottom"

STATE_PATH = "pesqele_seen.json"


def make_driver(profile_dir: str = "./chrome-profile-pesqele", headless: bool = False) -> webdriver.Chrome:
    opts = webdriver.ChromeOptions()
    opts.add_argument("--start-maximized")
    opts.add_argument(f"--user-data-dir={os.path.abspath(profile_dir)}")
    if headless:
        opts.add_argument("--headless=new")
    return webdriver.Chrome(options=opts)


def wait_dom_ready(driver: webdriver.Chrome, timeout: int = 30) -> None:
    WebDriverWait(driver, timeout).until(
        lambda d: d.execute_script("return document.readyState") in ("interactive", "complete")
    )


def safe_click(driver: webdriver.Chrome, wait: WebDriverWait, by: By, value: str, timeout: int = 30):
    el = WebDriverWait(driver, timeout).until(EC.element_to_be_clickable((by, value)))
    try:
        el.click()
        return el
    except ElementClickInterceptedException:
        driver.execute_script("arguments[0].click();", el)
        return el


def select_one_menu_by_text(driver: webdriver.Chrome, wait: WebDriverWait, label_id: str, panel_id: str, text: str) -> None:
    safe_click(driver, wait, By.ID, label_id)
    panel = wait.until(EC.visibility_of_element_located((By.ID, panel_id)))
    item = panel.find_element(By.XPATH, f".//li[normalize-space()='{text}']")
    driver.execute_script("arguments[0].scrollIntoView({block:'center'});", item)
    item.click()
    wait.until(EC.invisibility_of_element_located((By.ID, panel_id)))


def click_and_wait_table_refresh(driver: webdriver.Chrome, wait: WebDriverWait, btn_id: str, tbody_id: str) -> None:
    try:
        old_tbody = driver.find_element(By.ID, tbody_id)
    except Exception:
        old_tbody = None

    btn = safe_click(driver, wait, By.ID, btn_id)
    driver.execute_script("arguments[0].scrollIntoView({block:'center'});", btn)

    try:
        btn.click()
    except Exception:
        driver.execute_script("arguments[0].click();", btn)

    if old_tbody is not None:
        try:
            wait.until(EC.staleness_of(old_tbody))
        except TimeoutException:
            pass

    wait.until(EC.presence_of_element_located((By.ID, tbody_id)))


def parse_current_table(driver: webdriver.Chrome, tbody_id: str) -> List[Dict[str, str]]:
    tbody = driver.find_element(By.ID, tbody_id)
    rows = tbody.find_elements(By.XPATH, ".//tr")
    out: List[Dict[str, str]] = []

    for r in rows:
        cols = [c.text.strip() for c in r.find_elements(By.XPATH, "./td")]
        if len(cols) < 5:
            continue
        out.append({
            "numero_identificacao": cols[0],
            "eleicao": cols[1],
            "empresa_contratada": cols[2],
            "data_registro": cols[3],
            "abrangencia": cols[4],
        })

    return out


def get_page_numbers(driver: webdriver.Chrome, wait: WebDriverWait, paginator_id: str) -> List[int]:
    pag = wait.until(EC.presence_of_element_located((By.ID, paginator_id)))
    links = pag.find_elements(By.CSS_SELECTOR, "a.ui-paginator-page")
    nums = []
    for a in links:
        txt = (a.text or "").strip()
        if txt.isdigit():
            nums.append(int(txt))
    return sorted(set(nums))


def go_to_page(driver: webdriver.Chrome, wait: WebDriverWait, paginator_id: str, tbody_id: str, page_num: int) -> None:
    pag = wait.until(EC.presence_of_element_located((By.ID, paginator_id)))
    a = pag.find_element(By.CSS_SELECTOR, f"a.ui-paginator-page[aria-label='Page {page_num}']")

    tbody_before = driver.find_element(By.ID, tbody_id)

    try:
        a.click()
    except (ElementClickInterceptedException, StaleElementReferenceException):
        driver.execute_script("arguments[0].click();", a)

    try:
        wait.until(EC.staleness_of(tbody_before))
    except TimeoutException:
        pass
    wait.until(EC.presence_of_element_located((By.ID, tbody_id)))


def scrape_all_pages_current_query(driver: webdriver.Chrome, wait: WebDriverWait, paginator_id: str, tbody_id: str) -> List[Dict[str, str]]:
    pages = get_page_numbers(driver, wait, paginator_id)
    if not pages:
        return parse_current_table(driver, tbody_id)

    all_rows: List[Dict[str, str]] = []
    for p in pages:
        go_to_page(driver, wait, paginator_id, tbody_id, p)
        all_rows.extend(parse_current_table(driver, tbody_id))

    seen = set()
    dedup = []
    for r in all_rows:
        k = r.get("numero_identificacao")
        if not k or k in seen:
            continue
        seen.add(k)
        dedup.append(r)

    return dedup


def load_seen(path: str) -> set:
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            return set(json.load(f))
    return set()


def save_seen(path: str, seen: set) -> None:
    with open(path, "w", encoding="utf-8") as f:
        json.dump(sorted(seen), f, ensure_ascii=False, indent=2)


def run(eleicao_text: str = "Eleições Municipais 2024", headless: bool = False) -> List[Dict[str, str]]:
    driver = make_driver(headless=headless)
    wait = WebDriverWait(driver, 30)

    try:
        driver.get(URL)
        wait_dom_ready(driver)

        select_one_menu_by_text(driver, wait, ID_ELEICAO_LABEL, ID_ELEICAO_PANEL, eleicao_text)
        click_and_wait_table_refresh(driver, wait, ID_BTN_PESQUISAR, ID_TBODY)

        rows = scrape_all_pages_current_query(driver, wait, ID_PAGINATOR, ID_TBODY)
        return rows

    finally:
        driver.quit()


def persist_and_diff(rows: List[Dict[str, str]], state_path: str = STATE_PATH) -> Tuple[pd.DataFrame, pd.DataFrame]:
    df = pd.DataFrame(rows)
    df["capturado_em"] = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

    seen = load_seen(state_path)
    cur = set(df["numero_identificacao"].dropna().astype(str))

    novos = sorted(cur - seen)
    df_novos = df[df["numero_identificacao"].isin(novos)].copy()

    save_seen(state_path, seen | cur)
    return df, df_novos


if __name__ == "__main__":
    rows = run("Eleições Municipais 2024", headless=False)
    df, df_novos = persist_and_diff(rows)

    print(f"Registros coletados (até o cap de 50): {len(df)}")
    print(f"Novos nesta rodada: {len(df_novos)}")

    df.to_csv("pesqele_municipais_2024_last50.csv", index=False, encoding="utf-8")
    df_novos.to_csv("pesqele_municipais_2024_novos.csv", index=False, encoding="utf-8")

    print(df.head(5).to_dict(orient="records"))


Registros coletados (até o cap de 50): 50
Novos nesta rodada: 50
[{'numero_identificacao': 'RJ-03505/2024', 'eleicao': 'Eleições Municipais 2024', 'empresa_contratada': 'INSTITUTO FRANCA DE PESQUISAS LTDA / INSTITUTO FRANCA DE PESQUISA PESQUISA E ASSESSORIA', 'data_registro': '27/09/2025', 'abrangencia': 'RJ / TRÊS RIOS', 'capturado_em': '17/12/2025 15:07:59'}, {'numero_identificacao': 'BA-09599/2024', 'eleicao': 'Eleições Municipais 2024', 'empresa_contratada': 'PAINEL BRASIL CONSULTORIA E PESQUISA DE MERCADO E OPINIAO LTDA - ME / PAINEL BRASIL', 'data_registro': '12/12/2024', 'abrangencia': 'BA / RUY BARBOSA', 'capturado_em': '17/12/2025 15:07:59'}, {'numero_identificacao': 'RO-00581/2024', 'eleicao': 'Eleições Municipais 2024', 'empresa_contratada': 'INSTITUTO NOVO PERFIL PESQUISAS LTDA / PERFIL PESQUISAS', 'data_registro': '26/10/2024', 'abrangencia': 'RO / PORTO VELHO', 'capturado_em': '17/12/2025 15:07:59'}, {'numero_identificacao': 'CE-02235/2024', 'eleicao': 'Eleições Municipai

In [3]:
import os
import re
import time
from datetime import datetime
from typing import List, Dict, Tuple

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    TimeoutException,
    ElementClickInterceptedException,
    StaleElementReferenceException,
)

URL = "https://pesqele-divulgacao.tse.jus.br/app/pesquisa/listar.xhtml"

ID_ELEICAO_LABEL = "formPesquisa:eleicoes_label"
ID_ELEICAO_PANEL = "formPesquisa:eleicoes_panel"

ID_UF_LABEL = "formPesquisa:filtroUF_label"
ID_UF_PANEL = "formPesquisa:filtroUF_panel"

ID_BTN_PESQUISAR = "formPesquisa:idBtnPesquisar"

ID_TBODY = "formPesquisa:tabelaPesquisas_data"
ID_PAGINATOR = "formPesquisa:tabelaPesquisas_paginator_bottom"


def make_driver(profile_dir: str = "./chrome-profile-pesqele", headless: bool = False) -> webdriver.Chrome:
    opts = webdriver.ChromeOptions()
    opts.add_argument("--start-maximized")
    opts.add_argument(f"--user-data-dir={os.path.abspath(profile_dir)}")
    if headless:
        opts.add_argument("--headless=new")
    return webdriver.Chrome(options=opts)


def wait_dom_ready(driver: webdriver.Chrome, timeout: int = 30) -> None:
    WebDriverWait(driver, timeout).until(
        lambda d: d.execute_script("return document.readyState") in ("interactive", "complete")
    )


def safe_click(driver: webdriver.Chrome, wait: WebDriverWait, by: By, value: str, timeout: int = 30):
    el = WebDriverWait(driver, timeout).until(EC.element_to_be_clickable((by, value)))
    try:
        el.click()
        return el
    except ElementClickInterceptedException:
        driver.execute_script("arguments[0].click();", el)
        return el


def force_close_any_menu(driver: webdriver.Chrome):
    try:
        driver.switch_to.active_element.send_keys(Keys.ESCAPE)
    except Exception:
        pass
    try:
        driver.find_element(By.TAG_NAME, "body").click()
    except Exception:
        pass


def open_menu(driver: webdriver.Chrome, wait: WebDriverWait, label_id: str, panel_id: str) -> None:
    safe_click(driver, wait, By.ID, label_id)
    wait.until(EC.presence_of_element_located((By.ID, panel_id)))
    wait.until(EC.visibility_of_element_located((By.ID, panel_id)))


def select_one_menu_by_text(driver: webdriver.Chrome, wait: WebDriverWait, label_id: str, panel_id: str, text: str) -> None:
    open_menu(driver, wait, label_id, panel_id)

    panel = driver.find_element(By.ID, panel_id)
    item = panel.find_element(By.XPATH, f".//li[normalize-space()='{text}']")
    driver.execute_script("arguments[0].scrollIntoView({block:'center'});", item)
    try:
        item.click()
    except Exception:
        driver.execute_script("arguments[0].click();", item)

    force_close_any_menu(driver)


def list_one_menu_items(driver: webdriver.Chrome, wait: WebDriverWait, label_id: str, panel_id: str) -> List[str]:
    open_menu(driver, wait, label_id, panel_id)

    panel = driver.find_element(By.ID, panel_id)
    lis = panel.find_elements(By.CSS_SELECTOR, "li.ui-selectonemenu-item")

    items = []
    for li in lis:
        t = (li.text or "").strip()
        if not t:
            continue
        if t.lower() in {"selecione"}:
            continue
        items.append(t)

    force_close_any_menu(driver)
    return items


def click_and_wait_table_refresh(driver: webdriver.Chrome, wait: WebDriverWait, btn_id: str, tbody_id: str) -> None:
    try:
        old_tbody = driver.find_element(By.ID, tbody_id)
    except Exception:
        old_tbody = None

    btn = safe_click(driver, wait, By.ID, btn_id)
    driver.execute_script("arguments[0].scrollIntoView({block:'center'});", btn)
    try:
        btn.click()
    except Exception:
        driver.execute_script("arguments[0].click();", btn)

    if old_tbody is not None:
        try:
            wait.until(EC.staleness_of(old_tbody))
        except TimeoutException:
            pass

    wait.until(EC.presence_of_element_located((By.ID, tbody_id)))


def parse_current_table(driver: webdriver.Chrome, tbody_id: str) -> List[Dict[str, str]]:
    tbody = driver.find_element(By.ID, tbody_id)
    rows = tbody.find_elements(By.XPATH, ".//tr")

    out: List[Dict[str, str]] = []
    for r in rows:
        cols = [c.text.strip() for c in r.find_elements(By.XPATH, "./td")]
        if len(cols) < 5:
            continue
        out.append({
            "numero_identificacao": cols[0],
            "eleicao": cols[1],
            "empresa_contratada": cols[2],
            "data_registro": cols[3],
            "abrangencia": cols[4],
        })
    return out


def dedup_by_numero(rows: List[Dict[str, str]]) -> List[Dict[str, str]]:
    seen = set()
    out = []
    for r in rows:
        k = (r.get("numero_identificacao") or "").strip()
        if not k or k in seen:
            continue
        seen.add(k)
        out.append(r)
    return out


def get_page_numbers(driver: webdriver.Chrome, wait: WebDriverWait, paginator_id: str) -> List[int]:
    pag = wait.until(EC.presence_of_element_located((By.ID, paginator_id)))
    links = pag.find_elements(By.CSS_SELECTOR, "a.ui-paginator-page")

    nums = []
    for a in links:
        txt = (a.text or "").strip()
        if txt.isdigit():
            nums.append(int(txt))

    return sorted(set(nums))


def go_to_page(driver: webdriver.Chrome, wait: WebDriverWait, paginator_id: str, tbody_id: str, page_num: int, max_tries: int = 6) -> None:
    last_err = None

    for _ in range(max_tries):
        try:
            pag = wait.until(EC.presence_of_element_located((By.ID, paginator_id)))

            # Re-localiza SEMPRE (pra não ficar stale)
            a = pag.find_element(By.CSS_SELECTOR, f"a.ui-paginator-page[aria-label='Page {page_num}']")
            driver.execute_script("arguments[0].scrollIntoView({block:'center'});", a)

            tbody_before = driver.find_element(By.ID, tbody_id)

            # JS click tende a ser mais estável em PrimeFaces
            driver.execute_script("arguments[0].click();", a)

            try:
                wait.until(EC.staleness_of(tbody_before))
            except TimeoutException:
                pass

            wait.until(EC.presence_of_element_located((By.ID, tbody_id)))

            # Confirma que a página ficou ativa (a ou span, depende da renderização)
            try:
                wait.until(EC.presence_of_element_located(
                    (By.CSS_SELECTOR, f"a.ui-paginator-page.ui-state-active[aria-label='Page {page_num}']")
                ))
            except TimeoutException:
                try:
                    wait.until(EC.presence_of_element_located(
                        (By.CSS_SELECTOR, f"span.ui-paginator-page.ui-state-active[aria-label='Page {page_num}']")
                    ))
                except TimeoutException:
                    pass

            return

        except (StaleElementReferenceException, ElementClickInterceptedException, TimeoutException) as e:
            last_err = e
            time.sleep(0.5)

    raise last_err


def scrape_all_pages_current_query(driver: webdriver.Chrome, wait: WebDriverWait, paginator_id: str, tbody_id: str) -> List[Dict[str, str]]:
    pages = get_page_numbers(driver, wait, paginator_id)
    if not pages:
        return dedup_by_numero(parse_current_table(driver, tbody_id))

    all_rows: List[Dict[str, str]] = []
    for p in pages:
        go_to_page(driver, wait, paginator_id, tbody_id, p)
        all_rows.extend(parse_current_table(driver, tbody_id))

    return dedup_by_numero(all_rows)


def sheet_safe(name: str) -> str:
    s = re.sub(r"[\[\]\:\*\?\/\\]", "-", name.strip())
    return s[:31] if len(s) > 31 else s


def run_by_uf(eleicao_text: str = "Eleições Municipais 2024", headless: bool = False) -> str:
    driver = make_driver(headless=headless)
    wait = WebDriverWait(driver, 30)

    slug = re.sub(r"[^a-zA-Z0-9]+", "_", eleicao_text).strip("_").lower()
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_xlsx = f"pesqele_{slug}_por_uf_{ts}.xlsx"

    dfs: Dict[str, pd.DataFrame] = {}
    resumo: List[Dict[str, str]] = []

    try:
        driver.get(URL)
        wait_dom_ready(driver)

        select_one_menu_by_text(driver, wait, ID_ELEICAO_LABEL, ID_ELEICAO_PANEL, eleicao_text)

        ufs = list_one_menu_items(driver, wait, ID_UF_LABEL, ID_UF_PANEL)
        ufs = [u for u in ufs if u.upper() != "BRASIL"]

        for uf in ufs:
            try:
                select_one_menu_by_text(driver, wait, ID_UF_LABEL, ID_UF_PANEL, uf)

                click_and_wait_table_refresh(driver, wait, ID_BTN_PESQUISAR, ID_TBODY)
                rows = scrape_all_pages_current_query(driver, wait, ID_PAGINATOR, ID_TBODY)

                df = pd.DataFrame(rows)
                df["uf_filtro"] = uf
                df["capturado_em"] = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

                dfs[uf] = df

                resumo.append({
                    "uf": uf,
                    "status": "ok",
                    "linhas": str(len(df)),
                })

            except Exception as e:
                resumo.append({
                    "uf": uf,
                    "status": "erro",
                    "linhas": "0",
                    "erro": repr(e)[:300],
                })

        # Sempre escreve pelo menos o __resumo__ (evita "At least one sheet must be visible")
        df_resumo = pd.DataFrame(resumo).sort_values(["status", "uf"], ascending=[True, True])

        with pd.ExcelWriter(out_xlsx, engine="openpyxl") as writer:
            df_resumo.to_excel(writer, sheet_name="__resumo__", index=False)

            for uf, df in dfs.items():
                df.to_excel(writer, sheet_name=sheet_safe(uf), index=False)

        return out_xlsx

    finally:
        driver.quit()


if __name__ == "__main__":
    path = run_by_uf(eleicao_text="Eleições Municipais 2024", headless=False)
    print(f"XLSX gerado: {path}")

XLSX gerado: pesqele_elei_es_municipais_2024_por_uf_20251217_153446.xlsx
