# PreProcessamentoDeDadosBoletimDeUrna
Este notebook tem o objetivo de realizar um pré-processamento nos dados baixados. <br>
Estes processamentos são os seguintes:
- Boletim de Urna
 - Adição do modelo de urna
 - Tratamento dos nomes das cidades

In [1]:
# Project helper module. set_wd() presumably switches the working directory to
# the repository root, which the relative data/ paths used later rely on — TODO confirm.
import working_dir
working_dir.set_wd()
# Last expression of the cell: displays the active working directory.
working_dir.get_wd()

'/Users/tales.pimentel/ds/audit2022p'

In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import zipfile
import shutil
import os
from src.dao import data_urls, data_globals
from src.utils import pretties as prt
from src.utils import stats, dflib

In [3]:
# Remove the limit on displayed columns so wide tables are rendered in full.
pd.set_option('display.max_columns', None)

In [4]:
# Local aliases for the project-wide constants defined in src.dao.data_globals.
SIGLAS_ESTADOS_TSE = data_globals.SIGLAS_ESTADOS_TSE
FILEPATH_PROC_ESTADOS = data_globals.FILEPATH_PROC_ESTADOS

# Zip archives read by this notebook and the CSV it writes at the end.
# The BU pattern contains a "{SIGLA}" placeholder that is replaced per state.
FILEPATH_RAW_TSE_MODELOS_URNA_ZIP = data_globals.FILEPATH_RAW_TSE_MODELOS_URNA_ZIP
FILEPATH_RAW_TSE_VOTACAO = data_globals.FILEPATH_RAW_TSE_VOTACAO
FILEPATH_RAW_TSE_PATTERN_BOLETINS_DE_URNA_2T = data_globals.FILEPATH_RAW_TSE_PATTERN_BOLETINS_DE_URNA_2T
FILEPATH_PROC_TSE_BOLETINS_DE_URNA_2T = data_globals.FILEPATH_PROC_TSE_BOLETINS_DE_URNA_2T

In [5]:
# City-name corrections (wrong spelling -> corrected spelling), intended to make
# the names identical to the ones used in the TSE datasets.
# NOTE(review): neither mapping below is referenced by the cells visible in
# this notebook — confirm where (or whether) they are applied.
REPLACE_NOMES_MAP = {
    "SAO TOME DAS LETRAS": "SAO THOME DAS LETRAS",
    "CAMACAN": "CAMACA",
    "GRACHO CARDOSO": "GRACCHO CARDOSO",
    "DONA EUZEBIA": "DONA EUSEBIA",
    "ELDORADO DO CARAJAS": "ELDORADO DOS CARAJAS",
    "SEM-PEIXE": "SEM PEIXE",
    "ARES": "AREZ",
    "SANTA IZABEL DO PARA": "SANTA ISABEL DO PARA",
    "AMPARO DO SAO FRANCISCO": "AMPARO DE SAO FRANCISCO",
    "SAO LUIZ DO PARAITINGA": "SAO LUIS DO PARAITINGA",
    "ACU": "ASSU",
    "ESPIGAO D'OESTE": "ESPIGAO DO OESTE",
    "ALVORADA D'OESTE": "ALVORADA DO OESTE",
}

# (pattern, replacement) pairs; presumably meant for substring-level fixes.
PADROES_MAP = [
    ("-D'", " D'"),
]

In [6]:
# Column dtypes handed to pd.read_csv by unzip_and_load: the first group is
# read as str, the two serial-number range bounds as int.
DFV_DTYPE = {
    **dict.fromkeys(
        [
            "DT_ELEICAO",
            "SG_UF",
            "NR_ZONA",
            "NR_SECAO",
            "NR_LOCAL_VOTACAO",
            "NM_LOCAL_VOTACAO",
            "NM_MUNICIPIO",
            "NR_VOTAVEL",
            "modelo_urna",
        ],
        str,
    ),
    **dict.fromkeys(["NR_FAIXA_INICIAL", "NR_FAIXA_INICIAL.1"], int),
}

In [7]:
def unzip_and_load(filepath_zip, filename_csv_to_load, dtype=None):
    """Load one ';'-separated, latin1-encoded CSV stored inside a zip archive.

    The requested member is extracted into a uniquely named temporary
    directory created next to the archive; that directory is removed again
    even when reading the CSV fails.

    Args:
        filepath_zip: Path of the zip archive.
        filename_csv_to_load: Name of the CSV member inside the archive.
        dtype: Column dtypes forwarded to ``pd.read_csv``. Defaults to the
            module-level ``DFV_DTYPE``.

    Returns:
        The CSV contents as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If the archive has no member with that name.
    """
    import tempfile  # local import so the block does not depend on a new file-level import

    if dtype is None:
        dtype = DFV_DTYPE

    # Resolve the archive's own directory. Building it by splitting on "/"
    # turned a bare file name into the root-level "/temp/" folder.
    dir_zip = os.path.dirname(os.path.abspath(filepath_zip))

    with zipfile.ZipFile(filepath_zip, 'r') as zip_ref:
        if filename_csv_to_load not in zip_ref.namelist():
            raise FileNotFoundError(
                f"'{filename_csv_to_load}' not found inside '{filepath_zip}'"
            )

        # A unique directory avoids wiping an unrelated pre-existing "temp"
        # folder, and the context manager guarantees the cleanup.
        with tempfile.TemporaryDirectory(dir=dir_zip) as dir_temp:
            # Only the needed member is extracted, not the whole archive.
            filepath_csv = zip_ref.extract(filename_csv_to_load, dir_temp)
            print(filepath_zip)
            return pd.read_csv(filepath_csv, sep=";", encoding='latin1', dtype=dtype)

def compara_intervalo(numero, limites):
    """Return the index label of the first range in `limites` containing `numero`.

    `limites` is scanned row by row in its stored order; a row matches when
    `numero` lies between its "NR_FAIXA_INICIAL" and "NR_FAIXA_INICIAL.1"
    values, both bounds included. When no row matches, the string
    "MODELO_NAO_IDENTIFICADO" is returned.
    """
    candidatos = (
        rotulo
        for rotulo, linha in limites.iterrows()
        if linha["NR_FAIXA_INICIAL"] <= numero <= linha["NR_FAIXA_INICIAL.1"]
    )
    # The generator is lazy, so the scan stops at the first matching row.
    return next(candidatos, "MODELO_NAO_IDENTIFICADO")

# Estados

In [8]:
# Load the processed states table.
estados = pd.read_csv(FILEPATH_PROC_ESTADOS, sep=";")

cols_locais = ["ESTADO", "REGIAO", "CAPITAL"]

# Upper-case the text columns and strip accents in one pass. A separate
# upper-casing step beforehand was redundant, since this call upper-cases too.
estados[cols_locais] = estados[cols_locais].apply(lambda col: dflib.remove_acento_list(col.str.upper()))

estados.sample(5)

Unnamed: 0,SIGLA,ESTADO,REGIAO,CAPITAL
1,AL,ALAGOAS,NORDESTE,MACEIO
25,SE,SERGIPE,NORDESTE,ARACAJU
2,AP,AMAPA,NORTE,MACAPA
14,PB,PARAIBA,NORDESTE,JOAO PESSOA
23,SC,SANTA CATARINA,SUL,FLORIANOPOLIS


# Locais de Votação

In [9]:
# The CSV inside the archive is expected to carry the archive's own file name
# with a .csv extension.
zip_name = FILEPATH_RAW_TSE_VOTACAO.rsplit("/", 1)[-1]
csv_filename = zip_name.replace(".zip", ".csv")
locais = unzip_and_load(FILEPATH_RAW_TSE_VOTACAO, csv_filename)

data/raw/tse/votacao_secao_2022_BR.zip


In [10]:
# Keep the second-round presidential rows only and reduce them to the distinct
# (UF, zone, section, polling-place number, polling-place name) combinations.
segundo_turno = locais["NR_TURNO"] == 2
cargo_presidente = locais["DS_CARGO"] == "PRESIDENTE"
colunas_local = ["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO", "NM_LOCAL_VOTACAO"]
locais = locais.loc[segundo_turno & cargo_presidente, colunas_local].drop_duplicates()

In [11]:
# Eyeball five random rows of the reduced polling-place table.
locais.sample(5)

Unnamed: 0,SG_UF,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,NM_LOCAL_VOTACAO
691829,MG,1,126,1023,ESCOLA MUNICIPAL HELOISA DE CAMPOS SANTOS
3430805,SC,64,124,1333,ESCOLA DE EDUCAÇÃO BÁSICA BELCHIOR
3721332,MG,185,387,1490,ESCOLA ESTADUAL MARIA DA CONCEIÇÃO AVELAR
375941,SP,139,84,1031,EM PROF MODESTO BOHRER
3965842,RJ,126,248,1503,ESCOLA MUNICIPAL PARQUE CAPIVARI


In [12]:
# Sorted, de-duplicated view for visual inspection; the result is only
# displayed and `locais` itself is left unchanged.
locais[["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO", "NM_LOCAL_VOTACAO"]].sort_values(["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO", "NM_LOCAL_VOTACAO"]).drop_duplicates()

Unnamed: 0,SG_UF,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,NM_LOCAL_VOTACAO
2233029,AC,1,10,1090,PREFEITURA MUNICIPAL DE RIO BRANCO
680709,AC,1,100,1589,SEINFRA - SECRETARIA DE ESTADO DE INFRAESTRUTURA
969132,AC,1,12,1066,OCA RIO BRANCO
1138382,AC,1,129,1384,ESCOLA ANITA GARIBALDI
2338187,AC,1,13,1961,SECRETARIA MUNICIPAL DE INFRAESTRUTURA E MOBIL...
...,...,...,...,...,...
896904,ZZ,1,98,1031,MUNIQUE
352289,ZZ,1,99,1015,PROVÍNCIA DE CÓRDOBA
1045527,ZZ,1,990,1015,MANILA
612958,ZZ,1,991,1015,SYDNEY


In [13]:
# Number of distinct rows when the polling-place name is part of the key.
locais[["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO", "NM_LOCAL_VOTACAO"]].drop_duplicates().shape

(472028, 5)

In [14]:
# Same count without the name: an identical row count means every
# (UF, zone, section, polling-place number) key carries a single name, so a
# left merge on these four columns cannot multiply rows.
locais[["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO"]].drop_duplicates().shape

(472028, 4)

In [15]:
# Number of distinct (UF, zone, polling-place number) combinations.
locais[["SG_UF", "NR_ZONA", "NR_LOCAL_VOTACAO"]].drop_duplicates().shape

(75406, 3)

# Modelos de Urna

In [16]:
# Serial-number ranges per urn model. The frame ends up indexed by the model
# (as an integer) with both range bounds stored as integers.
df_modelos_de_urna = unzip_and_load(
    filepath_zip=FILEPATH_RAW_TSE_MODELOS_URNA_ZIP,
    filename_csv_to_load="modelourna_numerointerno.csv",
)

df_modelos_de_urna = (
    df_modelos_de_urna
    .assign(DS_MODELO_URNA=lambda df: df["DS_MODELO_URNA"].astype(str).astype(int))
    .set_index("DS_MODELO_URNA")
    .astype({"NR_FAIXA_INICIAL": int, "NR_FAIXA_INICIAL.1": int})
)

prt.display(df_modelos_de_urna)

data/raw/tse/modelourna_numerointerno.zip


Unnamed: 0_level_0,NR_FAIXA_INICIAL,NR_FAIXA_INICIAL.1
DS_MODELO_URNA,Unnamed: 1_level_1,Unnamed: 2_level_1
2009,999500,1220500
2010,1220501,1345500
2011,1368501,1370500
2011,1600000,1650000
2013,1650001,1701000
2015,1750000,1950000
2020,2000000,2250000


# Boletins de Urna

In [17]:
# Load the second-round BU file of every state, keep only the presidential
# rows and stack everything into a single DataFrame.
dfbu_partes = []

for sigla in SIGLAS_ESTADOS_TSE:

    filepath_bu = FILEPATH_RAW_TSE_PATTERN_BOLETINS_DE_URNA_2T.replace("{SIGLA}", sigla)
    filename_csv = filepath_bu.split("/")[-1].replace(".zip", ".csv")

    dfbu_sigla = unzip_and_load(filepath_zip=filepath_bu,
                                filename_csv_to_load=filename_csv)

    dfbu_sigla = dfbu_sigla[dfbu_sigla["DS_CARGO_PERGUNTA"] == "Presidente"]
    dfbu_partes.append(dfbu_sigla)

# Concatenate once at the end: DataFrame.append re-copied the accumulated frame
# on every iteration and no longer exists in pandas 2.x. The guard keeps the
# empty-DataFrame result when there is nothing to load.
dfbu = pd.concat(dfbu_partes) if dfbu_partes else pd.DataFrame()

print()
print("dfbu.shape", dfbu.shape)

data/raw/tse/bweb_2t_AC_311020221535.zip
data/raw/tse/bweb_2t_AL_311020221535.zip
data/raw/tse/bweb_2t_AP_311020221535.zip
data/raw/tse/bweb_2t_AM_311020221535.zip
data/raw/tse/bweb_2t_BA_311020221535.zip
data/raw/tse/bweb_2t_CE_311020221535.zip
data/raw/tse/bweb_2t_DF_311020221535.zip
data/raw/tse/bweb_2t_ES_311020221535.zip
data/raw/tse/bweb_2t_GO_311020221535.zip
data/raw/tse/bweb_2t_MA_311020221535.zip
data/raw/tse/bweb_2t_MT_311020221535.zip
data/raw/tse/bweb_2t_MS_311020221535.zip
data/raw/tse/bweb_2t_MG_311020221535.zip
data/raw/tse/bweb_2t_PA_311020221535.zip
data/raw/tse/bweb_2t_PB_311020221535.zip
data/raw/tse/bweb_2t_PR_311020221535.zip
data/raw/tse/bweb_2t_PE_311020221535.zip
data/raw/tse/bweb_2t_PI_311020221535.zip
data/raw/tse/bweb_2t_RJ_311020221535.zip
data/raw/tse/bweb_2t_RN_311020221535.zip
data/raw/tse/bweb_2t_RS_311020221535.zip
data/raw/tse/bweb_2t_RO_311020221535.zip
data/raw/tse/bweb_2t_RR_311020221535.zip
data/raw/tse/bweb_2t_SC_311020221535.zip
data/raw/tse/bwe

In [18]:
# Eyeball eight random rows of the stacked presidential BU data.
dfbu.sample(8)

Unnamed: 0,DT_GERACAO,HH_GERACAO,ANO_ELEICAO,CD_TIPO_ELEICAO,NM_TIPO_ELEICAO,CD_PLEITO,DT_PLEITO,NR_TURNO,CD_ELEICAO,DS_ELEICAO,SG_UF,CD_MUNICIPIO,NM_MUNICIPIO,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,CD_CARGO_PERGUNTA,DS_CARGO_PERGUNTA,NR_PARTIDO,SG_PARTIDO,NM_PARTIDO,DT_BU_RECEBIDO,QT_APTOS,QT_COMPARECIMENTO,QT_ABSTENCOES,CD_TIPO_URNA,DS_TIPO_URNA,CD_TIPO_VOTAVEL,DS_TIPO_VOTAVEL,NR_VOTAVEL,NM_VOTAVEL,QT_VOTOS,NR_URNA_EFETIVADA,CD_CARGA_1_URNA_EFETIVADA,CD_CARGA_2_URNA_EFETIVADA,CD_FLASHCARD_URNA_EFETIVADA,DT_CARGA_URNA_EFETIVADA,DS_CARGO_PERGUNTA_SECAO,DS_AGREGADAS,DT_ABERTURA,DT_ENCERRAMENTO,QT_ELEITORES_BIOMETRIA_NH,DT_EMISSAO_BU,NR_JUNTA_APURADORA,NR_TURMA_APURADORA
212495,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,69051,PONTAL,135,443,1104,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 19:25:31,300,258,42,1,APURADA,3,Nulo,96,Nulo,17,1630280,364.721.332.275.076.507.,979.782,0FA88125,23/09/2022 13:21:00,1 - 443,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:19,26,30/10/2022 17:02:47,-1,-1
308349,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,66958,MIGUELÓPOLIS,208,17,1040,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 17:36:34,256,203,53,1,APURADA,3,Nulo,96,Nulo,6,1787708,829.288.661.208.815.135.,977.89,BCA6B3C1,22/09/2022 14:16:00,1 - 17,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:48,16,30/10/2022 17:02:19,-1,-1
160224,31/10/2022,15:49:55,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,MG,54194,VAZANTE,295,25,1023,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 17:27:49,371,282,89,1,APURADA,2,Branco,95,Branco,2,1247963,498.285.183.577.072.580.,889.494,BE3E6729,23/09/2022 18:43:00,1 - 25,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:40,20,30/10/2022 17:03:10,-1,-1
112509,31/10/2022,15:51:48,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SC,81612,ITAJAÍ,97,86,1236,1,Presidente,13,PT,Partido dos Trabalhadores,30/10/2022 18:22:10,342,273,69,1,APURADA,1,Nominal,13,LULA,88,2180805,814.921.039.571.151.997.,356.876,196E55CA,17/09/2022 12:28:00,1 - 86,#NULO#,30/10/2022 08:00:01,30/10/2022 17:01:31,17,30/10/2022 17:04:05,-1,-1
7349,31/10/2022,15:50:18,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,PA,5517,SÃO MIGUEL DO GUAMÁ,11,432,1520,1,Presidente,22,PL,Partido Liberal,30/10/2022 18:42:15,288,245,43,1,APURADA,1,Nominal,22,JAIR BOLSONARO,74,2025627,672.185.467.986.033.790.,973.508,D6FB47BA,22/09/2022 10:22:00,1 - 432,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:34,3,30/10/2022 17:03:14,-1,-1
64987,31/10/2022,15:48:51,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,BA,36390,ITIRUÇU,37,147,1082,1,Presidente,22,PL,Partido Liberal,30/10/2022 19:24:48,278,211,67,1,APURADA,1,Nominal,22,JAIR BOLSONARO,59,1075597,303.728.445.823.505.647.,190.151,605CD944,22/09/2022 17:23:00,1 - 147,#NULO#,30/10/2022 08:00:01,30/10/2022 17:03:36,46,30/10/2022 17:06:31,-1,-1
61587,31/10/2022,15:50:55,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,PR,84573,FIGUEIRA,119,56,1074,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 17:40:28,228,190,38,1,APURADA,3,Nulo,96,Nulo,4,1133561,602.375.604.631.222.331.,566.798,9B12FE65,01/10/2022 12:52:00,1 - 56,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:23,48,30/10/2022 17:02:34,-1,-1
35886,31/10/2022,15:51:32,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,RS,87211,JAGUARÃO,25,82,1376,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 18:09:58,398,304,94,1,APURADA,2,Branco,95,Branco,4,1309959,675.236.457.793.441.642.,406.425,3FB651E1,22/09/2022 15:20:00,1 - 82,#NULO#,30/10/2022 08:00:01,30/10/2022 17:01:00,55,30/10/2022 17:05:42,-1,-1


In [19]:
# Each urn shows up on several rows (one per vote option), so the range lookup
# is resolved once per distinct serial number and then mapped back onto the
# rows, instead of re-scanning the ranges for every single row.
modelo_por_urna = {
    nr_urna: compara_intervalo(int(nr_urna), df_modelos_de_urna)
    for nr_urna in dfbu["NR_URNA_EFETIVADA"].unique()
}
dfbu["modelo_urna"] = dfbu["NR_URNA_EFETIVADA"].map(modelo_por_urna)
dfbu["modelo_urna"] = dfbu["modelo_urna"].astype(str)
# Rows whose serial number falls outside every known range are discarded.
dfbu = dfbu[dfbu["modelo_urna"] != "MODELO_NAO_IDENTIFICADO"]

In [20]:
# Normalise each distinct municipality name once (upper case, accents
# removed) rather than once per BU row.
# NOTE(review): REPLACE_NOMES_MAP / PADROES_MAP are not applied here — confirm
# whether that is intended.
unique_municipios = dfbu.loc[:, ["NM_MUNICIPIO"]].drop_duplicates()
print(unique_municipios.shape[0])
nomes_maiusculos = unique_municipios["NM_MUNICIPIO"].str.upper()
unique_municipios["NM_MUNICIPIO_clean"] = dflib.remove_acento_list(nomes_maiusculos)

5426


In [21]:
# Compare a few original names with their normalised form.
unique_municipios.sample(8)

Unnamed: 0,NM_MUNICIPIO,NM_MUNICIPIO_clean
11459,VALENÇA DO PIAUÍ,VALENCA DO PIAUI
30226,FAZENDA VILANOVA,FAZENDA VILANOVA
41774,SANTANA DO CARIRI,SANTANA DO CARIRI
14179,RECURSOLÂNDIA,RECURSOLANDIA
73811,MORMAÇO,MORMACO
236041,RIFAINA,RIFAINA
22448,GOIATUBA,GOIATUBA
33902,CAMBUÍ,CAMBUI


In [22]:
# Attach the normalised municipality name and the polling-place name. The
# shape is printed around each left merge so an unexpected change in the row
# count is visible.
print("dfbu.shape", dfbu.shape)
dfbu = dfbu.merge(unique_municipios, how="left", on=["NM_MUNICIPIO"])
print("dfbu.shape", dfbu.shape)
chaves_local = ["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO"]
dfbu = dfbu.merge(locais, how="left", on=chaves_local)
print("dfbu.shape", dfbu.shape)

# The normalised name takes over the NM_MUNICIPIO column name.
dfbu = (
    dfbu
    .drop(columns=["NM_MUNICIPIO"])
    .rename(columns={"NM_MUNICIPIO_clean": "NM_MUNICIPIO"})
)

dfbu.shape (1850891, 46)
dfbu.shape (1850891, 47)
dfbu.shape (1850891, 48)


In [23]:
# Eyeball eight random rows after the merges (new columns are at the end).
dfbu.sample(8)

Unnamed: 0,DT_GERACAO,HH_GERACAO,ANO_ELEICAO,CD_TIPO_ELEICAO,NM_TIPO_ELEICAO,CD_PLEITO,DT_PLEITO,NR_TURNO,CD_ELEICAO,DS_ELEICAO,SG_UF,CD_MUNICIPIO,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,CD_CARGO_PERGUNTA,DS_CARGO_PERGUNTA,NR_PARTIDO,SG_PARTIDO,NM_PARTIDO,DT_BU_RECEBIDO,QT_APTOS,QT_COMPARECIMENTO,QT_ABSTENCOES,CD_TIPO_URNA,DS_TIPO_URNA,CD_TIPO_VOTAVEL,DS_TIPO_VOTAVEL,NR_VOTAVEL,NM_VOTAVEL,QT_VOTOS,NR_URNA_EFETIVADA,CD_CARGA_1_URNA_EFETIVADA,CD_CARGA_2_URNA_EFETIVADA,CD_FLASHCARD_URNA_EFETIVADA,DT_CARGA_URNA_EFETIVADA,DS_CARGO_PERGUNTA_SECAO,DS_AGREGADAS,DT_ABERTURA,DT_ENCERRAMENTO,QT_ELEITORES_BIOMETRIA_NH,DT_EMISSAO_BU,NR_JUNTA_APURADORA,NR_TURMA_APURADORA,modelo_urna,NM_MUNICIPIO,NM_LOCAL_VOTACAO
1210604,31/10/2022,15:51:14,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,RN,18376,51,126,1414,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 19:20:00,385,332,53,1,APURADA,2,Branco,95,Branco,4,2060452,054.051.949.018.029.001.,642.741,7D9E08F9,20/09/2022 11:00:00,1 - 126,#NULO#,30/10/2022 08:00:01,30/10/2022 17:04:29,20,30/10/2022 17:06:34,-1,-1,2020,SAO GONCALO DO AMARANTE,CENTRO ESTADUAL DE EDUCAÇÃO PROFISSIONAL DR. R...
926007,31/10/2022,15:50:55,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,PR,75353,177,766,1864,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 17:36:29,448,331,117,1,APURADA,2,Branco,95,Branco,9,2065034,356.070.063.372.736.423.,70.14,09C693C8,25/09/2022 12:47:00,1 - 766,#NULO#,30/10/2022 08:00:01,30/10/2022 17:04:20,8,30/10/2022 17:05:51,-1,-1,2020,CURITIBA,COLEGIO BOM JESUS (CENTRO)
1372354,31/10/2022,15:51:48,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SC,81450,39,18,1058,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 19:25:06,377,343,34,1,APURADA,3,Nulo,96,Nulo,6,1286374,705.090.656.980.575.692.,551.472,07502B0D,22/09/2022 13:42:00,1 - 18,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:19,59,30/10/2022 17:02:33,-1,-1,2010,IMBUIA,ESCOLA MUNICIPAL CAMPO DAS FLORES
1681421,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,67890,332,58,1066,1,Presidente,13,PT,Partido dos Trabalhadores,30/10/2022 18:04:47,349,275,74,1,APURADA,1,Nominal,13,LULA,107,2203813,271.197.508.020.180.743.,101.384,9299B436,22/09/2022 11:37:00,1 - 58,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:28,8,30/10/2022 17:02:05,-1,-1,2020,OSASCO,EE. JOSÉ GERALDO VIEIRA
676728,31/10/2022,15:49:55,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,MG,53856,272,193,1252,1,Presidente,13,PT,Partido dos Trabalhadores,30/10/2022 18:53:43,343,283,60,1,APURADA,1,Nominal,13,LULA,110,1266475,782.567.837.113.241.467.,292.102,0ABD2832,23/09/2022 16:27:00,1 - 193,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:41,17,30/10/2022 17:02:06,-1,-1,2010,TRES CORACOES,ESCOLA MUNICIPAL ZILAH RESENDE PINTO
1678450,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,63770,329,230,1260,1,Presidente,13,PT,Partido dos Trabalhadores,30/10/2022 18:12:42,395,330,65,1,APURADA,1,Nominal,13,LULA,190,1829421,981.738.372.599.349.497.,945.529,9B6CA29E,26/09/2022 15:32:00,1 - 230,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:17,9,30/10/2022 17:02:40,-1,-1,2015,DIADEMA,EM. SEN. TEOTONIO BRANDÃO VILELA
945081,31/10/2022,15:50:39,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,PE,25313,8,405,1686,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 17:47:08,323,303,20,1,APURADA,3,Nulo,96,Nulo,4,2044090,376.325.283.472.160.292.,506.126,1A31D8BC,23/09/2022 08:47:00,1 - 405,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:44,5,30/10/2022 17:02:56,-1,-1,2020,RECIFE,FACULDADE DAMAS
465398,31/10/2022,15:49:41,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,MA,9210,89,251,1422,1,Presidente,22,PL,Partido Liberal,30/10/2022 18:27:47,386,321,65,1,APURADA,1,Nominal,22,JAIR BOLSONARO,114,2125648,616.716.251.181.607.334.,105.882,32319A71,25/09/2022 08:56:00,1 - 251,#NULO#,30/10/2022 08:00:01,30/10/2022 17:01:25,2,30/10/2022 17:03:22,-1,-1,2020,SAO LUIS,UEB JOSE RIBAMAR BOGEIA


In [24]:
# Sanity check: lists which election rounds are present in the data.
dfbu["NR_TURNO"].value_counts()

2    1850891
Name: NR_TURNO, dtype: int64

In [25]:
# Persist the processed table as a ';'-separated CSV without the index column.
print(FILEPATH_PROC_TSE_BOLETINS_DE_URNA_2T)
dfbu.to_csv(FILEPATH_PROC_TSE_BOLETINS_DE_URNA_2T, sep=";", index=False)

data/processed/bu_presidente_2t.csv


In [26]:
# Eyeball four random rows of the table that was just written.
dfbu.sample(4)

Unnamed: 0,DT_GERACAO,HH_GERACAO,ANO_ELEICAO,CD_TIPO_ELEICAO,NM_TIPO_ELEICAO,CD_PLEITO,DT_PLEITO,NR_TURNO,CD_ELEICAO,DS_ELEICAO,SG_UF,CD_MUNICIPIO,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,CD_CARGO_PERGUNTA,DS_CARGO_PERGUNTA,NR_PARTIDO,SG_PARTIDO,NM_PARTIDO,DT_BU_RECEBIDO,QT_APTOS,QT_COMPARECIMENTO,QT_ABSTENCOES,CD_TIPO_URNA,DS_TIPO_URNA,CD_TIPO_VOTAVEL,DS_TIPO_VOTAVEL,NR_VOTAVEL,NM_VOTAVEL,QT_VOTOS,NR_URNA_EFETIVADA,CD_CARGA_1_URNA_EFETIVADA,CD_CARGA_2_URNA_EFETIVADA,CD_FLASHCARD_URNA_EFETIVADA,DT_CARGA_URNA_EFETIVADA,DS_CARGO_PERGUNTA_SECAO,DS_AGREGADAS,DT_ABERTURA,DT_ENCERRAMENTO,QT_ELEITORES_BIOMETRIA_NH,DT_EMISSAO_BU,NR_JUNTA_APURADORA,NR_TURMA_APURADORA,modelo_urna,NM_MUNICIPIO,NM_LOCAL_VOTACAO
1765984,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,64777,395,525,1848,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 19:02:43,280,239,41,1,APURADA,3,Nulo,96,Nulo,12,1321738,887.933.020.322.344.880.,311.452,F0060BE8,25/09/2022 09:12:00,1 - 525,#NULO#,30/10/2022 08:00:01,30/10/2022 17:01:33,23,30/10/2022 17:03:06,-1,-1,2010,GUARULHOS,EE. JARDIM MARIA DIRCE 3
513237,31/10/2022,15:50:08,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,MS,90735,18,256,1031,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 18:01:53,314,257,57,1,APURADA,3,Nulo,96,Nulo,9,2152566,477.342.660.049.899.084.,11.977,73A4A75A,24/09/2022 17:41:00,1 - 256,#NULO#,30/10/2022 07:00:01,30/10/2022 16:00:21,6,30/10/2022 16:01:51,-1,-1,2020,DOURADOS,EM LOIDE BONFIM ANDRADE
1432345,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,62197,23,334,2100,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 18:24:18,370,273,97,1,APURADA,3,Nulo,96,Nulo,12,1314290,278.074.191.711.948.661.,103.393,55BCAFEE,24/09/2022 09:25:00,1 - 334,#NULO#,30/10/2022 08:00:01,30/10/2022 17:01:58,41,30/10/2022 17:03:49,-1,-1,2010,BAURU,EMEI LILIAN APARECIDA PASSONE HADDAD
1037923,31/10/2022,15:50:48,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,PI,10057,47,84,1082,1,Presidente,22,PL,Partido Liberal,30/10/2022 17:29:53,321,272,49,1,APURADA,1,Nominal,22,JAIR BOLSONARO,45,2196358,750.480.750.168.578.335.,245.381,2CBB0801,28/09/2022 09:22:00,1 - 84,85,30/10/2022 08:00:01,30/10/2022 17:00:15,10,30/10/2022 17:02:37,-1,-1,2020,ALTO LONGA,SINDICATO DOS TRABALHADORES RURAIS


# Validação

In [27]:
# Reference totals to compare against the aggregations below
# (their source is not stated in this notebook).

# SAO PAULO
# Jair Bolsonaro: 14.216.587 (55,24%)
# Lula 11.519.882 (44,76%)

# PARAIBA
# Lula (PT): 66,62% of valid votes (1.601.953 votes)
# Jair Bolsonaro (PL): 33,38% of valid votes (802.502 votes)

# BRAZIL
# Lula 60.345.999 votes (50,90% of valid votes)
# Jair Bolsonaro 58.206.354 votes (49,10% of valid votes).

In [28]:
# Nationwide vote totals per option, largest first.
print("Brasil")
votos_brasil = dfbu.groupby("NM_VOTAVEL")["QT_VOTOS"].sum()
votos_brasil.sort_values(ascending=False).to_frame("qtd_votos")

Brasil


Unnamed: 0_level_0,qtd_votos
NM_VOTAVEL,Unnamed: 1_level_1
LULA,60345999
JAIR BOLSONARO,58206354
Nulo,3930765
Branco,1769678


In [29]:
# Vote totals per option restricted to the rows of São Paulo (SG_UF == "SP"),
# largest first.
print("São Paulo")
votos_sp = dfbu.loc[dfbu["SG_UF"] == "SP"].groupby("NM_VOTAVEL")["QT_VOTOS"].sum()
votos_sp.sort_values(ascending=False).to_frame("qtd_votos")

São Paulo


Unnamed: 0_level_0,qtd_votos
NM_VOTAVEL,Unnamed: 1_level_1
JAIR BOLSONARO,14216587
LULA,11519882
Nulo,1117345
Branco,526677


In [30]:
# Frequency table of the urn model over the BU rows. Each urn contributes one
# row per vote option, so these are row counts, not counts of distinct urns.
stats.freq(dfbu["modelo_urna"])

Unnamed: 0,Absoluto,Relativo
2020,761555,0.411453
2010,365968,0.197725
2015,334332,0.180633
2009,188074,0.101613
2011,104719,0.056578
2013,96243,0.051998
