# PreProcessamentoDeDadosBoletimDeUrna
Este notebook tem o objetivo de realizar um pré-processamento nos dados baixados. <br>
Estes processamentos são os seguintes:
- Boletim de Urna
  - Adição do modelo de urna
  - Tratamento dos nomes das cidades

In [1]:
import working_dir
# Presumably switches the process to the repository root so the relative
# data/ paths used below resolve -- confirm in the working_dir module.
working_dir.set_wd()
# Last expression of the cell: the notebook echoes the returned path.
working_dir.get_wd()

'/Users/tales.pimentel/ds/learning/audit2022p'

In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import zipfile
import shutil
import os
from src.dao import data_urls, data_globals
from src.utils import pretties as prt
from src.utils import stats, dflib

In [3]:
# Lift pandas' column display limit so wide frames are shown in full.
pd.set_option('display.max_columns', None)

In [4]:
# Local aliases for the shared constants defined in src.dao.data_globals.
SIGLAS_ESTADOS_TSE = data_globals.SIGLAS_ESTADOS_TSE
FILEPATH_PROC_ESTADOS = data_globals.FILEPATH_PROC_ESTADOS

# Raw TSE archives read by this notebook and the processed CSV it writes.
# The boletim pattern carries a "{SIGLA}" placeholder that is replaced with
# each state abbreviation when the files are loaded.
FILEPATH_RAW_TSE_MODELOS_URNA_ZIP = data_globals.FILEPATH_RAW_TSE_MODELOS_URNA_ZIP
FILEPATH_RAW_TSE_VOTACAO = data_globals.FILEPATH_RAW_TSE_VOTACAO
FILEPATH_RAW_TSE_PATTERN_BOLETINS_DE_URNA_2T = data_globals.FILEPATH_RAW_TSE_PATTERN_BOLETINS_DE_URNA_2T
FILEPATH_PROC_TSE_BOLETINS_DE_URNA_2T = data_globals.FILEPATH_PROC_TSE_BOLETINS_DE_URNA_2T

In [5]:
# Map used to adjust city names so that they become identical to the names
# found in the TSE datasets (key: spelling to fix, value: TSE spelling).
REPLACE_NOMES_MAP = {
    "SAO TOME DAS LETRAS": "SAO THOME DAS LETRAS",
    "CAMACAN": "CAMACA",
    "GRACHO CARDOSO": "GRACCHO CARDOSO",
    "DONA EUZEBIA": "DONA EUSEBIA",
    "ELDORADO DO CARAJAS": "ELDORADO DOS CARAJAS",
    "SEM-PEIXE": "SEM PEIXE",
    "ARES": "AREZ",
    "SANTA IZABEL DO PARA": "SANTA ISABEL DO PARA",
    "AMPARO DO SAO FRANCISCO": "AMPARO DE SAO FRANCISCO",
    "SAO LUIZ DO PARAITINGA": "SAO LUIS DO PARAITINGA",
    "ACU": "ASSU",
    "ESPIGAO D'OESTE": "ESPIGAO DO OESTE",
    "ALVORADA D'OESTE": "ALVORADA DO OESTE",
}

# Presumably (pattern, replacement) pairs for substring-level fixes --
# confirm against the code that consumes this list.
PADROES_MAP = [
    ("-D'", " D'"),
]

In [6]:
# Column dtypes handed to pandas.read_csv by unzip_and_load: the listed
# identifier/name columns are read as text, the urn number ranges as integers.
DFV_DTYPE = {
    **dict.fromkeys(
        (
            "DT_ELEICAO",
            "SG_UF",
            "NR_ZONA",
            "NR_SECAO",
            "NR_LOCAL_VOTACAO",
            "NM_LOCAL_VOTACAO",
            "NM_MUNICIPIO",
            "NR_VOTAVEL",
            "modelo_urna",
        ),
        str,
    ),
    "NR_FAIXA_INICIAL": int,
    "NR_FAIXA_INICIAL.1": int,
}

In [7]:
def unzip_and_load(filepath_zip, filename_csv_to_load):
    """Extract a zip archive into a scratch folder and load one CSV from it.

    The archive is unpacked into a ``temp`` directory located next to the
    zip file. That directory is removed again before the function exits,
    whether loading succeeded or raised, so a failed extraction or parse
    does not leave the extracted files behind. Note that a pre-existing
    ``temp`` directory at that location is deleted as well.

    Args:
        filepath_zip: Path of the ``.zip`` archive to extract.
        filename_csv_to_load: Path of the CSV to read, relative to the
            root of the extracted archive.

    Returns:
        pandas.DataFrame read with ``sep=";"``, ``latin1`` encoding and the
        module-level ``DFV_DTYPE`` column dtypes.
    """
    # os.path.dirname yields "" for a bare file name, so the scratch folder
    # becomes the relative "temp" rather than "/temp/" at the filesystem root.
    dir_temp = os.path.join(os.path.dirname(filepath_zip), "temp")

    try:
        with zipfile.ZipFile(filepath_zip, 'r') as zip_ref:
            zip_ref.extractall(dir_temp)

        print(filepath_zip)
        return pd.read_csv(os.path.join(dir_temp, filename_csv_to_load),
                           sep=";", encoding='latin1', dtype=DFV_DTYPE)
    finally:
        # The directory may not exist if opening the archive failed; skipping
        # the removal then keeps the original exception visible.
        if os.path.isdir(dir_temp):
            shutil.rmtree(dir_temp)

def compara_intervalo(numero, limites):
    """Return the index label of the first range in ``limites`` holding ``numero``.

    Args:
        numero: Value to classify.
        limites: DataFrame with one range per row; the lower bound is read
            from column ``NR_FAIXA_INICIAL`` and the upper bound from column
            ``NR_FAIXA_INICIAL.1``. Both bounds are inclusive.

    Returns:
        The index label of the first matching row (rows are scanned in
        order), or the string ``"MODELO_NAO_IDENTIFICADO"`` when no row
        matches.
    """
    rotulos_compativeis = (
        rotulo
        for rotulo, linha in limites.iterrows()
        if linha["NR_FAIXA_INICIAL"] <= numero <= linha["NR_FAIXA_INICIAL.1"]
    )
    return next(rotulos_compativeis, "MODELO_NAO_IDENTIFICADO")

# Estados

In [8]:
# Load the processed table of states.
estados = pd.read_csv(FILEPATH_PROC_ESTADOS, sep=";")

cols_locais = ["ESTADO", "REGIAO", "CAPITAL"]

# Upper-case each place-name column and pass it through the project's accent
# remover in a single step per column.
estados[cols_locais] = estados[cols_locais].apply(lambda col: dflib.remove_acento_list(col.str.upper()))

# Last expression of the cell: show a random sample as a sanity check.
estados.sample(5)

Unnamed: 0,SIGLA,ESTADO,REGIAO,CAPITAL
16,PE,PERNAMBUCO,NORDESTE,RECIFE
9,MA,MARANHAO,NORDESTE,SAO LUIS
5,CE,CEARA,NORDESTE,FORTALEZA
23,SC,SANTA CATARINA,SUL,FLORIANOPOLIS
22,RR,RORAIMA,NORTE,BOA VISTA


# Locais de Votação

In [9]:
# The CSV to load is named after the archive: same base name, .csv extension.
csv_filename = FILEPATH_RAW_TSE_VOTACAO.rsplit("/", 1)[-1].replace(".zip", ".csv")
locais = unzip_and_load(FILEPATH_RAW_TSE_VOTACAO, csv_filename)

data/raw/tse/votacao_secao_2022_BR.zip


In [10]:
# Keep only second-round presidential rows and reduce them to the distinct
# combinations of section identifiers and polling place name.
locais = (
    locais.loc[
        (locais["NR_TURNO"] == 2) & (locais["DS_CARGO"] == "PRESIDENTE"),
        ["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO", "NM_LOCAL_VOTACAO"],
    ]
    .drop_duplicates()
)

In [11]:
# Spot-check a few random rows of the reduced polling place table.
locais.sample(5)

Unnamed: 0,SG_UF,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,NM_LOCAL_VOTACAO
2859035,PA,36,75,1180,EMEIF IRMÃ MARLENE FONSECA
3470425,SC,4,101,1090,ESCOLA NUCLEADA RIO VACARIANOS
3737909,SC,105,143,1112,ESCOLA DE EDUCAÇÃO BÁSICA SENADOR RODRIGO LOBO
3094291,RJ,84,446,1449,IGREJA NOSSA SENHORA DE FATIMA
1325134,SC,17,115,1953,ESCOLA MUNICIPAL DE EDUCAÇÃO BÁSICA HELMUTH GU...


In [12]:
# Display the polling place table ordered by its identifying columns.
(
    locais[["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO", "NM_LOCAL_VOTACAO"]]
    .sort_values(by=["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO", "NM_LOCAL_VOTACAO"])
    .drop_duplicates()
)

Unnamed: 0,SG_UF,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,NM_LOCAL_VOTACAO
2233029,AC,1,10,1090,PREFEITURA MUNICIPAL DE RIO BRANCO
680709,AC,1,100,1589,SEINFRA - SECRETARIA DE ESTADO DE INFRAESTRUTURA
969132,AC,1,12,1066,OCA RIO BRANCO
1138382,AC,1,129,1384,ESCOLA ANITA GARIBALDI
2338187,AC,1,13,1961,SECRETARIA MUNICIPAL DE INFRAESTRUTURA E MOBIL...
...,...,...,...,...,...
896904,ZZ,1,98,1031,MUNIQUE
352289,ZZ,1,99,1015,PROVÍNCIA DE CÓRDOBA
1045527,ZZ,1,990,1015,MANILA
612958,ZZ,1,991,1015,SYDNEY


In [13]:
# Count distinct (UF, zone, section, polling place, place name) combinations.
locais[["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO", "NM_LOCAL_VOTACAO"]].drop_duplicates().shape

(472028, 5)

In [14]:
# Same count without the place name. If it equals the count that includes the
# name, every (UF, zone, section, polling place) key carries exactly one name,
# i.e. the key is unique in locais.
locais[["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO"]].drop_duplicates().shape

(472028, 4)

In [15]:
# Count distinct polling places (section number left out of the key).
locais[["SG_UF", "NR_ZONA", "NR_LOCAL_VOTACAO"]].drop_duplicates().shape

(75406, 3)

# Modelos de Urna

In [16]:
# Load the table that maps urn model years to ranges of urn numbers.
df_modelos_de_urna = unzip_and_load(FILEPATH_RAW_TSE_MODELOS_URNA_ZIP, "modelourna_numerointerno.csv")

# Index the ranges by model (as an integer) and make sure both range bounds
# are integers; "NR_FAIXA_INICIAL.1" holds the upper bound of each range.
df_modelos_de_urna = (
    df_modelos_de_urna
    .assign(DS_MODELO_URNA=df_modelos_de_urna["DS_MODELO_URNA"].astype(str).astype(int))
    .set_index("DS_MODELO_URNA")
    .astype({"NR_FAIXA_INICIAL": int, "NR_FAIXA_INICIAL.1": int})
)

prt.display(df_modelos_de_urna)

data/raw/tse/modelourna_numerointerno.zip


Unnamed: 0_level_0,NR_FAIXA_INICIAL,NR_FAIXA_INICIAL.1
DS_MODELO_URNA,Unnamed: 1_level_1,Unnamed: 2_level_1
2009,999500,1220500
2010,1220501,1345500
2011,1368501,1370500
2011,1600000,1650000
2013,1650001,1701000
2015,1750000,1950000
2020,2000000,2250000


# Boletins de Urna

In [17]:
# Gather the presidential rows of every state file and concatenate them once
# at the end. DataFrame.append no longer exists in pandas 2.x, and growing a
# frame inside the loop re-copies all accumulated rows on each iteration.
frames_por_uf = []

for sigla in SIGLAS_ESTADOS_TSE:

    # One archive per state; the CSV inside is named after the archive.
    filepath_bu = FILEPATH_RAW_TSE_PATTERN_BOLETINS_DE_URNA_2T.replace("{SIGLA}", sigla)
    filename_csv = filepath_bu.split("/")[-1].replace(".zip", ".csv")

    dfbu_sigla = unzip_and_load(filepath_zip=filepath_bu,
                                filename_csv_to_load=filename_csv)

    dfbu_sigla = dfbu_sigla[dfbu_sigla["DS_CARGO_PERGUNTA"] == "Presidente"]
    frames_por_uf.append(dfbu_sigla)

# pd.concat raises on an empty list; fall back to an empty frame instead.
dfbu = pd.concat(frames_por_uf) if frames_por_uf else pd.DataFrame()

print()
print("dfbu.shape", dfbu.shape)

data/raw/tse/bweb_2t_AC_311020221535.zip
data/raw/tse/bweb_2t_AL_311020221535.zip
data/raw/tse/bweb_2t_AP_311020221535.zip
data/raw/tse/bweb_2t_AM_311020221535.zip
data/raw/tse/bweb_2t_BA_311020221535.zip
data/raw/tse/bweb_2t_CE_311020221535.zip
data/raw/tse/bweb_2t_DF_311020221535.zip
data/raw/tse/bweb_2t_ES_311020221535.zip
data/raw/tse/bweb_2t_GO_311020221535.zip
data/raw/tse/bweb_2t_MA_311020221535.zip
data/raw/tse/bweb_2t_MT_311020221535.zip
data/raw/tse/bweb_2t_MS_311020221535.zip
data/raw/tse/bweb_2t_MG_311020221535.zip
data/raw/tse/bweb_2t_PA_311020221535.zip
data/raw/tse/bweb_2t_PB_311020221535.zip
data/raw/tse/bweb_2t_PR_311020221535.zip
data/raw/tse/bweb_2t_PE_311020221535.zip
data/raw/tse/bweb_2t_PI_311020221535.zip
data/raw/tse/bweb_2t_RJ_311020221535.zip
data/raw/tse/bweb_2t_RN_311020221535.zip
data/raw/tse/bweb_2t_RS_311020221535.zip
data/raw/tse/bweb_2t_RO_311020221535.zip
data/raw/tse/bweb_2t_RR_311020221535.zip
data/raw/tse/bweb_2t_SC_311020221535.zip
data/raw/tse/bwe

In [18]:
# Spot-check a few random rows of the combined boletins.
dfbu.sample(8)

Unnamed: 0,DT_GERACAO,HH_GERACAO,ANO_ELEICAO,CD_TIPO_ELEICAO,NM_TIPO_ELEICAO,CD_PLEITO,DT_PLEITO,NR_TURNO,CD_ELEICAO,DS_ELEICAO,SG_UF,CD_MUNICIPIO,NM_MUNICIPIO,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,CD_CARGO_PERGUNTA,DS_CARGO_PERGUNTA,NR_PARTIDO,SG_PARTIDO,NM_PARTIDO,DT_BU_RECEBIDO,QT_APTOS,QT_COMPARECIMENTO,QT_ABSTENCOES,CD_TIPO_URNA,DS_TIPO_URNA,CD_TIPO_VOTAVEL,DS_TIPO_VOTAVEL,NR_VOTAVEL,NM_VOTAVEL,QT_VOTOS,NR_URNA_EFETIVADA,CD_CARGA_1_URNA_EFETIVADA,CD_CARGA_2_URNA_EFETIVADA,CD_FLASHCARD_URNA_EFETIVADA,DT_CARGA_URNA_EFETIVADA,DS_CARGO_PERGUNTA_SECAO,DS_AGREGADAS,DT_ABERTURA,DT_ENCERRAMENTO,QT_ELEITORES_BIOMETRIA_NH,DT_EMISSAO_BU,NR_JUNTA_APURADORA,NR_TURMA_APURADORA
332699,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,63770,DIADEMA,222,341,1163,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 17:53:30,391,320,71,1,APURADA,2,Branco,95,Branco,3,2122771,082.762.740.863.707.110.,683.193,883803F0,23/09/2022 09:43:00,1 - 341,#NULO#,30/10/2022 08:00:02,30/10/2022 17:02:41,10,30/10/2022 17:05:01,-1,-1
734488,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,64157,FERRAZ DE VASCONCELOS,401,258,1325,1,Presidente,22,PL,Partido Liberal,30/10/2022 19:00:09,327,243,84,1,APURADA,1,Nominal,22,JAIR BOLSONARO,109,1666393,679.414.088.556.019.954.,567.996,4C2CC4FA,24/09/2022 15:43:00,1 - 258,#NULO#,30/10/2022 08:00:01,30/10/2022 17:02:38,16,30/10/2022 17:04:09,-1,-1
101971,31/10/2022,15:51:07,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,RJ,60011,RIO DE JANEIRO,180,33,1074,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 18:26:17,301,256,45,1,APURADA,2,Branco,95,Branco,1,2178458,574.322.118.611.696.204.,710.52,56EF86C4,22/09/2022 08:30:00,1 - 33,#NULO#,30/10/2022 08:00:01,30/10/2022 17:05:35,7,30/10/2022 17:07:36,-1,-1
233801,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,69892,RUBIÁCEA,151,46,1015,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 17:59:36,287,222,65,1,APURADA,2,Branco,95,Branco,2,1157381,072.762.015.931.153.963.,87.473,D245EC4E,22/09/2022 14:26:00,1 - 46,#NULO#,30/10/2022 08:00:01,30/10/2022 17:02:40,18,30/10/2022 17:04:48,-1,-1
66152,31/10/2022,15:49:18,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,CE,13897,FORTALEZA,93,240,1244,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 18:47:41,362,307,55,1,APURADA,3,Nulo,96,Nulo,9,2219191,268.222.726.576.528.704.,508.756,A929317F,23/09/2022 09:03:00,1 - 240,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:46,16,30/10/2022 17:02:24,-1,-1
133182,31/10/2022,15:50:39,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,PE,24570,JABOATÃO DOS GUARARAPES,118,199,1228,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 19:29:32,390,338,52,1,APURADA,2,Branco,95,Branco,1,2173008,097.913.801.019.140.121.,785.585,007994EB,27/09/2022 10:08:00,1 - 199,#NULO#,30/10/2022 08:00:01,30/10/2022 17:09:34,10,30/10/2022 17:11:58,-1,-1
804343,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,66192,JUNDIAÍ,424,249,1201,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 18:19:35,391,297,94,1,APURADA,2,Branco,95,Branco,7,2151511,479.141.929.110.870.210.,990.46,AE93AA43,22/09/2022 11:21:00,1 - 249,#NULO#,30/10/2022 08:00:01,30/10/2022 17:05:27,22,30/10/2022 17:08:15,-1,-1
605143,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,71455,SOROCABA,356,437,1503,1,Presidente,13,PT,Partido dos Trabalhadores,30/10/2022 18:08:54,362,272,90,1,APURADA,1,Nominal,13,LULA,115,2144680,213.549.555.665.367.721.,660.268,5B8307D1,25/09/2022 11:54:00,1 - 437,#NULO#,30/10/2022 08:00:01,30/10/2022 17:03:14,10,30/10/2022 17:05:32,-1,-1


In [19]:
# Many rows share the same urn number (a section contributes one row per
# votable option), so resolve the model once per distinct urn number and map
# the result back, instead of repeating the range scan for every row.
modelo_por_urna = {
    nr_urna: compara_intervalo(int(nr_urna), df_modelos_de_urna)
    for nr_urna in dfbu["NR_URNA_EFETIVADA"].unique()
}
dfbu["modelo_urna"] = dfbu["NR_URNA_EFETIVADA"].map(modelo_por_urna).astype(str)

# Drop rows whose urn number falls outside every known model range.
dfbu = dfbu[dfbu["modelo_urna"] != "MODELO_NAO_IDENTIFICADO"]

In [20]:
# One row per distinct municipality name as spelled in the boletins.
unique_municipios = dfbu[["NM_MUNICIPIO"]].drop_duplicates()
print(len(unique_municipios))
# Normalised spelling: upper-cased and passed through the project's accent
# remover. Computed on the distinct names only, then joined back onto dfbu.
unique_municipios["NM_MUNICIPIO_clean"] = dflib.remove_acento_list(unique_municipios["NM_MUNICIPIO"].str.upper())

5426


In [21]:
unique_municipios.sample(8)

Unnamed: 0,NM_MUNICIPIO,NM_MUNICIPIO_clean
64708,BRASILÂNDIA DO SUL,BRASILANDIA DO SUL
102641,GUANAMBI,GUANAMBI
165788,PINDAÍ,PINDAI
74616,RAFAEL JAMBEIRO,RAFAEL JAMBEIRO
155831,NOVO MACHADO,NOVO MACHADO
40007,SÃO JOSÉ DO BARREIRO,SAO JOSE DO BARREIRO
2286,PASSA VINTE,PASSA VINTE
191704,PARAÍSO DO SUL,PARAISO DO SUL


In [22]:
# Both joins are lookups that must not change the number of rows in dfbu.
# validate="many_to_one" makes pandas raise a MergeError when the right-hand
# table has duplicate keys, instead of silently multiplying rows; the printed
# shapes remain as a visual check.
print("dfbu.shape", dfbu.shape)
dfbu = dfbu.merge(unique_municipios, on=["NM_MUNICIPIO"], how="left",
                  validate="many_to_one")
print("dfbu.shape", dfbu.shape)
dfbu = dfbu.merge(locais, on=["SG_UF", "NR_ZONA", "NR_SECAO", "NR_LOCAL_VOTACAO"], how="left",
                  validate="many_to_one")
print("dfbu.shape", dfbu.shape)

# Replace the raw municipality name with its normalised spelling.
dfbu = dfbu.drop(columns="NM_MUNICIPIO").rename(columns={"NM_MUNICIPIO_clean": "NM_MUNICIPIO"})

dfbu.shape (1850891, 46)
dfbu.shape (1850891, 47)
dfbu.shape (1850891, 48)


In [23]:
# Spot-check the enriched frame (urn model, normalised municipality name,
# polling place name).
dfbu.sample(8)

Unnamed: 0,DT_GERACAO,HH_GERACAO,ANO_ELEICAO,CD_TIPO_ELEICAO,NM_TIPO_ELEICAO,CD_PLEITO,DT_PLEITO,NR_TURNO,CD_ELEICAO,DS_ELEICAO,SG_UF,CD_MUNICIPIO,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,CD_CARGO_PERGUNTA,DS_CARGO_PERGUNTA,NR_PARTIDO,SG_PARTIDO,NM_PARTIDO,DT_BU_RECEBIDO,QT_APTOS,QT_COMPARECIMENTO,QT_ABSTENCOES,CD_TIPO_URNA,DS_TIPO_URNA,CD_TIPO_VOTAVEL,DS_TIPO_VOTAVEL,NR_VOTAVEL,NM_VOTAVEL,QT_VOTOS,NR_URNA_EFETIVADA,CD_CARGA_1_URNA_EFETIVADA,CD_CARGA_2_URNA_EFETIVADA,CD_FLASHCARD_URNA_EFETIVADA,DT_CARGA_URNA_EFETIVADA,DS_CARGO_PERGUNTA_SECAO,DS_AGREGADAS,DT_ABERTURA,DT_ENCERRAMENTO,QT_ELEITORES_BIOMETRIA_NH,DT_EMISSAO_BU,NR_JUNTA_APURADORA,NR_TURMA_APURADORA,modelo_urna,NM_MUNICIPIO,NM_LOCAL_VOTACAO
1812870,31/10/2022,15:51:56,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SE,31054,2,321,2780,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 17:31:58,362,301,61,1,APURADA,2,Branco,95,Branco,3,2174521,993.221.510.385.527.802.,321.374,A8A10825,23/09/2022 15:47:00,1 - 321,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:56,10,30/10/2022 17:03:17,-1,-1,2020,ARACAJU,SALESIANO NOSSA SENHORA AUXILIADORA - UNIDADE ...
181348,31/10/2022,15:48:51,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,BA,33138,163,228,1171,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 18:56:13,378,305,73,1,APURADA,3,Nulo,96,Nulo,11,1238327,936.577.132.740.509.312.,570.156,A190A6DA,21/09/2022 10:35:00,1 - 228,#NULO#,30/10/2022 08:25:39,30/10/2022 17:10:04,20,30/10/2022 17:12:55,-1,-1,2010,ALAGOINHAS,CENEC
292730,31/10/2022,15:49:18,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,CE,13730,123,491,1040,1,Presidente,22,PL,Partido Liberal,30/10/2022 18:25:47,337,290,47,1,APURADA,1,Nominal,22,JAIR BOLSONARO,118,2235256,779.733.871.017.288.683.,313.81,B28F044C,27/09/2022 16:40:00,1 - 491,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:16,5,30/10/2022 17:01:50,-1,-1,2020,CAUCAIA,COLÉGIO ESTADUAL LICEU DE CAUCAIA
1548620,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,65439,189,142,1023,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 18:39:17,359,266,93,1,APURADA,2,Branco,95,Branco,6,1332450,803.344.394.462.023.254.,720.545,E873B70E,26/09/2022 12:26:00,1 - 142,#NULO#,30/10/2022 08:00:01,30/10/2022 17:04:02,33,30/10/2022 17:06:48,-1,-1,2010,ITANHAEM,EE. DR. JOSÉ CARLOS BRAGA DE SOUZA
24858,31/10/2022,15:36:48,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,AL,27855,33,71,1031,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 18:25:36,251,208,43,1,APURADA,2,Branco,95,Branco,4,2072758,276.881.871.633.051.820.,129.598,B8A8C52A,24/09/2022 11:50:00,1 - 71,#NULO#,30/10/2022 08:00:01,30/10/2022 17:00:49,8,30/10/2022 17:02:54,-1,-1,2020,MACEIO,ESCOLA DE ENSINO FUNDAMENTAL OCTÁVIO BRANDÃO
1762312,31/10/2022,15:52:42,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,SP,64777,393,179,1279,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 19:38:04,370,305,65,1,APURADA,3,Nulo,96,Nulo,12,1804511,817.204.383.757.846.102.,311.234,B997CA6A,24/09/2022 13:33:00,1 - 179,#NULO#,30/10/2022 08:00:01,30/10/2022 17:02:26,17,30/10/2022 17:04:57,-1,-1,2015,GUARULHOS,EE. RECREIO SÃO JORGE
715558,31/10/2022,15:49:55,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,MG,41238,331,162,1171,1,Presidente,22,PL,Partido Liberal,30/10/2022 19:26:06,465,392,73,1,APURADA,1,Nominal,22,JAIR BOLSONARO,193,2089928,067.865.795.006.334.556.,100.018,ED6E094E,17/09/2022 11:46:00,1 - 162,#NULO#,30/10/2022 08:00:01,30/10/2022 17:01:37,11,30/10/2022 17:04:32,-1,-1,2020,BELO HORIZONTE,ESCOLA ESTADUAL PROFESSOR HILTON ROCHA
879680,31/10/2022,15:50:55,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,PR,79278,75,235,1481,1,Presidente,13,PT,Partido dos Trabalhadores,30/10/2022 18:09:44,289,255,34,1,APURADA,1,Nominal,13,LULA,100,1610923,012.205.017.536.406.395.,824.261,17D9322C,26/09/2022 13:53:00,1 - 235,#NULO#,30/10/2022 08:00:01,30/10/2022 17:02:35,16,30/10/2022 17:04:24,-1,-1,2011,TOLEDO,ESCOLA MUNICIPAL PROFESSOR HENRIQUE BROD


In [24]:
# Show which election rounds are present in the data and how many rows each has.
dfbu["NR_TURNO"].value_counts()

2    1850891
Name: NR_TURNO, dtype: int64

In [25]:
# Persist the processed boletins as a ";"-separated CSV without the index.
print(FILEPATH_PROC_TSE_BOLETINS_DE_URNA_2T)
dfbu.to_csv(FILEPATH_PROC_TSE_BOLETINS_DE_URNA_2T, sep=";", index=False)

data/processed/bu_presidente_2t.csv


In [26]:
# Final look at a few rows of the frame that was written to disk.
dfbu.sample(4)

Unnamed: 0,DT_GERACAO,HH_GERACAO,ANO_ELEICAO,CD_TIPO_ELEICAO,NM_TIPO_ELEICAO,CD_PLEITO,DT_PLEITO,NR_TURNO,CD_ELEICAO,DS_ELEICAO,SG_UF,CD_MUNICIPIO,NR_ZONA,NR_SECAO,NR_LOCAL_VOTACAO,CD_CARGO_PERGUNTA,DS_CARGO_PERGUNTA,NR_PARTIDO,SG_PARTIDO,NM_PARTIDO,DT_BU_RECEBIDO,QT_APTOS,QT_COMPARECIMENTO,QT_ABSTENCOES,CD_TIPO_URNA,DS_TIPO_URNA,CD_TIPO_VOTAVEL,DS_TIPO_VOTAVEL,NR_VOTAVEL,NM_VOTAVEL,QT_VOTOS,NR_URNA_EFETIVADA,CD_CARGA_1_URNA_EFETIVADA,CD_CARGA_2_URNA_EFETIVADA,CD_FLASHCARD_URNA_EFETIVADA,DT_CARGA_URNA_EFETIVADA,DS_CARGO_PERGUNTA_SECAO,DS_AGREGADAS,DT_ABERTURA,DT_ENCERRAMENTO,QT_ELEITORES_BIOMETRIA_NH,DT_EMISSAO_BU,NR_JUNTA_APURADORA,NR_TURMA_APURADORA,modelo_urna,NM_MUNICIPIO,NM_LOCAL_VOTACAO
547843,31/10/2022,15:49:55,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,MG,41238,29,229,1406,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 19:20:00,347,290,57,1,APURADA,3,Nulo,96,Nulo,10,2080741,461.859.775.458.398.255.,69.942,E0D0C205,17/09/2022 10:34:00,1 - 229,#NULO#,30/10/2022 08:00:01,30/10/2022 17:01:37,6,30/10/2022 17:03:02,-1,-1,2020,BELO HORIZONTE,CENTRO EDUCACIONAL MINEIRO
29559,31/10/2022,15:36:48,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,AL,27278,47,10,1031,1,Presidente,22,PL,Partido Liberal,30/10/2022 18:28:41,393,297,96,1,APURADA,1,Nominal,22,JAIR BOLSONARO,86,1671905,853.390.923.057.689.097.,903.226,46BB24B1,19/09/2022 14:38:00,1 - 10,#NULO#,30/10/2022 08:00:01,30/10/2022 17:02:47,19,30/10/2022 17:04:29,-1,-1,2013,CAMPO ALEGRE,ESCOLA DOM CONSTANTINO LÜERS
295207,31/10/2022,15:49:23,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,DF,97012,2,430,1600,1,Presidente,22,PL,Partido Liberal,30/10/2022 18:26:58,398,304,94,1,APURADA,1,Nominal,22,JAIR BOLSONARO,146,2027045,221.344.791.448.801.664.,593.971,D454D32C,26/09/2022 11:01:00,1 - 430,#NULO#,30/10/2022 08:00:01,30/10/2022 17:04:58,6,30/10/2022 17:06:50,-1,-1,2020,BRASILIA,ESCOLA CLASSE 02 DO ITAPOÃ
510443,31/10/2022,15:50:08,2022,0,Eleição Ordinária,407,30/10/2022,2,545,Eleição Geral Federal 2022,MS,91430,11,92,1082,1,Presidente,-1,#NULO#,#NULO#,30/10/2022 17:45:39,304,224,80,1,APURADA,2,Branco,95,Branco,3,1235183,602.978.458.353.350.351.,446.434,0BD22BC6,22/09/2022 11:26:00,1 - 92,#NULO#,30/10/2022 07:00:01,30/10/2022 16:00:46,42,30/10/2022 16:02:31,-1,-1,2010,RIO BRILHANTE,ESCOLA MUNICIPAL RIO BRILHANTE


# Validação

In [27]:
# Expected second-round totals, used to validate the aggregates computed below.
# Figures keep the Brazilian number format ("." thousands, "," decimals).

# SAO PAULO
# Jair Bolsonaro: 14.216.587 votes (55,24%)
# Lula: 11.519.882 votes (44,76%)

# PARAIBA
# Lula (PT): 1.601.953 votes (66,62% of valid votes)
# Jair Bolsonaro (PL): 802.502 votes (33,38% of valid votes)

# BRAZIL
# Lula: 60.345.999 votes (50,90% of valid votes)
# Jair Bolsonaro: 58.206.354 votes (49,10% of valid votes)

In [28]:
# Nationwide vote totals per votable option, largest first.
print("Brasil")
(
    dfbu.groupby("NM_VOTAVEL")["QT_VOTOS"]
    .sum()
    .sort_values(ascending=False)
    .to_frame("qtd_votos")
)

Brasil


Unnamed: 0_level_0,qtd_votos
NM_VOTAVEL,Unnamed: 1_level_1
LULA,60345999
JAIR BOLSONARO,58206354
Nulo,3930765
Branco,1769678


In [29]:
# Vote totals per votable option restricted to the state of Sao Paulo.
print("São Paulo")
(
    dfbu.loc[dfbu["SG_UF"] == "SP"]
    .groupby("NM_VOTAVEL")["QT_VOTOS"]
    .sum()
    .sort_values(ascending=False)
    .to_frame("qtd_votos")
)

São Paulo


Unnamed: 0_level_0,qtd_votos
NM_VOTAVEL,Unnamed: 1_level_1
JAIR BOLSONARO,14216587
LULA,11519882
Nulo,1117345
Branco,526677


In [30]:
# Frequency table of rows per urn model, via the project's stats helper.
stats.freq(dfbu["modelo_urna"])

Unnamed: 0,Absoluto,Relativo
2020,761555,0.411453
2010,365968,0.197725
2015,334332,0.180633
2009,188074,0.101613
2011,104719,0.056578
2013,96243,0.051998
