In [52]:
import pandas as pd
import itertools

In [53]:
# Load the 2021 roll-call exports (semicolon-separated CSV files) and keep
# only the columns the rest of the notebook works with.

# One row per vote cast by a deputy.
votes_df = pd.read_csv(
    "datasets/votacoesVotos-2021.csv", sep=";"
)[['idVotacao', 'voto', 'deputado_id', 'deputado_nome', 'deputado_siglaPartido']]

# One row per voting orientation issued by a bench ("bancada").
orientation_df = pd.read_csv(
    "datasets/votacoesOrientacoes-2021.csv", sep=";"
)[['idVotacao', 'siglaBancada', 'orientacao']]

In [54]:
# Give the deputy's party column the same name as the bench column in
# orientation_df, so both frames can later be merged on "siglaBancada".
votes_df = votes_df.rename({"deputado_siglaPartido": "siglaBancada"}, axis="columns")
votes_df.head()

Unnamed: 0,idVotacao,voto,deputado_id,deputado_nome,siglaBancada
0,14666-138,Sim,204545,Alê Silva,PSL
1,14666-138,Sim,73696,Angela Amin,PP
2,14666-138,Sim,160512,Aureo Ribeiro,SOLIDARIEDADE
3,14666-138,Sim,204374,Bia Kicis,PSL
4,14666-138,Sim,141513,Bilac Pinto,DEM


In [55]:
# Keep only the orientations marked "Liberado".
is_liberado = orientation_df["orientacao"] == "Liberado"
free_orientation_df = orientation_df[is_liberado]

# Summary of the filtered frame, followed by a preview of its first rows.
free_orientation_df.info()
free_orientation_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 345 entries, 163 to 16242
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   idVotacao     345 non-null    object
 1   siglaBancada  345 non-null    object
 2   orientacao    345 non-null    object
dtypes: object(3)
memory usage: 10.8+ KB


Unnamed: 0,idVotacao,siglaBancada,orientacao
163,2299545-8,Oposição,Liberado
280,2273453-69,Minoria,Liberado
290,2273453-69,GOV.,Liberado
302,2273453-69,Oposição,Liberado
318,2273453-64,Minoria,Liberado


In [56]:
# Attach the individual votes to each "Liberado" orientation, matching on the
# voting id and on the bench/party acronym.  Because this is an inner join,
# orientation rows whose siglaBancada never appears as a deputy's party in
# votes_df (and votes without a "Liberado" orientation) are dropped.
vo_df = free_orientation_df.merge(
    votes_df,
    how="inner",
    on=["idVotacao", "siglaBancada"],
)
vo_df.head()

Unnamed: 0,idVotacao,siglaBancada,orientacao,voto,deputado_id,deputado_nome
0,2292163-167,PSD,Liberado,Não,74471,André de Paula
1,2292163-167,PSD,Liberado,Não,160553,Antonio Brito
2,2292163-167,PSD,Liberado,Sim,204504,Cezinha de Madureira
3,2292163-167,PSD,Liberado,Sim,205476,Charles Fernandes
4,2292163-167,PSD,Liberado,Sim,116379,Darci de Matos


In [57]:
def _comma_join(values):
    """Concatenate the values of one group into a single comma-separated string."""
    return ','.join(values)


# Collapse vo_df to one row per (deputado_id, deputado_nome).  The voting ids
# and the votes cast are each flattened into a comma-separated string; both
# strings list the group's rows in the same order, so position i of
# "idVotacao" corresponds to position i of "voto".
grouped_vo_df = (
    vo_df
    .groupby(by=["deputado_id", "deputado_nome"])
    .agg({'idVotacao': _comma_join, 'voto': _comma_join})
    .reset_index()
)
grouped_vo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 447 entries, 0 to 446
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   deputado_id    447 non-null    int64 
 1   deputado_nome  447 non-null    object
 2   idVotacao      447 non-null    object
 3   voto           447 non-null    object
dtypes: int64(1), object(3)
memory usage: 14.1+ KB


In [58]:
# Preview the first five per-deputy rows.
grouped_vo_df.head()

Unnamed: 0,deputado_id,deputado_nome,idVotacao,voto
0,62881,Danilo Forte,"2292163-104,2292163-103,2292163-97,2292163-88,...","Não,Sim,Sim,Sim,Não,Sim,Sim"
1,66179,Norma Ayub,"2288389-105,2288389-78,2271124-123,2241695-65,...","Não,Sim,Não,Sim,Não,Sim"
2,66828,Fausto Pinato,"2220292-229,2241695-65","Não,Sim"
3,67138,Iracema Portella,"2220292-229,2241695-65","Sim,Sim"
4,68720,Fábio Henrique,"2288389-146,2266116-87,2266116-83","Não,Sim,Não"


In [59]:
# Build {deputado_id: {idVotacao: voto}} from grouped_vo_df.  Each row stores
# the voting ids and the votes as comma-joined strings in matching order, so
# splitting both and zipping them pairs every voting id with the vote cast.
# Only the three needed columns are iterated (no per-row Series as with
# iterrows).  Rows are merged per id rather than assigned, so a deputy id that
# shows up on more than one row (the grouping key also includes the name)
# keeps all of its votes instead of only those from the last row.
dep_votes = dict()
for dep_id, vote_ids, cast_votes in zip(
    grouped_vo_df["deputado_id"],
    grouped_vo_df["idVotacao"],
    grouped_vo_df["voto"],
):
    dep_votes.setdefault(dep_id, {}).update(
        zip(vote_ids.split(","), cast_votes.split(","))
    )

# Every unordered pair of deputies, in the order the ids were inserted above.
dep_combinations = list(itertools.combinations(dep_votes, 2))

# For each pair, count the votings in which both deputies took part AND cast
# the same vote.  NOTE: despite the name, corr_dict holds a raw agreement
# count, not a correlation coefficient -- it is not normalized by how many
# votings the pair has in common.
corr_dict = dict()
for deps in dep_combinations:
    first_votes = dep_votes[deps[0]]
    second_votes = dep_votes[deps[1]]
    corr_dict[deps] = sum(
        1
        for vote_id, vote in first_votes.items()
        if vote_id in second_votes and second_votes[vote_id] == vote
    )

# (pair, count) tuples ranked from most to fewest identical votes; sorted() is
# stable, so tied pairs keep their order from dep_combinations.
aa_sorted = sorted(corr_dict.items(), key=lambda x: x[1], reverse=True)

[((204352, 204561), 15), ((204389, 204394), 15), ((139285, 204352), 14), ((139285, 204561), 14), ((141508, 204352), 14), ((141508, 204525), 14), ((141508, 204561), 14), ((204352, 204389), 14), ((204352, 204394), 14), ((204389, 204525), 14), ((204389, 204561), 14), ((204394, 204525), 14), ((204394, 204561), 14), ((139285, 141508), 13), ((139285, 204389), 13), ((139285, 204394), 13), ((141508, 204389), 13), ((141508, 204394), 13), ((178922, 178927), 13), ((178922, 204352), 13), ((178922, 204561), 13), ((204352, 204525), 13), ((204371, 204558), 13), ((204389, 204530), 13), ((204394, 204530), 13), ((204483, 204558), 13), ((204525, 204561), 13), ((73586, 204389), 12), ((73586, 204394), 12), ((73586, 204530), 12), ((73586, 213274), 12), ((74262, 178990), 12), ((139285, 178922), 12), ((139285, 204525), 12), ((141508, 178922), 12), ((178922, 204389), 12), ((178922, 204394), 12), ((178922, 204530), 12), ((178922, 206231), 12), ((178927, 204389), 12), ((178927, 204394), 12), ((178927, 204530), 1