In [10]:
import pandas as pd
import itertools
import networkx as nx

In [11]:
# Import df
def import_df(date, orientations=("Liberado",)):
    """Load the vote and orientation CSVs for one period and filter the orientations.

    Parameters
    ----------
    date : str
        Suffix inserted into the file names ``datasets/votacoesVotos-{date}.csv``
        and ``datasets/votacoesOrientacoes-{date}.csv``. Both files are read as
        ``;``-delimited CSV.
    orientations : str or iterable of str, optional
        Values of the ``orientacao`` column to keep. Defaults to
        ``("Liberado",)``, which reproduces the previous hard-coded filter;
        pass e.g. ``("Sim", "Não")`` to select those rows instead.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``votes_df`` restricted to the vote/deputy columns, with
        ``deputado_siglaPartido`` renamed to ``siglaBancada`` so it shares a
        column name with the orientation table, and the orientation rows whose
        ``orientacao`` is one of ``orientations``.
    """
    votes_df = pd.read_csv(f"datasets/votacoesVotos-{date}.csv", delimiter=";")
    orientation_df = pd.read_csv(f"datasets/votacoesOrientacoes-{date}.csv", delimiter=";")

    votes_df = votes_df[['idVotacao', 'voto',
                         'deputado_id', 'deputado_nome',
                         'deputado_siglaPartido']]
    votes_df = votes_df.rename(columns={"deputado_siglaPartido": "siglaBancada"})

    orientation_df = orientation_df[['idVotacao',
                                     'siglaBancada',
                                     'orientacao']]

    # Accept a single label as well as a collection of labels; isin() itself
    # rejects a bare string.
    if isinstance(orientations, str):
        orientations = (orientations,)
    wanted = orientation_df['orientacao'].isin(list(orientations))
    free_orientation_df = orientation_df[wanted]
    return votes_df, free_orientation_df

In [12]:
def merge_dfs(votes_df, free_orientation_df):
    """Inner-join the orientation rows with the individual votes.

    Rows are matched on ``idVotacao`` and ``siglaBancada``; the orientation
    frame is the left side of the join, so its columns come first in the
    result.
    """
    join_keys = ["idVotacao", "siglaBancada"]
    merged = pd.merge(
        left=free_orientation_df,
        right=votes_df,
        how="inner",
        on=join_keys,
    )
    return merged

In [13]:
def group_by_df(vo_df):
    """Collapse the merged frame to one row per (deputado_id, deputado_nome).

    The ``idVotacao`` and ``voto`` values of each group are concatenated into
    comma-separated strings, in the row order of the input. The grouping keys
    are returned as ordinary columns.
    """
    def comma_joined(values):
        # Values must already be strings; str.join raises TypeError otherwise.
        return ','.join(values)

    per_deputy = vo_df.groupby(by=["deputado_id", "deputado_nome"])
    joined = per_deputy.agg({'idVotacao': comma_joined, 'voto': comma_joined})
    return joined.reset_index()

In [14]:
def get_dep_votes(grouped_vo_df):
    """Build a ``{deputado_id: {idVotacao: voto}}`` mapping from the grouped frame.

    Parameters
    ----------
    grouped_vo_df : pandas.DataFrame
        Frame with a ``deputado_id`` column and comma-separated ``idVotacao``
        and ``voto`` string columns whose entries line up position by position.

    Returns
    -------
    dict
        One entry per distinct ``deputado_id``. When the same id occurs in more
        than one row (e.g. the frame was grouped by id *and* name and the name
        varies), the votes of all its rows are merged instead of the last row
        replacing the earlier ones.
    """
    dep_votes = dict()
    rows = zip(grouped_vo_df["deputado_id"],
               grouped_vo_df["idVotacao"],
               grouped_vo_df["voto"])
    for dep_id, vote_ids, vote_values in rows:
        votes = dict(zip(vote_ids.split(","), vote_values.split(",")))
        # Merge rather than assign so a repeated deputado_id keeps every vote.
        dep_votes.setdefault(dep_id, {}).update(votes)
    return dep_votes

def combine_deps(dep_votes):
    """Return every unordered pair of keys of ``dep_votes`` as a list of 2-tuples.

    Pairs follow the insertion order of the mapping's keys.
    """
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    pairs = itertools.combinations(dep_votes, 2)
    return list(pairs)


In [15]:
def calculate_corr(dep_combinations, dep_votes, normalize=False):
    """Score each pair of deputies by how often they cast the same vote.

    Parameters
    ----------
    dep_combinations : iterable of 2-tuples
        Pairs of keys of ``dep_votes``.
    dep_votes : dict
        ``{deputy: {vote_id: vote}}`` mapping.
    normalize : bool, optional
        If False (default), the score is the number of votes both deputies
        took part in and voted identically on. If True, that count is divided
        by the number of votes both took part in.

    Returns
    -------
    dict
        ``{pair: score}`` with the pair tuple used as given. Pairs that share
        no vote score 0 in either mode.
    """
    corr_dict = dict()
    for deps in dep_combinations:
        first_votes = dep_votes[deps[0]]
        second_votes = dep_votes[deps[1]]
        # Only votes in which both deputies took part can be compared.
        common = first_votes.keys() & second_votes.keys()
        agreements = sum(1 for k in common if first_votes[k] == second_votes[k])
        if normalize and common:
            corr_dict[deps] = agreements / len(common)
        else:
            # With no common vote, agreements is 0, so this also covers the
            # normalized case without dividing by zero.
            corr_dict[deps] = agreements
    return corr_dict

def sort_corr(corr_dict):
    """Return the ``(pair, score)`` items of ``corr_dict``, highest score first.

    Items with equal scores keep their relative order from the mapping.
    """
    ranked = list(corr_dict.items())
    ranked.sort(key=lambda item: item[1], reverse=True)
    return ranked


In [16]:
def process(vote_dates=('2021', '2019', '2020')):
    """Run the pipeline for each period and accumulate the pair scores.

    Parameters
    ----------
    vote_dates : iterable of str, optional
        Period suffixes passed one by one to ``import_df``. Defaults to the
        three years that were previously hard-coded.

    Returns
    -------
    dict
        ``{(deputy_a, deputy_b): score}`` where the score of a pair is the sum
        of the scores ``calculate_corr`` returned for it in each period.
    """
    all_dates_dict = dict()
    for vote_date in vote_dates:
        votes_df, free_orientation_df = import_df(vote_date)
        vo_df = merge_dfs(votes_df, free_orientation_df)
        grouped_vo_df = group_by_df(vo_df)
        dep_votes = get_dep_votes(grouped_vo_df)
        dep_combinations = combine_deps(dep_votes)
        corr_dict = calculate_corr(dep_combinations, dep_votes)
        # Add to the running total. Merging with {**old, **new} would keep only
        # the value from the last period in which a pair appears.
        for pair, score in corr_dict.items():
            all_dates_dict[pair] = all_dates_dict.get(pair, 0) + score
    return all_dates_dict

# Compute the score of every deputy pair, then order the (pair, score) items
# from the highest score to the lowest for the graph-building cell below.
all_dates_dict = process()
all_dates_dict_sorted = sort_corr(all_dates_dict)

In [17]:
# Undirected graph with one weighted edge per deputy pair whose score is positive.
G = nx.Graph()
positive_pairs = (item for item in all_dates_dict_sorted if item[1] > 0)
for (dep_a, dep_b), score in positive_pairs:
    G.add_edge(dep_a, dep_b, weight=score)

In [18]:
# Export "node node weight" lines. Only 'weight' is requested because that is
# the only attribute set on the edges; asking for an attribute no edge has
# makes networkx hit a silently swallowed KeyError for every edge.
nx.write_edgelist(G, "networks/vote19-20-21-liberado-not-normalized.txt", delimiter=' ', data=['weight'])
