In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
dir_ = 'votacao/'

file_name = 'camara_municipal_sp_sessoes_12_20.csv'

file_dir = dir_ + file_name

In [3]:
df = pd.read_csv(file_dir)

In [12]:
from netpixi.integration.gt import *
from regression.integration.gt import *
import math
from graph_tool import centrality, clustering

In [18]:
# Caminho para as pastas de read e save"
dir_save = "networks/"

# Nome do dataset
file_save_name = "vereadores"

# Extensões dos arquivos 
file_save_extension = ".net.gz"

##DATA
data_dir = "data/"

In [20]:
def model_cut(df, y1, y2):
    df_weighted = df[df['id_parlamentar'].notna() & df['voto'].notna() & df['id_votacao'].notna()].copy()
    df_weighted.loc[df_weighted['voto'] == 'Sim', 'voto'] = '1'
    df_weighted.loc[df_weighted['voto'] == 'Não', 'voto'] = '2'
    df_weighted.loc[df_weighted['voto'] == 'Abstenção', 'voto'] = '0'
    
    df_clear_idx = list(x.isdigit() for x in df_weighted['voto'])
    df_clear = df_weighted[df_clear_idx].copy()
    df_clear.loc[df_clear['voto'] == '2', 'voto'] = '-1'
    df_clear = df_clear.astype({'voto': 'int64'})
    
    df_clear["ano"] = df_clear.apply(lambda row: int(row["data"][-4:]), axis=1)
    idx = ((df_clear["ano"] >= y1) & (df_clear["ano"] <= y2))
    df_corte = df_clear[idx].copy()
    
    cte = int(1e5)
    df_corte['id_votacao'] = df_corte['id_votacao'] + cte
        
    vereadores = np.unique(df_clear['id_parlamentar'])
    votacoes = np.unique(df_clear['id_votacao'])

    n_vereadores = len(vereadores)
    n_votacoes = len(votacoes)
    
    vereadores_votos = []
    for i in range(n_vereadores):
        votes = df_clear[df_clear['id_parlamentar'] == vereadores[i]][['id_votacao', 'voto']]
        vereadores_votos.append(votes)
    vereadores_votos[0]
    
    vereadores_corr = np.zeros(shape=(n_vereadores, n_vereadores))
    list_vereadores_corr = []

    for i in range(n_vereadores):
        v1 = vereadores_votos[i]
        for j in range(i+1, n_vereadores - 1):
            v2 = vereadores_votos[j]
            corrs = (
                    v1.merge(v2,how='inner', on = 'id_votacao')
                    .drop(['id_votacao'], axis = 1)
                    .corr(numeric_only = False)
                   )
            corr = corrs['voto_x']['voto_y']
            if math.isnan(corr):
                corr = 0
            vereadores_corr[i][j] = corr
            list_vereadores_corr.append(corr)
    
    cut_point = np.median(list_vereadores_corr)
    
    g = Graph(directed=False)
    
    for i in range(n_vereadores):
        g.add_vertex(int(vereadores[i]))

    for i in range(n_vereadores):    
        for j in range(i+1, n_vereadores - 1):
            corr = vereadores_corr[i][j]
            if corr > cut_point:
                g.add_edge(int(vereadores[i]), int(vereadores[j]))
    
    path_save = dir_save + file_save_name + "_" + str(y1) + "_" + str(y2) + file_save_extension
    
    gt_save(g, path_save)
    
    g_betweenness = gt_load(path_save)
    bc, _ = centrality.betweenness(g_betweenness)
    g_betweenness.add_vp('betweenness', bc)
    
    g_cluster = gt_load(path_save)
    lc =  clustering.local_clustering(g_cluster)
    g_cluster.add_vp('clustering', lc)
    
    df_parlamentar = df_corte.drop_duplicates(subset = "id_parlamentar").copy()
    df_tam = (
        df_parlamentar[["partido", "id_parlamentar"]]
        .groupby("partido").count().copy()
        .rename(columns = {"id_parlamentar": "parlamentares"})
    )
    
    df_rep = (
        df_corte.merge(df_tam, on = "partido", how = "inner") 
        .copy()[["id_parlamentar","parlamentares"]]
        .drop_duplicates("id_parlamentar")
        .rename(columns = {"id_parlamentar": "id"})
    )
    
    data_cluster = gt_data(g_betweenness)
    data_bet = gt_data(g_cluster)
    data = (
        data_bet
        .merge(data_cluster, how='inner', on = 'id')
        .merge(df_rep, on="id", how = "inner")
    )
    
    data_networks_path = data_dir + file_save_name + "_" + str(y1) + "_" + str(y2) + ".csv"
    
    data.to_csv(data_networks_path)
    
    return data

In [21]:
model_cut(df, 2013, 2016)

Unnamed: 0,id,clustering,betweenness,parlamentares
0,132,0.813665,0.002178,2
1,155,0.751308,0.006335,13
2,176,0.835821,0.001720,7
3,179,0.786318,0.003198,2
4,187,0.621311,0.007907,11
...,...,...,...,...
69,2108,0.974490,0.000096,7
70,2127,0.962353,0.000160,6
71,2129,0.768298,0.003209,2
72,2131,0.911348,0.001294,2
