In [2]:
import os
import pickle
import pandas as pd
import numpy as np
import networkx as nx
import igraph as ig
import leidenalg as la

In [8]:
# 1) Load the main DataFrame
print("Loading df_merged ...")
df_merged = pd.read_csv("../data/processed_Data/df_merged.csv")

# Make a copy so we don't overwrite the original
df_merged_communities = df_merged.copy()

# Identify all legislatures
all_legislatures = df_merged_communities["deputado_idlegislatura"].unique()
all_legislatures = sorted(all_legislatures)
print(f"Total legislatures found: {len(all_legislatures)}")

# Path where the legislature graphs were saved
graphs_path = "data/graphs"

Loading df_merged ...
Total legislatures found: 6


In [9]:
for i, leg in enumerate(all_legislatures, start=1):
    print(f"\nProcessing legislature {leg} ({i}/{len(all_legislatures)})...")
    
    # Expected file for the saved graph
    graph_filename = os.path.join(graphs_path, f"graph_legislature_{leg}.gpickle")
    if not os.path.exists(graph_filename):
        print(f"  -> Graph file not found: {graph_filename}. Skipping.")
        continue
    
    # 2) Load the NetworkX graph
    with open(graph_filename, "rb") as f:
        G_nx = pickle.load(f)
    
    if len(G_nx.nodes) == 0:
        print("  -> Graph is empty. Skipping community detection.")
        continue
    
    # 3) Convert NetworkX -> igraph
    node_list = list(G_nx.nodes())
    index_of = {node: idx for idx, node in enumerate(node_list)}
    edges = []
    weights = []
    
    for u, v, d in G_nx.edges(data=True):
        edges.append((index_of[u], index_of[v]))  # undirected
        weights.append(d.get("weight", 1.0))      # default weight=1 if missing
    
    # Build an igraph Graph
    g_ig = ig.Graph(n=len(node_list), edges=edges, directed=False)
    g_ig.es["weight"] = weights
    
    # 4) Detect communities with the Leiden algorithm
    print(f"  -> Running Leiden on {len(node_list)} nodes, {len(edges)} edges...")
    # Here we use ModularityVertexPartition; other partitions (e.g. RBConfiguration) are possible
    partition = la.find_partition(
        g_ig,
        la.ModularityVertexPartition,
        weights=g_ig.es["weight"]
    )
    
    membership = partition.membership  # community index for each node
    n_communities = max(membership) + 1
    print(f"  -> Found {n_communities} communities.")
    
    # 5) Create a small DataFrame linking (legislature, deputy_id) -> community
    col_name = f"community_leg_{leg}"
    temp_df = pd.DataFrame({
        "deputado_idlegislatura": [leg]*len(node_list),
        "deputado_id": node_list,
        col_name: membership
    })
    
    # 6) Merge this back into df_merged_communities so *every row* with the same 
    #    (legislature, deputy_id) gets the same community ID
    df_merged_communities = df_merged_communities.merge(
        temp_df, 
        on=["deputado_idlegislatura", "deputado_id"], 
        how="left"
    )
    
    print(f"  -> Merged communities for legislature {leg} into the main DataFrame.")

print("\nAll legislatures processed.")
print("Final DataFrame shape:", df_merged_communities.shape)
print("Columns:", df_merged_communities.columns)


Processing legislature 52 (1/6)...
  -> Graph file not found: data/graphs\graph_legislature_52.gpickle. Skipping.

Processing legislature 53 (2/6)...
  -> Graph file not found: data/graphs\graph_legislature_53.gpickle. Skipping.

Processing legislature 54 (3/6)...
  -> Graph file not found: data/graphs\graph_legislature_54.gpickle. Skipping.

Processing legislature 55 (4/6)...
  -> Graph file not found: data/graphs\graph_legislature_55.gpickle. Skipping.

Processing legislature 56 (5/6)...
  -> Graph file not found: data/graphs\graph_legislature_56.gpickle. Skipping.

Processing legislature 57 (6/6)...
  -> Graph file not found: data/graphs\graph_legislature_57.gpickle. Skipping.

All legislatures processed.
Final DataFrame shape: (587443, 13)
Columns: Index(['idvotacao', 'datahoravoto', 'voto', 'deputado_id', 'deputado_nome',
       'deputado_siglapartido', 'deputado_siglauf', 'deputado_idlegislatura',
       'aprovacao', 'year', 'proposicao_id', 'proposicao_ano',
       'classificat

In [10]:
# 7) (Optional) Save the augmented DataFrame
output_csv = "../data/processed_Data/df_merged_with_leiden_communities.csv"
df_merged_communities.to_csv(output_csv, index=False)
print(f"\nSaved updated DataFrame to {output_csv}.")


Saved updated DataFrame to ../data/processed_Data/df_merged_with_leiden_communities.csv.


In [11]:
df_merged_communities

Unnamed: 0,idvotacao,datahoravoto,voto,deputado_id,deputado_nome,deputado_siglapartido,deputado_siglauf,deputado_idlegislatura,aprovacao,year,proposicao_id,proposicao_ano,classification
0,143282-5,2003-11-18T19:02:33,Não,73764,Abelardo Lupion,PFL,PR,52,1,2003,26694.0,2001.0,Approval of Requests
1,143282-5,2003-11-18T19:09:55,Sim,73886,Adão Pretto,PT,RS,52,1,2003,26694.0,2001.0,Approval of Requests
2,143282-5,2003-11-18T18:58:01,Não,73765,Affonso Camargo,PSDB,PR,52,1,2003,26694.0,2001.0,Approval of Requests
3,143282-5,2003-11-18T18:57:59,Sim,74358,Agnaldo Muniz,PPS,RO,52,1,2003,26694.0,2001.0,Approval of Requests
4,143282-5,2003-11-18T19:04:07,Sim,73457,Airton Roveda,PMDB,PR,52,1,2003,26694.0,2001.0,Approval of Requests
...,...,...,...,...,...,...,...,...,...,...,...,...,...
587438,565397-185,2024-10-16T19:58:10,Sim,220536,Zé Haroldo Cathedral,PSD,RR,57,0,2024,565397.0,2013.0,Approval of Substitutes
587439,565397-185,2024-10-16T20:00:02,Não,160632,Zé Silva,SOLIDARIEDADE,MG,57,0,2024,565397.0,2013.0,Approval of Substitutes
587440,565397-185,2024-10-16T19:59:56,Não,204517,Zé Vitor,PL,MG,57,0,2024,565397.0,2013.0,Approval of Substitutes
587441,565397-185,2024-10-16T19:59:29,Não,220592,Zezinho Barbary,PP,AC,57,0,2024,565397.0,2013.0,Approval of Substitutes
