In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import os

In [2]:
G = nx.DiGraph() # Grafo direcionado

In [3]:
data = "twitter"

for file in os.listdir(data):
    if file.endswith(".edges"):
        node_id = file.split(".")[0] # Node Id do nome do arquivo
        file_path = os.path.join(data, file)

        with open(file_path, "r") as f:
            for line in f:
                connection_node = line.strip()
                if connection_node:
                    G.add_edge(node_id, connection_node) # Adiciona a aresta ao grafo entre o nó e o nó de conexão

In [4]:
print(f"Grafo criado com {G.number_of_nodes()} nós e {G.number_of_edges()} arestas.")

Grafo criado com 1668858 nós e 2286909 arestas.


In [18]:
node_features = {}
node_circles = {}
feature_names = {}

In [17]:
## Processando as comunidades
for file in os.listdir(data):
    if file.endswith(".circles"):
        node_id = file.split(".")[0]
        file_path = os.path.join(data, file)

        with open(file_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) > 1:
                    circle = parts[0]
                    members = parts[1:]
                    for member in members:
                        if member not in node_circles.keys():
                            # Adiciona o nó à lista de círculos
                            # e inicializa a lista de círculos
                            node_circles[member] = []
                        node_circles[member].append(circle)

In [8]:
## Processando as características .feat
for file in os.listdir(data):
    if file.endswith(".feat"):
        node_id = file.split(".")[0]
        file_path = os.path.join(data, file)

        with open(file_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) > 1:
                    user = parts[0]
                    features = list(map(int, parts[1:]))
                    node_features[user] = features
        
    # Processar os arquivos .egofeat (adicionar características ao nó ego)
    ego_file = os.path.join(data, f"{node_id}.egofeat")
    if os.path.exists(ego_file):
        with open(ego_file, "r") as f:
            ego_features = list(map(int, f.readline().strip().split()))
            node_features[node_id] = ego_features
                

In [9]:
# 4. Processar os nomes das características (.featnames)
for file in os.listdir(data):
    if file.endswith(".featnames"):
        file_path = os.path.join(data, file)

        with open(file_path, "r") as f:
            for line in f:
                index, name = line.strip().split(" ", 1)
                feature_names[int(index)] = name

In [10]:
# 5. Adicionar atributos ao grafo
print("Adicionando atributos aos nós...")
for node in G.nodes():
    # Convertendo círculos e características para strings para compatibilidade com GEXF
    G.nodes[node]["circles"] = ",".join(node_circles.get(node, []))  # Adiciona as comunidades
    G.nodes[node]["features"] = ",".join(map(str, node_features.get(node, [])))  # Adiciona características

print(f"Grafo finalizado com {G.number_of_nodes()} nós e {G.number_of_edges()} arestas.")



In [6]:
import random

def sample_graph_by_edges(G, num_edges=2000):
    """Seleciona um subconjunto aleatório de arestas e mantém os nós conectados a elas."""
    sampled_edges = random.sample(list(G.edges()), min(num_edges, len(G.edges())))
    
    # Criar um subgrafo apenas com as arestas escolhidas
    G_sample = nx.Graph()
    G_sample.add_edges_from(sampled_edges)

    return G_sample

# Defina quantas arestas deseja manter
num_edges_to_keep = 1000
G_sample = sample_graph_by_edges(G, num_edges=num_edges_to_keep)

nx.write_gexf(G_sample, "sampled_graph_edges.gexf")
print(f"Grafo amostrado com {G_sample.number_of_nodes()} nós e {G_sample.number_of_edges()} arestas.")


Grafo amostrado com 1507 nós e 1000 arestas.


In [7]:
# Detecção de comunidade com louvain ou girvan-newman
grupos = nx.community.girvan_newman(G_sample)

In [None]:
# Modularity -> measures the strength of division of a network into modules
modularity_df = pd.DataFrame(
    [
        [k + 1, nx.community.modularity(G, grupos[k])]
        for k in range(len(grupos))
    ],
    columns=["k", "modularity"],
)


# function to create node colour list
def create_community_node_colors(graph, grupos):
    number_of_colors = len(grupos)
    colors = ["#D4FCB1", "#CDC5FC", "#FFC2C4", "#F2D140", "#BCC6C8"][:number_of_colors]
    node_colors = []
    for node in graph:
        current_community_index = 0
        for community in grupos:
            if node in community:
                node_colors.append(colors[current_community_index])
                break
            current_community_index += 1
    return node_colors


# function to plot graph with node colouring based on grupos
def visualize_communities(graph, grupos, i):
    node_colors = create_community_node_colors(graph, grupos)
    modularity = round(nx.community.modularity(graph, grupos), 6)
    title = f"Community Visualization of {len(grupos)} grupos with modularity of {modularity}"
    pos = nx.spring_layout(graph, k=0.3, iterations=50, seed=2)
    plt.subplot(3, 1, i)
    plt.title(title)
    nx.draw(
        graph,
        pos=pos,
        node_size=1000,
        node_color=node_colors,
        with_labels=True,
        font_size=20,
        font_color="black",
    )


fig, ax = plt.subplots(3, figsize=(15, 20))

# Plot graph with colouring based on grupos
visualize_communities(G, grupos[0], 1)
visualize_communities(G, grupos[3], 2)

# Plot change in modularity as the important edges are removed
modularity_df.plot.bar(
    x="k",
    ax=ax[2],
    color="#F2D140",
    title="Modularity Trend for Girvan-Newman Community Detection",
)
plt.show()