In [2]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import os

In [3]:
G = nx.DiGraph() # Directed graph

In [4]:
data = "twitter"

# Build the directed graph from every "<ego>.edges" file in the dataset folder.
for file in os.listdir(data):
    if not file.endswith(".edges"):
        continue

    node_id = file.split(".")[0]  # Ego id is taken from the file name
    file_path = os.path.join(data, file)

    with open(file_path, "r") as f:
        for line in f:
            endpoints = line.split()
            if not endpoints:
                continue  # blank line

            if len(endpoints) >= 2:
                # Two-column line "a b": a real edge a -> b between two members
                # of this ego network. Previously the whole line was used as a
                # single node name ("a b"), so this edge was never created.
                source, target = endpoints[0], endpoints[1]
                G.add_edge(source, target)
                linked_nodes = (source, target)
            else:
                # Single id on the line: keep the original ego -> id behaviour.
                linked_nodes = (endpoints[0],)

            # The ego does not appear inside its own .edges file; it is linked
            # to every node id listed there (see the SNAP ego-network readme).
            for connection_node in linked_nodes:
                G.add_edge(node_id, connection_node)

In [None]:
# Report the current size of the graph G.
print(f"Grafo criado com {G.number_of_nodes()} nós e {G.number_of_edges()} arestas.")



In [6]:
from collections import defaultdict

# user id -> list of integer feature values (filled from .feat / .egofeat files)
node_features = {}
# member id -> list of circle labels the member belongs to (filled from .circles files)
node_circles = defaultdict(list)
# integer feature index -> feature name (filled from .featnames files)
feature_names = {}

In [7]:
## Processing the communities (.circles files)
for fname in os.listdir(data):
    if not fname.endswith(".circles"):
        continue

    # Not used by this loop; kept because it is a notebook-level name.
    node_id = fname.split(".")[0]

    with open(os.path.join(data, fname), "r") as handle:
        for raw in handle:
            tokens = raw.split()
            if len(tokens) < 2:
                continue  # a circle label with no members (or a blank line)

            # First token is the circle label, the rest are its members.
            # NOTE(review): labels are stored as-is; if the same label occurs in
            # several ego files the circles cannot be told apart - confirm.
            circle, *members = tokens
            for member in members:
                node_circles[member].append(circle)

In [8]:
## Processing the node features (.feat files) and the matching ego features
for file in os.listdir(data):
    if file.endswith(".feat"):
        node_id = file.split(".")[0]
        file_path = os.path.join(data, file)

        # Each line: "<user id> <f0> <f1> ..." -> integer feature vector of that user.
        with open(file_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) > 1:
                    user = parts[0]
                    features = list(map(int, parts[1:]))
                    node_features[user] = features

        # Process the .egofeat file (features of the ego node itself).
        # This now lives inside the ".feat" branch: before, it ran for every
        # directory entry and reused whatever node_id was left over from a
        # previous iteration (or an earlier cell), re-reading the same file.
        ego_file = os.path.join(data, f"{node_id}.egofeat")
        if os.path.exists(ego_file):
            with open(ego_file, "r") as f:
                # Only the first line of the file is read.
                ego_features = list(map(int, f.readline().strip().split()))
                node_features[node_id] = ego_features
                

In [9]:
# 4. Process the feature names (.featnames)
for file in os.listdir(data):
    if file.endswith(".featnames"):
        file_path = os.path.join(data, file)

        with open(file_path, "r") as f:
            for line in f:
                # Expected line shape: "<index> <name>"; the name may contain spaces.
                parts = line.strip().split(" ", 1)
                if len(parts) != 2:
                    # Blank or one-token line: skip it instead of failing on the
                    # unpacking below (same guard style as the other loaders).
                    continue
                index, name = parts
                # NOTE(review): entries from different files share this dict, so
                # an index seen in several files keeps only the last name read.
                feature_names[int(index)] = name

In [10]:
# 5. Attach the collected attributes to the graph nodes
print("Adicionando atributos aos nós...")
for node, attrs in G.nodes(data=True):
    # Circles and features are flattened to comma-separated strings
    # for GEXF compatibility.
    circle_labels = node_circles.get(node, [])
    attrs["circles"] = ",".join(circle_labels)  # communities of this node

    feature_values = node_features.get(node, [])
    attrs["features"] = ",".join(str(value) for value in feature_values)  # feature vector

print(f"Grafo finalizado com {G.number_of_nodes()} nós e {G.number_of_edges()} arestas.")



In [28]:
import random

def sample_graph_by_edges(G, num_edges=2000, seed=None):
    """Sample a random subset of edges and keep the nodes they touch.

    Args:
        G: Source networkx graph.
        num_edges: Maximum number of edges to draw; capped at the number of
            edges in ``G``.
        seed: Optional seed for a reproducible sample. When ``None`` (default)
            the global ``random`` module state is used, as before.

    Returns:
        A new undirected ``nx.Graph`` built from the sampled edges. Each node
        carries a copy of the attributes it has in ``G``. Because the result
        is undirected, a sampled pair (u, v) and (v, u) becomes one edge, so
        the edge count can be lower than the number of edges drawn.
    """
    all_edges = list(G.edges())
    rng = random if seed is None else random.Random(seed)
    sampled_edges = rng.sample(all_edges, min(num_edges, len(all_edges)))

    # Build a graph containing only the chosen edges
    G_sample = nx.Graph()
    G_sample.add_edges_from(sampled_edges)

    # Carry over node attributes; rebuilding from bare edge tuples would
    # otherwise drop everything stored on the nodes of G.
    for node in G_sample.nodes:
        G_sample.nodes[node].update(G.nodes[node])

    return G_sample

# Set how many edges to keep in the sample
num_edges_to_keep = 80000
G_sample = sample_graph_by_edges(G, num_edges=num_edges_to_keep)

# Write the sampled graph to a GEXF file and report its size.
nx.write_gexf(G_sample, "sampled_graph_edges.gexf")
print(f"Grafo amostrado com {G_sample.number_of_nodes()} nós e {G_sample.number_of_edges()} arestas.")


