In [4]:
import heapq

import community as community_louvain
import networkx as nx
import numpy as np
import pandas as pd
import torch
from sklearn.metrics.pairwise import cosine_similarity

# Load the transaction graph from its GraphML file.
# NOTE(review): node ids are later looked up as address strings; this presumes
# the GraphML node ids are the raw addresses — confirm against the data file.
graph = nx.read_graphml("../../data/trans_graph.graphml")

In [5]:
# Load the per-node feature tensor and convert it to a NumPy array.
# map_location="cpu" keeps the load working when the tensor was saved from a
# GPU, and detach() drops autograd tracking: Tensor.numpy() rejects both CUDA
# tensors and tensors that require grad.
# NOTE(review): torch.load unpickles the file — only load trusted data.
node_features = (
    torch.load("../../data/node_basic_features.pt", map_location="cpu")
    .detach()
    .numpy()
)

# Load the address list: first CSV column (the file is read without a header
# row), with missing values and repeated addresses removed in first-seen order.
# NOTE(review): the position of an address in this list is used later as the
# row index into node_features. dropna()/unique() shift positions whenever they
# remove something — confirm the CSV has exactly one clean row per feature row.
address_table = pd.read_csv("../../original_data/每个地址被标记wash的次数.csv", header=None)
addresses = address_table[0].dropna().unique().tolist()

In [6]:
# Map each address to its position in `addresses`; that position is used as the
# row index into node_features.
# NOTE(review): assumes row i of node_features describes addresses[i] — confirm.
address_to_index = {address: idx for idx, address in enumerate(addresses)}

# L2-normalise every feature row once so that each edge's cosine similarity is
# a single dot product instead of a per-edge sklearn call. Each row is
# flattened to one vector; all-zero rows keep a divisor of 1 and therefore get
# similarity 0, which is what sklearn's cosine_similarity produces as well.
feature_matrix = np.asarray(node_features, dtype=np.float64)
feature_matrix = feature_matrix.reshape(
    feature_matrix.shape[0], int(np.prod(feature_matrix.shape[1:]))
)
row_norms = np.linalg.norm(feature_matrix, axis=1, keepdims=True)
row_norms[row_norms == 0.0] = 1.0
unit_features = feature_matrix / row_norms

# Store the cosine similarity of the two endpoint feature vectors as the edge
# weight; edges with an endpoint missing from the address list get weight 0.0.
# NOTE(review): cosine similarity is negative when features have mixed signs;
# modularity-based community detection presumably expects non-negative
# weights — confirm the features are non-negative.
for u, v, data in graph.edges(data=True):
    u_index = address_to_index.get(u)
    v_index = address_to_index.get(v)
    if u_index is None or v_index is None:
        data['weight'] = 0.0
        continue

    # Plain Python float, the same type as the 0.0 fallback above.
    similarity = float(unit_features[u_index] @ unit_features[v_index])
    if not np.isfinite(similarity):
        # NaN/inf in either feature row; the sklearn call also rejected such input.
        raise ValueError(f"Non-finite features for edge ({u!r}, {v!r})")
    data['weight'] = similarity

In [7]:
# Number of independent Louvain runs; the run with the highest modularity wins.
num_iterations = 3

# Base seed for the runs. Run i uses louvain_seed + i, so the runs still differ
# from one another but a fresh kernel reproduces the same partitions.
# NOTE(review): random_state needs a python-louvain release that accepts it —
# confirm the installed version.
louvain_seed = 42

best_partition = None
best_modularity = float('-inf')

for i in range(num_iterations):
    print(f"Performing Louvain community detection iteration {i + 1}/{num_iterations}...")

    # Louvain community detection on the similarity-weighted graph.
    partition = community_louvain.best_partition(
        graph, weight='weight', random_state=louvain_seed + i
    )

    # Score this run's partition.
    modularity = community_louvain.modularity(partition, graph, weight='weight')
    print(f"Iteration {i + 1} Modularity: {modularity}")

    # Keep the partition only if it beats the best one seen so far.
    if modularity > best_modularity:
        best_modularity = modularity
        best_partition = partition


Performing Louvain community detection iteration 1/3...
Iteration 1 Modularity: 0.31035410844479255
Performing Louvain community detection iteration 2/3...
Iteration 2 Modularity: 0.29401018422858094
Performing Louvain community detection iteration 3/3...
Iteration 3 Modularity: 0.3015931545419888


In [14]:
# Group nodes by community id: {community id -> list of member nodes}.
communities = {}
for node, community in best_partition.items():
    communities.setdefault(community, []).append(node)

# One list of member nodes per community.
community_list = list(communities.values())

# Target number of communities after merging.
desired_communities = 9

# Repeatedly merge the two smallest communities until the target is reached.
# Only community size is considered; whether the two communities share any
# edges is not.
if len(community_list) > desired_communities:
    if desired_communities < 1:
        # Two communities are popped per step, so a target below 1 would run
        # the merge dry and fail with an unhelpful IndexError.
        raise ValueError("desired_communities must be at least 1")

    # Heap entries are (size, sequence number, members). The sequence number
    # breaks size ties in favour of the community created earlier and, being
    # unique, stops tuple comparison from ever reaching the member lists.
    merge_heap = [
        (len(members), seq, members) for seq, members in enumerate(community_list)
    ]
    heapq.heapify(merge_heap)
    next_seq = len(merge_heap)

    last_merged = None
    while len(merge_heap) > desired_communities:
        _, _, smallest_community = heapq.heappop(merge_heap)
        _, _, second_smallest_community = heapq.heappop(merge_heap)

        merged_community = smallest_community + second_smallest_community
        last_merged = (len(merged_community), next_seq, merged_community)
        next_seq += 1
        heapq.heappush(merge_heap, last_merged)

    # Final order: surviving communities by ascending size (ties by creation
    # order), followed by the community produced by the last merge.
    merge_heap.remove(last_merged)
    community_list = [members for _, _, members in sorted(merge_heap)]
    community_list.append(last_merged[2])

print(f'Number of communities: {len(community_list)}')

Number of communities: 9


In [15]:
# Write the merged communities to disk: a header line with the best modularity,
# then one "<node> -- <community index>" line per node.
# NOTE(review): best_modularity was computed for the Louvain partition before
# the small communities were merged, so it does not describe the partition
# listed below it.
with open("community3.txt", "w") as out_file:
    out_file.write(f"Best Modularity: {best_modularity}\n")
    out_file.writelines(
        f"{member} -- {label}\n"
        for label, members in enumerate(community_list)
        for member in members
    )

print("Best community detection results saved.")


Best community detection results saved.
