将所有节点划分社区，并将同一社区的连边权值加大

In [1]:
import json

# 读取 JSON 文件
with open("../output_data/sourceJson_All.json", "r", encoding="utf-8") as f:
    jsonData = json.load(f)  # 解析 JSON 文件

nodes=[node["id"] for node in jsonData["nodes"]]
links=[(link["source"],link["target"]) for link in jsonData["links"]]

print(nodes[-1])
print(links[-1])
print(len(nodes))
print(len(links))

Vials: Visualizing Alternative Splicing of Genes
('The Connected Scatterplot for Presenting Paired Time Series', 'A Visual Analytics Approach to Dynamic Network Exploration')
11585
28435


In [7]:
import networkx as nx
from networkx.algorithms.community import louvain_communities

G = nx.Graph()

# 添加节点
G.add_nodes_from(nodes)

# 添加边
G.add_edges_from(links)

print(len(G.nodes()))

# 运行 Louvain 算法
communities = louvain_communities(G)
print(len(communities))

11585
71


In [8]:
# 将社区划分到最精确
res=1   # 分辨率默认为1
commNum=len(communities)
preCommNum=-1
print(commNum)
while preCommNum<commNum:
    preCommNum=commNum
    res=res/2
    communities=louvain_communities(G,resolution=res)
    commNum=len(communities)
print(commNum)

71
54


In [9]:
nodeNum=0
for comm in communities:
    nodeNum+=len(comm)
print(nodeNum)
print(len(nodes))
# 验证社区内节点没有变

11585
11585


In [11]:
pop_communites=[]
# 合并小社区
def merge_small_communities(G, communities, max_communities):
    """合并较小的社区，直到满足最大社区数量要求"""
    while len(communities) > max_communities:
        # 计算每个社区的大小 从小到大
        communities = sorted(communities, key=len)
        smallest = communities.pop(0)  # 弹出最小的社区
        # 找到它的邻居社区
        neighbor_counts = {}
        # 取出最小社区的节点——找到该节点直连节点
        # 找到直连节点所在社区号，计算为最小社区到该社区一次
        for node in smallest:
            for neighbor in G.neighbors(node):
                for idx, comm in enumerate(communities):
                    if neighbor in comm:
                        neighbor_counts[idx] = neighbor_counts.get(idx, 0) + 1
        # 合并到最近的邻居社区
        if neighbor_counts:
            # 返回最大的键
            best_match = max(neighbor_counts, key=neighbor_counts.get)
            communities[best_match] = communities[best_match].union(smallest)
        else:
            # 保留舍弃的小社区
            pop_communites.append(smallest)


    return communities


max_communities = 10  # 期望的最大社区数
merged_communities = merge_small_communities(G, communities, max_communities)
print(f"合并后社区数: {len(merged_communities)}")

合并后社区数: 10


In [14]:
# 验证合并后节点数量没变
count=0
for comm in pop_communites:
    count += len(comm)
print(len(pop_communites))
for comm in merged_communities:
    count+=len(comm)
    print(len(comm))
print(count)

30
268
362
408
646
912
1020
1042
2303
2201
2221
11585


In [15]:
# 为所有节点编cluster号
node_cluster_dc={}
for idx,comm in enumerate(merged_communities):
    for node in comm:
        node_cluster_dc[node]=idx+1
for comm in pop_communites:
    for node in comm:
        node_cluster_dc[node]=0  

In [16]:
# 验证所有节点都已编号
print(len(node_cluster_dc))

11585


In [17]:
# 为json数据添加cluster和权值改变
jsonNodes=jsonData["nodes"]
jsonLinks=jsonData["links"]
print(len(jsonNodes))
print(len(jsonLinks))

with open("../output_data/sourceJson_Draw.json", "r", encoding="utf-8") as f:
    jsonData_Draw = json.load(f)  # 解析 JSON 文件
jsonNodes_Draw = jsonData_Draw["nodes"]
jsonLinks_Draw = jsonData_Draw["links"]
print(len(jsonNodes_Draw))
print(len(jsonLinks_Draw))


def updateNodes(nodes):
    idx=0
    for idx in range(len(nodes)):
        jnode=nodes[idx]
        nodes[idx]["cluster"] = node_cluster_dc[jnode["id"]]
    return nodes

def updateLinks(links):
    idx=0
    for idx in range(len(links)):
        jlink=links[idx]
        i=node_cluster_dc[jlink["source"]]
        j=node_cluster_dc[jlink["target"]]
        if i==j:
            links[idx]["value"]=10
    return links

updateNodes(jsonNodes)
updateNodes(jsonNodes_Draw)
updateLinks(jsonLinks)
updateLinks(jsonLinks_Draw)

print(jsonNodes[-1])
print(jsonNodes_Draw[-1])
print(jsonLinks[-1])
print(jsonLinks_Draw[-1])

11585
28435
2989
13567
{'id': 'Vials: Visualizing Alternative Splicing of Genes', 'group': 1, 'doi': '10.1109/TVCG.2015.2467911', 'ab': 'Alternative splicing is a process by which the same DNA sequence is used to assemble different proteins, called protein isoforms. Alternative splicing works by selectively omitting some of the coding regions (exons) typically associated with a gene. Detection of alternative splicing is difficult and uses a combination of advanced data acquisition methods and statistical inference. Knowledge about the abundance of isoforms is important for understanding both normal processes and diseases and to eventually improve treatment through targeted therapies. The data, however, is complex and current visualizations for isoforms are neither perceptually efficient nor scalable. To remedy this, we developed Vials, a novel visual analysis tool that enables analysts to explore the various datasets that scientists use to make judgments about isoforms: the abundance o

In [18]:
# 导出为 JSON 文件
jsonData["nodes"] = jsonNodes
jsonData["links"] = jsonLinks
jsonData_Draw["nodes"]=jsonNodes_Draw
jsonData_Draw["links"] = jsonLinks_Draw

with open("../output_data/jsonCluster_Draw2.json", "w", encoding="utf-8") as f:
    json.dump(jsonData_Draw, f, indent=4, ensure_ascii=False)
with open("../output_data/jsonCluster_All2.json", "w", encoding="utf-8") as f:
    json.dump(jsonData, f, indent=4, ensure_ascii=False)

In [19]:
# 验证输出
with open("../output_data/jsonCluster_Draw2.json", "r", encoding="utf-8") as f:
    jsonData = json.load(f)  
print(len(jsonData["nodes"]))
print(len(jsonData["links"]))
with open("../output_data/jsonCluster_All2.json", "r", encoding="utf-8") as f:
    jsonData = json.load(f)  
print(len(jsonData["nodes"]))
print(len(jsonData["links"]))

2989
13567
11585
28435
