In [1]:
from collections import Counter

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Load the global (base) network from its GML file and show its node count.
G = nx.read_gml("../Base_main.gml")
G.number_of_nodes()

30454

In [3]:
# Drop every node whose degree is zero, then show how many nodes remain.
degrees = G.degree()
remove_nodes = [node_id for node_id, degree in degrees if degree == 0]
G.remove_nodes_from(remove_nodes)

G.number_of_nodes()

28664

In [19]:
# Edge count of the base network G.
G.number_of_edges()

6012773

In [4]:
# Louvain partitioning of G; seed=123 fixes the random state passed to the algorithm.
# The last line shows how many communities were found.
parti_louvain = nx.community.louvain_communities(G, seed=123)
len(parti_louvain)

10

重要的社区为 1 3 4 5 7 8 9 \
Important communities: 1, 3, 4, 5, 7, 8, 9

In [7]:
# Size of every community, followed by each community's share of all
# partitioned nodes (two decimals).
len_lst = list(map(len, parti_louvain))
print(len_lst)
n = sum(len_lst)
print([format(size / n, ".2f") for size in len_lst])

[1635, 489, 7660, 1298, 3248, 119, 4845, 3791, 5577, 2]
['0.06', '0.02', '0.27', '0.05', '0.11', '0.00', '0.17', '0.13', '0.19', '0.00']


将社区划分加入到网络中 \
Add the community assignment to the network as a node attribute

In [8]:
# Tag every node of G with the 1-based index of the Louvain community it
# belongs to. enumerate(..., start=1) supplies the label, so no counter has to
# be initialised and incremented by hand.
for class_cnt, nodes_each_class in enumerate(parti_louvain, start=1):
    for node in nodes_each_class:
        G.nodes[node]["community_louvain"] = class_cnt

In [12]:
# Save the base network, including the community_louvain node attribute, as GML.
nx.write_gml(G,"../Base_main_community.gml")

提取出Comment子图 \
Extract the Comment subgraph

In [13]:
# Load the comment network from its GML file and show its node count.
G_commented = nx.read_gml("../Commented_main.gml")
G_commented.number_of_nodes()

30454

In [14]:
# Remove the zero-degree nodes from the comment network, then show how many
# nodes are left.
degrees_cmt = G_commented.degree()
remove_nodes = [node_id for node_id, degree in degrees_cmt if degree == 0]
G_commented.remove_nodes_from(remove_nodes)

G_commented.number_of_nodes()

17468

In [15]:
# Copy each node's community_louvain label from the base network G onto the
# same node of the comment network, then display node '0' as a spot check.
for node, attrs in G_commented.nodes(data=True):
    # attrs is the node's own attribute dict, so writing to it updates the graph.
    attrs["community_louvain"] = G.nodes[node]["community_louvain"]
G_commented.nodes['0']

{'ui': 'D000001', 'name': 'Calcimycin', 'community_louvain': 3}

In [16]:
# Save the comment network, including the community_louvain node attribute, as GML.
nx.write_gml(G_commented,"../Commented_main_community.gml")

In [17]:
# Base network: for each Louvain community, print the ten nodes with the
# highest weighted degree as (name, weighted degree) pairs.
weighted_degrees = dict(G.degree(weight='weight'))

for idx, c in enumerate(parti_louvain):
    print(f"Community {idx+1}: ")
    # Collect pairs in a list rather than a dict keyed by name: with a dict,
    # two nodes sharing the same "name" would silently overwrite each other.
    nodes = [(G.nodes[n]["name"], weighted_degrees[n]) for n in c]
    print(sorted(nodes, key=lambda x: x[1], reverse=True)[:10])

Community 1: 
[('Diet', 107802), ('Body Weight', 49132), ('Nutritional Status', 48401), ('Feeding Behavior', 43190), ('Nutritional Physiological Phenomena', 35662), ('Growth', 30891), ('Infant, Premature', 30383), ('Food', 28580), ('Plants, Medicinal', 27505), ('Body Mass Index', 27374)]
Community 2: 
[('Electroencephalography', 52892), ('Attention', 46385), ('Reflex', 37109), ('Visual Perception', 33681), ('Behavior, Animal', 32514), ('Pattern Recognition, Visual', 30079), ('Memory', 29289), ('Nervous System Physiological Phenomena', 26270), ('Movement', 25827), ('Speech Perception', 24496)]
Community 3: 
[('Research', 791779), ('Pharmacology', 246567), ('Toxicology', 156843), ('Metabolism', 148845), ('Anti-Bacterial Agents', 96754), ('Drug Therapy', 90666), ('Kidney', 87673), ('Physiology', 84185), ('Urine', 82301), ('Blood', 81409)]
Community 4: 
[('Blood Pressure', 85328), ('Anesthesia', 79942), ('Electrocardiography', 74638), ('Heart', 68801), ('Respiration', 60277), ('Hemodynamic

In [18]:
# Comment network: for each Louvain community, print the ten nodes with the
# highest weighted degree as (name, weighted degree) pairs.
weighted_degrees = dict(G_commented.degree(weight='weight'))

for idx, c in enumerate(parti_louvain):
    print(f"Community {idx+1}: ")
    # Community members that are absent from the comment network are skipped.
    # Collect pairs in a list rather than a dict keyed by name: with a dict,
    # two nodes sharing the same "name" would silently overwrite each other.
    nodes = [
        (G_commented.nodes[n]["name"], weighted_degrees[n])
        for n in c
        if G_commented.has_node(n)
    ]
    print(sorted(nodes, key=lambda x: x[1], reverse=True)[:10])

Community 1: 
[('Diet', 1975), ('Body Mass Index', 1571), ('Diabetes Mellitus, Type 2', 1538), ('Feeding Behavior', 1136), ('Infant, Premature', 1001), ('Bariatric Surgery', 956), ('Weight Loss', 955), ('Pregnancy Outcome', 921), ('Obesity, Morbid', 917), ('Dietary Supplements', 854)]
Community 2: 
[('Attention', 1003), ('Behavior, Animal', 662), ('Mental Recall', 589), ('Visual Perception', 556), ('Reading', 536), ('Psychomotor Performance', 507), ('Semantics', 501), ('Memory', 485), ('Pattern Recognition, Visual', 471), ('Concept Formation', 448)]
Community 3: 
[('Research', 1642), ('Blood Transfusion', 674), ('Vaccination', 595), ('Anti-Bacterial Agents', 509), ('Blood Donors', 456), ('Antineoplastic Agents', 384), ('Vaccines', 381), ('Antiviral Agents', 341), ('Hypnotics and Sedatives', 335), ('Fishes', 331)]
Community 4: 
[('Stents', 3453), ('Percutaneous Coronary Intervention', 2684), ('Heart Failure', 2536), ('Heart Valve Prosthesis', 2231), ('Atrial Fibrillation', 2081), ('Card

In [4]:
# Rebind G_commented to the graph stored in the "_norm" GML file and show its node count.
# NOTE(review): no visible cell writes "../Commented_main_community_norm.gml"
# (only "../Commented_main_community.gml" is written) — confirm which step produces it.
G_commented = nx.read_gml("../Commented_main_community_norm.gml")
G_commented.number_of_nodes()

17468

In [6]:
# Count how many nodes of the comment network carry each "community_louvain"
# value. Counter replaces the manual if/else tally; keys keep first-seen order,
# and a node without the attribute is counted under the key None.
attribute_counts = Counter(
    attributes.get("community_louvain")
    for _, attributes in G_commented.nodes(data=True)
)

# One line per community value: "<value>: <count> nodes" (printed in Chinese).
for attribute_value, count in attribute_counts.items():
    print(f"属性值 {attribute_value}: {count} 个节点")

属性值 3: 3356 个节点
属性值 5: 2015 个节点
属性值 8: 3240 个节点
属性值 9: 3996 个节点
属性值 7: 2610 个节点
属性值 1: 777 个节点
属性值 4: 1017 个节点
属性值 2: 391 个节点
属性值 6: 66 个节点


In [8]:
# Node counts per community, listed in ascending order of community id.
[attribute_counts[community] for community in sorted(attribute_counts)]

[777, 391, 3356, 1017, 2015, 66, 2610, 3240, 3996]