In [1]:
import pandas as pd
import networkx as nx

In [5]:
df= pd.read_csv('./df.csv',index_col=0)
df.head()

Unnamed: 0,A,B
0,H04N5,H04N7
1,G06F3,G02F1
2,G06F3,H03K17
3,G02F1,H03K17
4,,


In [7]:
# 检查 'A' 和 'B' 这两列是否有空值
df_cal = df.dropna(subset=['A', 'B'])


In [9]:
# 首先对数据进行分组，并聚合权重
grouped = df_cal.groupby(['A', 'B']).size().reset_index(name='weight')
grouped.head()

Unnamed: 0,A,B,weight
0,A61B6,G01T1,2
1,A61B6,G01T7,2
2,A61B6,G03B42,1
3,A61B6,G06T1,1
4,A61B6,H01L31,2


In [11]:
# 使用 DataFrame 构建网络图
G = nx.from_pandas_edgelist(grouped, 'A', 'B',edge_attr='weight')
print(G.edges(data = True))

[('A61B6', 'G01T1', {'weight': 2}), ('A61B6', 'G01T7', {'weight': 2}), ('A61B6', 'G03B42', {'weight': 1}), ('A61B6', 'G06T1', {'weight': 1}), ('A61B6', 'H01L31', {'weight': 2}), ('A61B6', 'H04L29', {'weight': 2}), ('A61B6', 'H04N5', {'weight': 2}), ('G01T1', 'G01T7', {'weight': 2}), ('G01T1', 'G03B42', {'weight': 1}), ('G01T1', 'H01L31', {'weight': 2}), ('G01T1', 'H04L29', {'weight': 2}), ('G01T1', 'H04N5', {'weight': 2}), ('G01T7', 'G03B42', {'weight': 1}), ('G01T7', 'H01L31', {'weight': 2}), ('G01T7', 'H04L29', {'weight': 2}), ('G01T7', 'H04N5', {'weight': 2}), ('G03B42', 'H01L31', {'weight': 2}), ('G03B42', 'H04L29', {'weight': 2}), ('G03B42', 'H04N5', {'weight': 2}), ('H01L31', 'H04L29', {'weight': 2}), ('H01L31', 'H04N5', {'weight': 2}), ('H01L31', 'H01L33', {'weight': 1}), ('H04L29', 'G06C1', {'weight': 1}), ('H04L29', 'G06F13', {'weight': 1}), ('H04L29', 'G06F17', {'weight': 1}), ('H04L29', 'G06F3', {'weight': 1}), ('H04L29', 'H04B1', {'weight': 1}), ('H04L29', 'H04L12', {'weigh

In [12]:

from tqdm import tqdm

def calculate_constraints_with_progress(G, nodes=None, weight='weight'):
    if nodes is None:
        nodes = list(G.nodes)
    constraint = {}

    # 包装nodes列表到tqdm进度条中
    for v in tqdm(nodes, desc="Calculating constraints"):
        # Constraint is not defined for isolated nodes
        if len(G[v]) == 0:
            constraint[v] = float("nan")
            continue
        constraint[v] = sum(
            nx.local_constraint(G, v, n, weight) for n in set(nx.all_neighbors(G, v))
        )
    return constraint


def degree_centrality_with_progress(G):
    if len(G) <= 1:
        return {n: 1 for n in G}

    s = 1.0 / (len(G) - 1.0)
    centrality = {}
    
    # 使用tqdm显示进度条
    for n, d in tqdm(G.degree(), desc="Calculating degree centrality"):
        centrality[n] = d * s

    return centrality
    

In [13]:
# 结构洞
print('****cal 结构洞 ********')
holes_dict= calculate_constraints_with_progress(G, nodes=None, weight='weight')

# 度数中心度
print('****cal degree_centrality ********')
degree_cen = degree_centrality_with_progress(G) 

****cal 结构洞 ********


Calculating constraints: 100%|██████████| 236/236 [00:00<00:00, 237.18it/s]


****cal degree_centrality ********


Calculating degree centrality: 100%|██████████| 236/236 [00:00<?, ?it/s]


In [14]:

# 将结果转换为 DataFrame
print('****将结果转换为 DataFrame ********')
holes_df = pd.DataFrame.from_dict(holes_dict, orient='index', columns=['Constraint'])
degree_cen_df = pd.DataFrame.from_dict(degree_cen, orient='index', columns=['Degree Centrality'])

print('****合并两个 DataFrame ********')
combined_df = pd.concat([holes_df, degree_cen_df], axis=1)

# 将结果保存为 Excel 文件
output_filepath = 'network_analysis_results_加权.xlsx'
combined_df.to_excel(output_filepath)

output_filepath

****将结果转换为 DataFrame ********
****合并两个 DataFrame ********


'network_analysis_results_加权.xlsx'