0. 导入库

In [3]:
# 数据处理和分析相关库
import os
import pandas as pd
import numpy as np

# 网络分析相关库
import networkx as nx

# 可视化相关库
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations

# 其他工具库
from sklearn.metrics.pairwise import cosine_similarity
# 定义输入和输出路径
input_folder = "E:/solid-state patent data/新建文件夹"  # 输入文件夹路径
output_folder = "E:/solid-state patent data/新建文件夹"  # 输出文件夹路径

# 如果输出文件夹不存在，则创建
os.makedirs(output_folder, exist_ok=True)

1.1 计算社群间的 Jaccard 系数
Jaccard 系数用于衡量两个社群之间的相似度，定义为：

J(A,B)= 
∣A∪B∣
∣A∩B∣
​
 
其中 A 和 B 是两个社群的专利集合。

In [4]:
from itertools import combinations

# 加载社群划分结果
community_partition_path = os.path.join(input_folder, "community_partition.csv")
#确定是filtered吗？
partition_df = pd.read_csv(community_partition_path)

# 构建每个社群的专利集合
community_patents = {}
for community_id, group in partition_df.groupby("Community"):
    community_patents[community_id] = set(group["Node"])

# 计算社群间的 Jaccard 系数
jaccard_matrix = {}
for (comm_a, patents_a), (comm_b, patents_b) in combinations(community_patents.items(), 2):
    intersection = len(patents_a & patents_b)
    union = len(patents_a | patents_b)
    jaccard_coefficient = intersection / union if union > 0 else 0
    jaccard_matrix[(comm_a, comm_b)] = jaccard_coefficient

# 转换为 DataFrame 格式
jaccard_df = pd.DataFrame(
    [(k[0], k[1], v) for k, v in jaccard_matrix.items()],
    columns=["Community_A", "Community_B", "Jaccard_Coefficient"]
)
jaccard_df.to_csv(os.path.join(output_folder, "jaccard_coefficients.csv"), index=False)
print("Jaccard 系数已保存到 jaccard_coefficients.csv")

Jaccard 系数已保存到 jaccard_coefficients.csv


1.2 计算社群的平均聚类系数
使用 NetworkX 计算每个社群的平均聚类系数，衡量社群内部的紧密程度。

In [6]:
# 计算每个社群的平均聚类系数
clustering_coefficients = {}
for community_id, patents in community_patents.items():
    subgraph = G.subgraph(patents)
    
    # 检查子图是否满足条件
    if subgraph.number_of_nodes() > 1 and subgraph.number_of_edges() > 0:
        try:
            clustering_coefficients[community_id] = nx.average_clustering(subgraph)
        except ZeroDivisionError:
            clustering_coefficients[community_id] = 0  # 如果计算失败，设为 0
    else:
        clustering_coefficients[community_id] = 0  # 如果子图过于稀疏，设为 0

# 转换为 DataFrame 格式
clustering_df = pd.DataFrame(
    list(clustering_coefficients.items()),
    columns=["Community", "Average_Clustering_Coefficient"]
)

# 打印结果
print("平均聚类系数:")
print(clustering_df)

平均聚类系数:
   Community  Average_Clustering_Coefficient
0          0                        0.900000
1          1                        0.000000
2          2                        0.000000
3          3                        0.000000
4          4                        0.433333
5          5                        0.000000
6          6                        0.000000
7          7                        0.000000
8          8                        0.000000
9          9                        0.000000


2. 技术融合价值评估
2.1 构建评价指标
从以下三个方面构建技术融合价值评估指标：

当前技术融合程度 ：使用 Jaccard 系数表示。
IPC 技术距离 ：通过计算两个社群的中心节点之间的最短路径长度。
技术中心性 ：使用社群中所有节点的 PageRank 值的平均值。

In [7]:
# 计算 IPC 技术距离
ipc_distances = {}
for (comm_a, patents_a), (comm_b, patents_b) in combinations(community_patents.items(), 2):
    min_distance = float("inf")
    for node_a in patents_a:
        for node_b in patents_b:
            try:
                distance = nx.shortest_path_length(G, source=node_a, target=node_b)
                min_distance = min(min_distance, distance)
            except nx.NetworkXNoPath:
                continue
    ipc_distances[(comm_a, comm_b)] = min_distance if min_distance < float("inf") else None

# 计算技术中心性
pagerank_scores = nx.pagerank(G)
community_centralities = {}
for community_id, patents in community_patents.items():
    centrality = np.mean([pagerank_scores[node] for node in patents if node in pagerank_scores])
    community_centralities[community_id] = centrality

# 构建技术融合价值评估表
fusion_value_data = []
for (comm_a, comm_b), jaccard in jaccard_matrix.items():
    fusion_degree = jaccard
    ipc_distance = ipc_distances.get((comm_a, comm_b), None)
    centrality_a = community_centralities.get(comm_a, 0)
    centrality_b = community_centralities.get(comm_b, 0)
    centrality = (centrality_a + centrality_b) / 2
    fusion_value = fusion_degree * (1 / ipc_distance if ipc_distance else 0) * centrality
    fusion_value_data.append([comm_a, comm_b, fusion_degree, ipc_distance, centrality, fusion_value])

fusion_value_df = pd.DataFrame(
    fusion_value_data,
    columns=["Community_A", "Community_B", "Fusion_Degree", "IPC_Distance", "Centrality", "Fusion_Value"]
)
fusion_value_df.to_csv(os.path.join(output_folder, "fusion_value_assessment.csv"), index=False)
print("技术融合价值评估结果已保存到 fusion_value_assessment.csv")

NodeNotFound: Either source C08J or target C25D is not in G

3. 可视化结果
3.1 社群间 Jaccard 系数热力图

In [None]:
# 绘制散点图
plt.figure(figsize=(10, 8))
scatter = plt.scatter(
    fusion_value_df["Fusion_Degree"],
    fusion_value_df["IPC_Distance"],
    s=fusion_value_df["Centrality"] * 1000,  # 气泡大小
    c=fusion_value_df["Fusion_Value"],       # 颜色表示融合价值
    cmap="coolwarm",
    alpha=0.7
)
plt.colorbar(scatter, label="Fusion Value")
plt.xlabel("当前技术融合程度")
plt.ylabel("IPC 技术距离")
plt.title("技术融合价值评估散点图")
plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
plt.close()
print("技术融合价值散点图已保存到 fusion_value_scatterplot.png")

  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), d

技术融合价值散点图已保存到 fusion_value_scatterplot.png


3.2 技术融合价值散点图

In [None]:
# 绘制散点图
plt.figure(figsize=(10, 8))
scatter = plt.scatter(
    fusion_value_df["Fusion_Degree"],
    fusion_value_df["IPC_Distance"],
    s=fusion_value_df["Centrality"] * 1000,  # 气泡大小
    c=fusion_value_df["Fusion_Value"],       # 颜色表示融合价值
    cmap="coolwarm",
    alpha=0.7
)
plt.colorbar(scatter, label="Fusion Value")
plt.xlabel("当前技术融合程度")
plt.ylabel("IPC 技术距离")
plt.title("技术融合价值评估散点图")
plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
plt.close()
print("技术融合价值散点图已保存到 fusion_value_scatterplot.png")

  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), dpi=300)
  plt.savefig(os.path.join(output_folder, "fusion_value_scatterplot.png"), d

技术融合价值散点图已保存到 fusion_value_scatterplot.png
