In [2]:
# 导入库
import os
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations

# 定义输入和输出路径
input_folder = "E:/solid-state patent data/新建文件夹"
output_folder = "E:/solid-state patent data/新建文件夹"

# 1. 技术融合潜能评估
## 1.1 计算 Jaccard 系数
def calculate_jaccard_coefficients(community_patents):
    jaccard_matrix = {}
    for (comm_a, patents_a), (comm_b, patents_b) in combinations(community_patents.items(), 2):
        intersection = len(patents_a & patents_b)
        union = len(patents_a | patents_b)
        jaccard_coefficient = intersection / union if union > 0 else 0
        jaccard_matrix[(comm_a, comm_b)] = jaccard_coefficient
    return jaccard_matrix

## 1.2 计算平均聚类系数
def calculate_clustering_coefficients(G, community_patents):
    clustering_coefficients = {}
    for community_id, patents in community_patents.items():
        subgraph = G.subgraph(patents)
        clustering_coefficients[community_id] = nx.average_clustering(subgraph)
    return clustering_coefficients

# 2. 技术融合价值评估
## 构建评价指标
def calculate_fusion_value(jaccard_matrix, ipc_distances, community_centralities):
    fusion_value_data = []
    for (comm_a, comm_b), jaccard in jaccard_matrix.items():
        fusion_degree = jaccard
        ipc_distance = ipc_distances.get((comm_a, comm_b), None)
        centrality_a = community_centralities.get(comm_a, 0)
        centrality_b = community_centralities.get(comm_b, 0)
        centrality = (centrality_a + centrality_b) / 2
        fusion_value = fusion_degree * (1 / ipc_distance if ipc_distance else 0) * centrality
        fusion_value_data.append([comm_a, comm_b, fusion_degree, ipc_distance, centrality, fusion_value])
    return fusion_value_data

# 3. 可视化结果
## 3.1 Jaccard 系数热力图
def plot_jaccard_heatmap(jaccard_matrix, communities, output_path):
    jaccard_heatmap = pd.DataFrame(index=communities, columns=communities, dtype=float)
    for (comm_a, comm_b), value in jaccard_matrix.items():
        jaccard_heatmap.at[comm_a, comm_b] = value
        jaccard_heatmap.at[comm_b, comm_a] = value
    plt.figure(figsize=(10, 8))
    sns.heatmap(jaccard_heatmap, annot=False, cmap="coolwarm", cbar=True)
    plt.title("社群间 Jaccard 系数热力图")
    plt.savefig(output_path, dpi=300)
    plt.close()

## 3.2 技术融合价值散点图
def plot_fusion_value_scatterplot(fusion_value_df, output_path):
    plt.figure(figsize=(10, 8))
    scatter = plt.scatter(
        fusion_value_df["Fusion_Degree"],
        fusion_value_df["IPC_Distance"],
        s=fusion_value_df["Centrality"] * 1000,
        c=fusion_value_df["Fusion_Value"],
        cmap="coolwarm",
        alpha=0.7
    )
    plt.colorbar(scatter, label="Fusion Value")
    plt.xlabel("当前技术融合程度")
    plt.ylabel("IPC 技术距离")
    plt.title("技术融合价值评估散点图")
    plt.savefig(output_path, dpi=300)
    plt.close()

# 主程序逻辑
if __name__ == "__main__":
    # 加载社群划分结果和共现网络
    community_partition_path = os.path.join(input_folder, "community_partition.csv")
    co_occurrence_matrix_path = os.path.join(input_folder, "co_occurrence_matrix_filtered.csv")
    partition_df = pd.read_csv(community_partition_path)
    co_occurrence_matrix = pd.read_csv(co_occurrence_matrix_path, index_col=0)
    G = nx.from_pandas_adjacency(co_occurrence_matrix, create_using=nx.Graph())

    # 构建每个社群的专利集合
    community_patents = {}
    for community_id, group in partition_df.groupby("Community"):
        community_patents[community_id] = set(group["Node"])

    # 计算 Jaccard 系数
    jaccard_matrix = calculate_jaccard_coefficients(community_patents)
    jaccard_df = pd.DataFrame(
        [(k[0], k[1], v) for k, v in jaccard_matrix.items()],
        columns=["Community_A", "Community_B", "Jaccard_Coefficient"]
    )
    jaccard_df.to_csv(os.path.join(output_folder, "jaccard_coefficients.csv"), index=False)

    # 计算平均聚类系数
    clustering_coefficients = calculate_clustering_coefficients(G, community_patents)
    clustering_df = pd.DataFrame(
        list(clustering_coefficients.items()),
        columns=["Community", "Average_Clustering_Coefficient"]
    )
    clustering_df.to_csv(os.path.join(output_folder, "clustering_coefficients.csv"), index=False)

    # 计算 IPC 技术距离
    ipc_distances = {}
    for (comm_a, patents_a), (comm_b, patents_b) in combinations(community_patents.items(), 2):
        min_distance = float("inf")
        for node_a in patents_a:
            for node_b in patents_b:
                try:
                    distance = nx.shortest_path_length(G, source=node_a, target=node_b)
                    min_distance = min(min_distance, distance)
                except nx.NetworkXNoPath:
                    continue
        ipc_distances[(comm_a, comm_b)] = min_distance if min_distance < float("inf") else None

    # 计算技术中心性
    pagerank_scores = nx.pagerank(G)
    community_centralities = {}
    for community_id, patents in community_patents.items():
        centrality = np.mean([pagerank_scores[node] for node in patents if node in pagerank_scores])
        community_centralities[community_id] = centrality

    # 构建技术融合价值评估表
    fusion_value_data = calculate_fusion_value(jaccard_matrix, ipc_distances, community_centralities)
    fusion_value_df = pd.DataFrame(
        fusion_value_data,
        columns=["Community_A", "Community_B", "Fusion_Degree", "IPC_Distance", "Centrality", "Fusion_Value"]
    )
    fusion_value_df.to_csv(os.path.join(output_folder, "fusion_value_assessment.csv"), index=False)

    # 可视化结果
    communities = sorted(community_patents.keys())
    plot_jaccard_heatmap(jaccard_matrix, communities, os.path.join(output_folder, "jaccard_heatmap.png"))
    plot_fusion_value_scatterplot(fusion_value_df, os.path.join(output_folder, "fusion_value_scatterplot.png"))

    print("技术融合潜能和技术融合价值评估完成！")

ZeroDivisionError: division by zero