In [1]:
import os
# NOTE(review): the working directory is switched to a hard-coded absolute path so that
# the project-local imports further down (config, utils, matching) can presumably be
# resolved from there; the notebook will not run on another machine without editing
# this line.
os.chdir('/home/a001/Documents/ZhengHaoyu/python/1_important/OCMN')

import glob
import networkx as nx
from typing import Dict, List, Tuple, Set
import time
import copy
from tqdm import tqdm
import pandas as pd

import config
from utils.utils import save_network, setup_logger, create_output_file
from matching import Matching, MultiMatching

logger = setup_logger(__name__)

In [2]:
def analyze_network_stats(network_dir: str) -> None:
    """Print basic statistics (layers, nodes, edges) of one multiplex network.

    Two files are read from ``<network_dir>/Dataset``:

    * ``*_layers.txt``: a header line followed by ``<layer_id> <layer_name>`` records.
    * ``*_multiplex.edges``: ``<layer_id> <node1> <node2> <weight>`` records.

    Blank lines in either file are ignored. The report is written to stdout: the
    number of layers, the number of distinct nodes over all layers, the total number
    of edge records, and one row per layer with its node and edge counts.

    Args:
        network_dir: Directory of a single network; its base name is printed as the
            network name.

    Raises:
        FileNotFoundError: If the layers file or the edges file cannot be found.
        ValueError: If a non-blank line does not have the expected number of fields.
        KeyError: If an edge record refers to a layer id that the layers file does
            not declare.
    """
    network_name = os.path.basename(network_dir)
    print(f"分析网络: {network_name}")

    # Locate both input files; sorting makes the choice deterministic should several
    # files match, because glob returns matches in arbitrary order.
    dataset_dir = os.path.join(network_dir, "Dataset")
    layer_matches = sorted(glob.glob(os.path.join(dataset_dir, "*_layers.txt")))
    if not layer_matches:
        raise FileNotFoundError(f"No *_layers.txt file found in {dataset_dir}")
    edge_matches = sorted(glob.glob(os.path.join(dataset_dir, "*_multiplex.edges")))
    if not edge_matches:
        raise FileNotFoundError(f"No *_multiplex.edges file found in {dataset_dir}")
    layers_file = layer_matches[0]
    edges_file = edge_matches[0]

    # Layer table: id -> name plus the accumulators filled from the edge file.
    layers = {}
    with open(layers_file, 'r', encoding='utf-8') as f:
        next(f, None)  # skip the header line; tolerate a completely empty file
        for line_no, line in enumerate(f, 2):
            record = line.strip()
            if not record:
                continue  # blank line, typically at the end of the file
            # Split only once so that a layer name containing spaces stays intact.
            parts = record.split(None, 1)
            if len(parts) != 2:
                raise ValueError(
                    f"{layers_file}:{line_no}: expected '<layer_id> <layer_name>', got {record!r}"
                )
            layer_id, layer_name = parts
            layers[layer_id] = {'name': layer_name, 'nodes': set(), 'edges': 0}

    # Edge records: collect the nodes and count the edges of every layer.
    total_nodes = set()
    with open(edges_file, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, 1):
            parts = line.split()
            if not parts:
                continue  # blank line
            if len(parts) != 4:
                raise ValueError(
                    f"{edges_file}:{line_no}: expected 4 fields "
                    f"(layer node1 node2 weight), got {line.strip()!r}"
                )
            layer_id, node1, node2, _weight = parts
            layer = layers[layer_id]
            layer['nodes'].update((node1, node2))
            layer['edges'] += 1
            total_nodes.update((node1, node2))

    # Report.
    print(f"网络层数: {len(layers)}")
    print(f"总节点数: {len(total_nodes)}")
    total_edges = sum(layer['edges'] for layer in layers.values())
    print(f"总边数: {total_edges}")
    print("各层统计信息:")
    print("层ID\t层名称\t\t节点数\t边数")
    for layer_id, info in layers.items():
        print(f"{layer_id}\t{info['name']}\t\t{len(info['nodes'])}\t{info['edges']}")
    print("\n")


# Print the statistics of every network directory found directly under
# config.REAL_NET_PATH, in sorted order. A failure in one network is logged and the
# scan moves on to the next one.
candidate_dirs = sorted(glob.glob(os.path.join(config.REAL_NET_PATH, "*")))
for network_dir in candidate_dirs:
    if not os.path.isdir(network_dir):
        continue  # plain files lying next to the network directories are ignored
    try:
        analyze_network_stats(network_dir)
    except Exception as e:
        logger.error(f"分析网络 {os.path.basename(network_dir)} 统计信息时出错: {str(e)}")

分析网络: Arabidopsis
网络层数: 7
总节点数: 6980
总边数: 18655
各层统计信息:
层ID	层名称		节点数	边数
1	direct_interaction		5493	13857
2	physical_association		2859	4411
3	additive_genetic_interaction_defined_by_inequality		47	64
4	suppressive_genetic_interaction_defined_by_inequality		78	86
5	synthetic_genetic_interaction_defined_by_inequality		18	14
6	association		83	74
7	colocalization		187	149


分析网络: Cannes
网络层数: 3
总节点数: 438537
总边数: 991854
各层统计信息:
层ID	层名称		节点数	边数
1	RT		340349	496982
2	MT		233735	411338
3	RE		85867	83534


分析网络: Celegans
网络层数: 6
总节点数: 3879
总边数: 8182
各层统计信息:
层ID	层名称		节点数	边数
1	direct_interaction		3126	5557
2	physical_association		239	313
3	additive_genetic_interaction_defined_by_inequality		1046	2128
4	suppressive_genetic_interaction_defined_by_inequality		120	166
5	association		12	7
6	colocalization		14	11


分析网络: CelegansConnectome
网络层数: 3
总节点数: 279
总边数: 5863
各层统计信息:
层ID	层名称		节点数	边数
1	ElectrJ		253	1031
2	MonoSyn		260	1639
3	PolySyn		278	3193


分析网络: Drosophila
网络层数: 7
总节点数: 8215
总边数: 43367
各层统计信

In [None]:
# “Structural reducibility of multilayer networks”
# M. De Domenico, V. Nicosia, A. Arenas, and V. Latora
# Nature Communications 2015 6, 6864

# Experiment plan consumed by real_networks(): network name -> list of layer-name
# pairs. The network name is the directory name under config.REAL_NET_PATH; every
# pair [layer_name_1, layer_name_2] is handed to process_network_layers() as one run.
# Entries that are commented out are currently excluded from the run.
NETWORKS_TO_PROCESS = {
    # Biological networks
    # Ref: C. Stark, B. -J. Breitkreutz, T. Reguly, L. Boucher, A. Breitkreutz, and M. Tyers. - "Biogrid: a general repository for interaction datasets" - Nucleic Acids Research 2006 34 (1) D535–D539
    "Arabidopsis": [["direct_interaction", "physical_association"]],
    "Celegans": [["direct_interaction", "physical_association"]],
    "Drosophila": [["direct_interaction", "physical_association"]],
    "HumanHIV1": [["direct_interaction", "physical_association"]],
    "SacchPomb": [["direct_interaction", "physical_association"]],
    "Rattus": [["direct_interaction", "physical_association"]],
    # Ref: Beth L. Chen, David H. Hall, and Dmitri B. Chklovskii - "Wiring optimization can relate neuronal structure and function" - PNAS 2006 103 (12) 4723–4728
    "CelegansConnectome": [["ElectrJ", "MonoSyn"]],
    # Ref: M. Costanzo et al. - "The Genetic Landscape of a Cell" - Science 2010 327 (5964) 425-431
    "YeastLandscape": [["positive_interactions", "negative_interactions"]],
    # Online social networks
    # Ref: E. Omodei, M. De Domenico, A. Arenas. - Characterizing interactions in online social networks during exceptional events.. Front. Phys. 3, 59 (2015)
    "Cannes": [["RT", "MT"]],
    "MLKing": [["RT", "MT"]],
    "MoscowAthletics": [["RT", "MT"]],
    "NYClimate": [["RT", "MT"]],
    # Ref: M. De Domenico, E. G. Altmann. - Unraveling the Origin of Social Bursts in Collective Attention.. Scientific Reports 10, 4629 (2020)
    "NBAFinals": [["RT", "MT"]],
    "Sanremo": [["RT", "MT"]],
    "UCLFinal": [["RT", "MT"]],
    "GravitationalWaves": [["RT", "MT"]],
    # Interpersonal relationship networks
    # Ref: D. Krackhardt - "Cognitive social structures". Social Networks (1987), 9, 104-134
    "KrackhardtHighTech": [
        ["friendship", "advice"],
        ["friendship", "Reports_to"],
    ],
    # Ref: Emmanuel Lazega - "The Collegial Phenomenon: The Social Mechanisms of Cooperation Among Peers in a Corporate Law Partnership". Oxford University Press (2001)
    "LazegaLawFirm": [
        ["friendship", "advice"],
        ["friendship", "co-work"],
    ],
    # Ref: J. Coleman, E. Katz, and H. Menzel.- "The Diffusion of an Innovation Among Physicians". Sociometry (1957) 20:253-270.
    "PhysiciansInnovation": [
        ["friendship", "advice"],
        ["friendship", "discussion"],
    ],
    # "VickersChan7thGraders": [
    #     ["best_friends", "get_on_with"],
    #     ["best_friends", "work_with"]
    # ],
    # "KapfererTailorShop": [
    #     ["TS1", "TS2"],
    #     ["TI1", "TI2"],
    #     ["TS1", "TI1"],
    #     ["TS2", "TI2"],
    # ],
    # Transportation networks (disabled)
    # "EUAirMultiplexTransport": [
    #     # Germany
    #     ["Lufthansa", "Air_Berlin"],
    #     # UK
    #     ["Easyjet", "British_Airways"],
    #     # Ireland
    #     ["Ryanair", "Air_Lingus"],
    #     # Spain
    #     ["Iberia", "Vueling_Airlines"],
    #     # Netherlands
    #     ["KLM", "Transavia_Holland"],
    #     # Belgium
    #     ["TNT_Airways", "European_Air_Transport"],
    # ],
    # "LondonTransport": [
    #     ["Tube", "Overground"],
    # ],
}

# Networks whose layers are treated as undirected: create_layer_graph() inserts every
# edge of these networks in both directions.
# NOTE(review): both names below are commented out in NETWORKS_TO_PROCESS, so this
# list has no effect on a real_networks() run unless those entries are re-enabled.
UNDIRECTED_NETWORKS = [
    "EUAirMultiplexTransport",
    "LondonTransport",
]

# Networks for which process_network_layers() calls MOUI with
# max_clap_length=MAX_SEARCH_DEPTH instead of its default arguments (currently none).
USE_APPROXIMATE_ALGORITHM = [
]

# Networks for which process_network_layers() also calls MOUI with
# max_clap_length=MAX_SEARCH_DEPTH and, in addition, skips the RRMU step; the RRMU
# result and its running time are then recorded as None.
HUGE_NETWORKS = [
    "Cannes",
    "GravitationalWaves",
    "MLKing",
    "MoscowAthletics",
    "NBAFinals",
    "NYClimate",
    "Sanremo",
    "UCLFinal",
]

# Value passed as MOUI's max_clap_length for the networks selected by the two lists above.
MAX_SEARCH_DEPTH = 3

In [None]:
def read_layers_info(network_dir: str) -> Dict[str, str]:
    """Read the layer table of a network and return a layer-id -> layer-name mapping.

    The table is the ``*_layers.txt`` file inside ``<network_dir>/Dataset``. Its first
    line is a header; every following non-blank line is ``<layer_id> <layer_name>``.
    Ids are kept as strings. Everything after the id is taken as the name, so a name
    containing spaces is preserved.

    Args:
        network_dir: Directory of a single network.

    Returns:
        Mapping from layer id to layer name, in file order. Empty when the file
        contains no records.

    Raises:
        FileNotFoundError: If no ``*_layers.txt`` file exists.
        ValueError: If a non-blank record has an id but no name.
    """
    pattern = os.path.join(network_dir, "Dataset", "*_layers.txt")
    # glob returns matches in arbitrary order; sort so the selection is deterministic.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No layers file matching {pattern}")
    layers_file = matches[0]

    layers: Dict[str, str] = {}
    with open(layers_file, 'r', encoding='utf-8') as f:
        next(f, None)  # skip the header line; tolerate a completely empty file
        for line_no, line in enumerate(f, 2):
            record = line.strip()
            if not record:
                continue  # blank line, typically at the end of the file
            parts = record.split(None, 1)
            if len(parts) != 2:
                raise ValueError(
                    f"{layers_file}:{line_no}: expected '<layer_id> <layer_name>', got {record!r}"
                )
            layer_id, layer_name = parts
            layers[layer_id] = layer_name
    return layers

def get_layer_nodes(network_dir: str, layer_ids: List[str]) -> Tuple[Set[int], Set[int], Set[int]]:
    """Collect the nodes that appear in the selected layers of a network.

    Reads the ``*_multiplex.edges`` file inside ``<network_dir>/Dataset``. Lines that
    do not consist of exactly four whitespace-separated fields
    (``layer node1 node2 weight``) are ignored. Every node id is shifted by +1,
    following the original note that the raw data is numbered from 0.

    Args:
        network_dir: Directory of a single network.
        layer_ids: Ids (as strings) of the layers of interest; the first entry
            identifies the "first" layer.

    Returns:
        A tuple ``(nodes, node_1, node_2)``: ``nodes`` holds the nodes of all selected
        layers, ``node_1`` the nodes of ``layer_ids[0]`` and ``node_2`` the nodes of
        every other selected layer.

    Raises:
        FileNotFoundError: If no ``*_multiplex.edges`` file exists.
        ValueError: If a node field of a selected layer is not an integer.
    """
    pattern = os.path.join(network_dir, "Dataset", "*_multiplex.edges")
    # glob returns matches in arbitrary order; sort so the selection is deterministic.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No edge file matching {pattern}")

    nodes: Set[int] = set()
    node_1: Set[int] = set()
    node_2: Set[int] = set()
    with open(matches[0], 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            if len(parts) != 4:
                continue

            lid, node1, node2, _ = parts
            if lid not in layer_ids:
                continue

            # Convert once; +1 because the raw ids start at 0.
            endpoints = (int(node1) + 1, int(node2) + 1)
            nodes.update(endpoints)
            per_layer = node_1 if lid == layer_ids[0] else node_2
            per_layer.update(endpoints)

    return nodes, node_1, node_2

def create_layer_graph(network_dir: str, layer_id: str, all_nodes: Set[int]) -> nx.DiGraph:
    """Build the directed graph of one layer of a network.

    The graph contains every node of ``all_nodes`` (so both layers of a pair share the
    same node set) plus the edges of ``layer_id`` read from the ``*_multiplex.edges``
    file inside ``<network_dir>/Dataset``. Node ids are shifted by +1, the same shift
    get_layer_nodes() applies. For networks listed in UNDIRECTED_NETWORKS each edge is
    inserted in both directions.

    Lines without exactly four fields are reported with a warning and skipped; an
    edge that cannot be added (for example a non-integer node field) is reported with
    an error and skipped.

    Args:
        network_dir: Directory of a single network; its base name is looked up in
            UNDIRECTED_NETWORKS.
        layer_id: Id (as a string) of the layer to extract.
        all_nodes: Nodes to add to the graph before any edge is read.

    Returns:
        The directed graph of the requested layer.

    Raises:
        FileNotFoundError: If no ``*_multiplex.edges`` file exists.
    """
    pattern = os.path.join(network_dir, "Dataset", "*_multiplex.edges")
    # glob returns matches in arbitrary order; sort so the selection is deterministic.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No edge file matching {pattern}")

    # Loop-invariant: decided once per call instead of once per edge line.
    is_undirected = os.path.basename(network_dir) in UNDIRECTED_NETWORKS

    graph = nx.DiGraph()
    graph.add_nodes_from(all_nodes)

    with open(matches[0], 'r', encoding='utf-8') as f:
        # Line numbers in the log messages are 0-based, as in the original code.
        for line_num, line in enumerate(f, 0):
            parts = line.strip().split()
            if len(parts) != 4:
                logger.warning(f"第 {line_num} 行格式错误: {line.strip()}")
                continue

            lid, node1, node2, _ = parts
            if lid != layer_id:
                continue

            try:
                source, target = int(node1) + 1, int(node2) + 1
                graph.add_edge(source, target)
                if is_undirected:
                    graph.add_edge(target, source)
            except Exception as e:
                # Best effort: a bad record is logged and the remaining edges are kept.
                logger.error(f"添加边 ({node1}, {node2}) 时出错: {str(e)}")

    return graph

def save_graph_to_file(graph: nx.DiGraph, net_name: str, layer_name: str):
    """Store one layer graph as ``<config.TEST_NET_PATH>/<net_name>_<layer_name>.txt``.

    The writing itself is delegated to ``save_network``; the original note describes
    the resulting file as an edge list.
    """
    target_name = f"{net_name}_{layer_name}.txt"
    target_path = os.path.join(config.TEST_NET_PATH, target_name)
    save_network(graph, target_path)

def _mean_degree(edge_count: int, node_count: int) -> float:
    """Return ``2 * edge_count / node_count``, or NaN when there are no nodes."""
    if node_count == 0:
        return float("nan")
    return 2 * edge_count / node_count


def process_network_layers(net_name: str, layer_names: List[str], output_file_name: str):
    """Run the matching experiment on two layers of a network and log one result row.

    Steps:

    1. Resolve the two layer names to layer ids. The ids are ordered like
       ``layer_names`` so that graphs, node sets and result columns with index 1 / 2
       always belong to ``layer_names[0]`` / ``layer_names[1]``, whatever the order of
       the layers in the layers file.
    2. Build one directed graph per layer on the union of both layers' nodes and save
       each graph via save_graph_to_file().
    3. Create a ``Matching`` per graph (``HK_algorithm`` followed by
       ``find_all_alternating_reachable_set``) and combine them in a ``MultiMatching``.
    4. Time ``MOUI`` (with ``max_clap_length=MAX_SEARCH_DEPTH`` for networks listed in
       USE_APPROXIMATE_ALGORITHM or HUGE_NETWORKS) and, unless the network is in
       HUGE_NETWORKS, time ``RRMU`` on a deep copy taken before ``MOUI`` ran.
    5. Append one comma-separated row to ``output_file_name`` with, in order: network
       name, both layer names, N, N_1, N_2, E_1, E_2, <k>, <k_1>, <k_2>, the
       driver-node counts of both matchings, the five ``MOUI`` results interleaved
       with the ``RRMU`` result (pre_diff_1, pre_diff_2, pre_union, RRMU union, MOUI
       union, average depth), the ``RRMU`` time and the ``MOUI`` time. ``RRMU`` values
       are ``None`` for huge networks.

    Args:
        net_name: Directory name of the network under ``config.REAL_NET_PATH``.
        layer_names: Exactly two distinct layer names of that network.
        output_file_name: Result file; the row is appended to it.

    Returns:
        ``(N, N_1, N_2, <k>, <k_1>, <k_2>)`` as strings: node counts of the union and
        of each layer, and the matching ``2 * edges / nodes`` averages. An average is
        ``'nan'`` when its node set is empty.

    Raises:
        ValueError: If the two layer names cannot be resolved to two distinct layers.
    """
    network_dir = os.path.join(config.REAL_NET_PATH, net_name)
    dir_name = os.path.basename(network_dir)
    is_undirected = dir_name in UNDIRECTED_NETWORKS
    is_huge = dir_name in HUGE_NETWORKS
    use_bounded_search = is_huge or dir_name in USE_APPROXIMATE_ALGORITHM

    # Layer id -> layer name, inverted so the ids can be looked up by name.
    layers_info = read_layers_info(network_dir)
    name_to_id = {}
    for lid, lname in layers_info.items():
        name_to_id.setdefault(lname, lid)  # first occurrence wins on duplicate names

    # Resolve in the order of layer_names, NOT in file order: everything below pairs
    # layer_ids[i] with layer_names[i].
    layer_ids = []
    for lname in layer_names:
        lid = name_to_id.get(lname)
        if lid is not None and lid not in layer_ids:
            layer_ids.append(lid)

    if len(layer_names) != 2 or len(layer_ids) != 2:
        raise ValueError(f"在网络 {net_name} 中未找到指定的两个层: {layer_names}")
    logger.debug(f"在网络 {net_name} 中的层ID： {layer_ids}")

    # Nodes of both layers together, and of each layer separately.
    all_nodes, node_1, node_2 = get_layer_nodes(network_dir, layer_ids)
    logger.debug(f"两层中共有 {len(all_nodes)} 个节点")

    # One graph per layer, each on the full node set; also persisted to disk.
    graphs = []
    for lname, lid in zip(layer_names, layer_ids):
        if is_undirected:
            logger.debug(f"网络 {dir_name} 是无向网络")

        graph = create_layer_graph(network_dir, lid, all_nodes)
        save_graph_to_file(graph, net_name, lname)
        graphs.append(graph)

    for i, g in enumerate(graphs):
        logger.debug(f"图 {i} 的信息：节点数：{g.number_of_nodes()}, 边数：{g.number_of_edges()}")

    # Per-layer matchings and their combination.
    matchings = []
    for graph in graphs:
        matching = Matching(graph)
        matching.HK_algorithm()
        matching.find_all_alternating_reachable_set()
        matchings.append(matching)

    multi_matching = MultiMatching(matchings)
    # RRMU must start from the same state as MOUI, so copy before MOUI runs.
    baseline_matching = copy.deepcopy(multi_matching)

    start_time = time.time()
    print(f"开始MOUI: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))}")
    if use_bounded_search:
        pre_diff_mds_1_size, pre_diff_mds_2_size, pre_union_size, union_size, average_depth = multi_matching.MOUI(max_clap_length=MAX_SEARCH_DEPTH)
    else:
        pre_diff_mds_1_size, pre_diff_mds_2_size, pre_union_size, union_size, average_depth = multi_matching.MOUI()
    end_time = time.time()
    print("MOUI结束")
    time_2 = end_time - start_time

    if not is_huge:
        start_time = time.time()
        print(f"开始RRMU: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))}")
        min_union_size = baseline_matching.RRMU()
        end_time = time.time()
        print("RRMU结束")
        time_1 = end_time - start_time
    else:
        min_union_size = None
        time_1 = None

    # Size statistics, computed once and shared by the result row and the return
    # value. _mean_degree guards against an empty node set so that a degenerate layer
    # cannot discard the results of the (possibly very long) computation above.
    edges_1 = len(graphs[0].edges)
    edges_2 = len(graphs[1].edges)
    n_all = str(len(all_nodes))
    n_1 = str(len(node_1))
    n_2 = str(len(node_2))
    k_all = str(_mean_degree(edges_1 + edges_2, len(all_nodes)))
    k_1 = str(_mean_degree(edges_1, len(node_1)))
    k_2 = str(_mean_degree(edges_2, len(node_2)))

    row = [
        f"{net_name}", str(layer_names[0]), str(layer_names[1]),
        n_all, n_1, n_2,
        str(edges_1), str(edges_2),
        k_all, k_1, k_2,
        str(len(matchings[0].driver_nodes)), str(len(matchings[1].driver_nodes)),
        str(pre_diff_mds_1_size), str(pre_diff_mds_2_size),
        str(pre_union_size), str(min_union_size), str(union_size), str(average_depth),
        str(time_1), str(time_2),
    ]
    with open(output_file_name, "a", encoding="utf-8") as output_file:
        output_file.write(",".join(row) + "\n")

    # N, N_1, N_2, <k>, <k_1>, <k_2>
    return n_all, n_1, n_2, k_all, k_1, k_2

def real_networks(result_columns=None):
    """Run process_network_layers() for every layer pair in NETWORKS_TO_PROCESS.

    A result file is created with ``create_output_file(result_columns,
    "real_networks")``; process_network_layers() appends one row per layer pair to it.
    Progress is printed to stdout.

    Args:
        result_columns: Column names handed to ``create_output_file``. ``None`` (the
            default) selects the 21 standard columns, which match the row layout
            written by process_network_layers().

    Returns:
        A DataFrame with one row per processed layer pair and the columns
        ``network_name``, ``layer_name_1``, ``layer_name_2``, ``N``, ``N_1``, ``N_2``
        (strings, as returned by process_network_layers) and ``<k>``, ``<k_1>``,
        ``<k_2>`` (floats rounded to two decimals). When a network has several layer
        pairs, ``network_name`` is suffixed with the first letters of the two layer
        names, e.g. ``Name-f&a``.
    """
    # Build the default inside the call: a list literal in the signature would be one
    # shared object that the external helper could mutate for all later calls.
    if result_columns is None:
        result_columns = [
            "network_name", "layer_name_1", "layer_name_2",
            "N", "N_1", "N_2", "E_1", "E_2", "<k>", "<k_1>", "<k_2>",
            "MDS_1", "MDS_2", "Diff_MDS_1", "Diff_MDS_2",
            "UMDS_0", "UMDS_1", "UMDS_2", "average_depth", "time_1", "time_2",
        ]

    output_file_name = create_output_file(result_columns, "real_networks")

    # Collect plain records and build the frame once at the end instead of
    # concatenating a one-row frame per iteration.
    records = []
    for net_name, layer_pairs in NETWORKS_TO_PROCESS.items():
        has_several_pairs = len(layer_pairs) > 1
        for layer_names in layer_pairs:
            if has_several_pairs:
                # Disambiguate the rows of one network by the layers' initials.
                net_abbr = f"{net_name}-{layer_names[0][0].lower()}&{layer_names[1][0].lower()}"
            else:
                net_abbr = net_name
            print(f"处理网络: {net_name}")
            print(f"层: {layer_names[0]} - {layer_names[1]}")
            N, N_1, N_2, k, k_1, k_2 = process_network_layers(net_name, layer_names, output_file_name)
            records.append({
                "network_name": net_abbr,
                "layer_name_1": layer_names[0],
                "layer_name_2": layer_names[1],
                "N": N,
                "N_1": N_1,
                "N_2": N_2,
                "<k>": round(float(k), 2),
                "<k_1>": round(float(k_1), 2),
                "<k_2>": round(float(k_2), 2),
            })
            print()
        print()
    return pd.DataFrame(records)

In [25]:
# Run the experiment for every entry of NETWORKS_TO_PROCESS. As the last expression of
# the cell, the returned summary DataFrame is displayed as the cell output.
real_networks()

处理网络: Arabidopsis
层: direct_interaction - physical_association
开始MOUI: 2025-07-09 10:09:05
MOUI结束
开始RRMU: 2025-07-09 10:09:05
RRMU结束


处理网络: Celegans
层: direct_interaction - physical_association
开始MOUI: 2025-07-09 10:09:10
MOUI结束
开始RRMU: 2025-07-09 10:09:10
RRMU结束


处理网络: Drosophila
层: direct_interaction - physical_association
开始MOUI: 2025-07-09 10:09:14
MOUI结束
开始RRMU: 2025-07-09 10:09:16
RRMU结束


处理网络: HumanHIV1
层: direct_interaction - physical_association
开始MOUI: 2025-07-09 10:09:23
MOUI结束
开始RRMU: 2025-07-09 10:09:23
RRMU结束


处理网络: SacchPomb
层: direct_interaction - physical_association
开始MOUI: 2025-07-09 10:09:24
MOUI结束
开始RRMU: 2025-07-09 10:09:24
RRMU结束


处理网络: Rattus
层: direct_interaction - physical_association
开始MOUI: 2025-07-09 10:09:26
MOUI结束
开始RRMU: 2025-07-09 10:09:26
RRMU结束


处理网络: CelegansConnectome
层: ElectrJ - MonoSyn
开始MOUI: 2025-07-09 10:09:27
MOUI结束
开始RRMU: 2025-07-09 10:09:28
RRMU结束


处理网络: YeastLandscape
层: positive_interactions - negative_interactions
开始MOUI: 2025-07



MOUI结束


处理网络: MLKing
层: RT - MT
开始MOUI: 2025-07-09 10:46:29
MOUI结束


处理网络: MoscowAthletics
层: RT - MT
开始MOUI: 2025-07-09 10:53:10




MOUI结束


处理网络: NYClimate
层: RT - MT
开始MOUI: 2025-07-09 10:54:15
MOUI结束


处理网络: NBAFinals
层: RT - MT
开始MOUI: 2025-07-09 10:56:32




MOUI结束


处理网络: Sanremo
层: RT - MT
开始MOUI: 2025-07-09 11:35:36




MOUI结束


处理网络: UCLFinal
层: RT - MT
开始MOUI: 2025-07-09 11:36:59




MOUI结束


处理网络: GravitationalWaves
层: RT - MT
开始MOUI: 2025-07-09 12:08:44
MOUI结束


处理网络: KrackhardtHighTech
层: friendship - advice
开始MOUI: 2025-07-09 12:17:54
MOUI结束
开始RRMU: 2025-07-09 12:17:54
RRMU结束

处理网络: KrackhardtHighTech
层: friendship - Reports_to
开始MOUI: 2025-07-09 12:17:54
MOUI结束
开始RRMU: 2025-07-09 12:17:54
RRMU结束


处理网络: LazegaLawFirm
层: friendship - advice
开始MOUI: 2025-07-09 12:17:54
MOUI结束
开始RRMU: 2025-07-09 12:17:54
RRMU结束

处理网络: LazegaLawFirm
层: friendship - co-work
开始MOUI: 2025-07-09 12:17:55
MOUI结束
开始RRMU: 2025-07-09 12:17:55
RRMU结束


处理网络: PhysiciansInnovation
层: friendship - advice
开始MOUI: 2025-07-09 12:17:55
MOUI结束
开始RRMU: 2025-07-09 12:17:55
RRMU结束

处理网络: PhysiciansInnovation
层: friendship - discussion
开始MOUI: 2025-07-09 12:17:55
MOUI结束
开始RRMU: 2025-07-09 12:17:55
RRMU结束




Unnamed: 0,network_name,layer_name_1,layer_name_2,N,N_1,N_2,<k>,<k_1>,<k_2>
0,Arabidopsis,direct_interaction,physical_association,6903,5493,2859,5.29,5.05,3.09
1,Celegans,direct_interaction,physical_association,3191,3126,239,3.68,3.56,2.62
2,Drosophila,direct_interaction,physical_association,8060,7356,2851,9.2,6.55,9.11
3,HumanHIV1,direct_interaction,physical_association,994,758,380,2.62,2.29,2.28
4,SacchPomb,direct_interaction,physical_association,2622,971,2402,7.01,3.47,6.25
5,Rattus,direct_interaction,physical_association,2593,2035,1017,3.17,2.96,2.15
6,CelegansConnectome,ElectrJ,MonoSyn,275,253,260,19.42,8.15,12.61
7,YeastLandscape,positive_interactions,negative_interactions,4455,4422,4432,85.89,30.41,55.99
8,Cannes,RT,MT,438513,340349,233735,4.14,2.92,3.52
9,MLKing,RT,MT,327660,288738,79070,2.31,2.02,2.2
