In [None]:
import heapq
import csv
import re
import time
import cProfile
from collections import defaultdict
import networkx as nx
import itertools
import pandas as pd

def neighborhood_lower_bound(graph):
    """Compute a neighborhood-based lower bound on the farness of every node.

    For every node ``s`` an upper bound ``gamma_k(s)`` on the number of nodes
    at distance exactly ``k`` is maintained, starting from
    ``gamma_1(s) = deg(s)`` and using the recurrences

        gamma_2(s) = sum(gamma_1(w) for w in N(s)) - deg(s)
        gamma_k(s) = sum(gamma_{k-1}(w) for w in N(s))
                     - gamma_{k-2}(s) * (deg(s) - 1)          for k >= 3

    Distance ``k`` is charged to at most ``gamma_k(s)`` nodes, level by level,
    until all ``n`` nodes are accounted for; nodes that do not fit into a
    level are charged at that level's distance and the node is finished.

    Args:
        graph: Object exposing ``nodes()``, ``degree(node)`` and
            ``neighbors(node)`` (the callers in this file pass an undirected
            networkx graph).

    Returns:
        dict mapping each node to its accumulated distance sum divided by
        ``n - 1``. For graphs with fewer than two nodes every value is 0.0.
    """
    nodes = list(graph.nodes())
    n = len(nodes)
    if n <= 1:
        # No other node exists, and dividing by n - 1 would fail.
        return {v: 0.0 for v in nodes}

    max_iterations = 100000  # safety cap on the number of levels explored

    degree = {s: graph.degree(s) for s in nodes}
    gamma_prev2 = dict.fromkeys(nodes, 0)  # gamma_{k-2}; not used while k == 2
    gamma_prev = dict(degree)              # gamma_{k-1}; gamma_1 is the degree
    S_un = dict(degree)                    # distance sum charged so far
    nVisited = {s: degree[s] + 1 for s in nodes}  # s itself plus its neighbors
    finished = dict.fromkeys(nodes, False)

    nFinished = 0
    k = 2

    while nFinished < n and k <= max_iterations:
        # gamma_k is evaluated for every node, finished or not, because
        # unfinished neighbors still need the value on the next level.
        gamma = {}
        for s in nodes:
            neighbor_total = sum(gamma_prev[w] for w in graph.neighbors(s))
            if k == 2:
                gamma[s] = neighbor_total - degree[s]
            else:
                gamma[s] = neighbor_total - gamma_prev2[s] * (degree[s] - 1)

        for s in nodes:
            if finished[s]:
                continue
            level_count = gamma[s]
            if level_count <= 0:
                # Nothing can be charged at this level; stop here instead of
                # iterating up to max_iterations without changing S_un[s].
                finished[s] = True
                nFinished += 1
                continue

            seen_before = nVisited[s]
            nVisited[s] = seen_before + level_count
            if nVisited[s] < n:
                S_un[s] += k * level_count
            else:
                # Only the nodes that are still missing are charged at k.
                S_un[s] += k * max(0, n - seen_before)
                finished[s] = True
                nFinished += 1

        gamma_prev2, gamma_prev = gamma_prev, gamma
        k += 1

    return {v: S_un[v] / (n - 1) for v in nodes}

def updateBoundsBFSCut(v, graph, x):
    """Breadth-first search from ``v`` that gives up once a cut-off bound reaches ``x``.

    The search proceeds level by level. When level ``d >= 1`` is about to be
    expanded, the bound

        LCUT = ((d + 2) * (n - nd) + S - y) / (n - 1)

    is evaluated, where ``nd`` is the number of nodes discovered so far, ``S``
    the sum of their distances from ``v`` and ``y`` the degree total
    accumulated since the previous evaluation. Every edge that leads to an
    already visited node raises the bound by ``1 / (n - 1)``.

    Args:
        v: Start node.
        graph: Object exposing ``nodes()``, ``degree(node)`` and
            ``neighbors(node)`` (the callers in this file pass an undirected
            networkx graph).
        x: Threshold; the search is abandoned as soon as ``LCUT >= x``.

    Returns:
        ``float('inf')`` if the bound reached ``x``; otherwise the sum of the
        distances from ``v`` to every discovered node divided by ``n - 1``
        (0.0 when the graph has fewer than two nodes).
    """
    n = len(list(graph.nodes()))
    if n <= 1:
        # No other node to reach, and n - 1 would be a zero divisor.
        return 0.0

    visited = {v}
    frontier = [v]  # nodes at distance d from v
    d = 0
    S = 0
    y = graph.degree(v) - 1
    nd = 1
    # No bound is known before level 1 is reached; -inf keeps the back-edge
    # update below well defined (e.g. for a self-loop on v) without pruning.
    LCUT = float('-inf')

    while frontier:
        if d > 0:
            LCUT = ((d + 2) * (n - nd) + S - y) / (n - 1)
            if LCUT >= x:
                return float('inf')
            y = 0  # restart the degree total for the next level

        next_frontier = []
        for u in frontier:
            for w in graph.neighbors(u):
                if w not in visited:
                    visited.add(w)
                    next_frontier.append(w)
                    # w is first reached from level d, so d(v, w) = d + 1;
                    # no separate shortest-path query is needed.
                    S += d + 1
                    y += graph.degree(w)
                    nd += 1
                else:
                    LCUT += 1 / (n - 1)
                    if LCUT >= x:
                        return float('inf')

        frontier = next_frontier
        d += 1

    # Search completed without reaching the threshold.
    return S / (n - 1)


def create_mention_graph_with_centrality(filepath, top_k=10):
    """Load an edge list and search for the ``top_k`` nodes with the smallest farness.

    The tab-separated file is read without a header row, turned into a
    directed graph and then converted to an undirected one. Initial bounds
    come from ``neighborhood_lower_bound``; nodes are then taken in order of
    increasing bound and refined with ``updateBoundsBFSCut``. Progress and
    the final ranking are printed.

    Args:
        filepath: Path of the tab-separated edge list.
        top_k: Number of nodes to keep in the ranking.

    Returns:
        The undirected graph that was built from the file.
    """
    start_time = time.time()

    data = pd.read_csv(filepath, sep="\t", header=None)
    directed_graph = nx.DiGraph()
    directed_graph.add_edges_from(data.values.tolist())
    graph = directed_graph.to_undirected()

    print(f"Undirected graph created with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges.")
    print("Calculating initial lower bounds...")

    initial_lower_bounds = neighborhood_lower_bound(graph)

    k_lowest_farness = heapq.nsmallest(top_k, initial_lower_bounds.items(), key=lambda item: item[1])
    print(f"\n{top_k} nodes with lowest farness values:")
    for node, farness in k_lowest_farness:
        print(f"Node: {node}, Farness: {farness}")

    Farn = initial_lower_bounds.copy()
    Top = []
    Q = [(Farn[node], node) for node in graph.nodes()]
    heapq.heapify(Q)

    initial_threshold = float('inf')

    # Every popped node is refined and considered for Top inside the loop, so
    # the last node of the queue is no longer dropped and an empty graph does
    # not crash on heappop.
    while Q and top_k > 0:
        lower_bound, v = heapq.heappop(Q)

        # Q is ordered by the initial bounds, so once the smallest remaining
        # bound is not below the current k-th value no later node can enter
        # Top (this relies on the initial values being lower bounds).
        if len(Top) >= top_k and lower_bound >= Farn[Top[-1]]:
            print(lower_bound)
            print(Farn[Top[-1]])
            print("Remaining nodes in Q cannot improve Top. Stopping.")
            break

        print(f"Processing node {v}")

        threshold = initial_threshold if len(Top) < top_k else Farn[Top[-1]]
        refined_bound = updateBoundsBFSCut(v, graph, threshold)

        print(f"Refined bound for node {v}: {refined_bound}")

        Farn[v] = refined_bound

        if len(Top) < top_k:
            Top.append(v)
            Top.sort(key=lambda node: Farn[node])
            print(f"Added node {v} to Top. Current Top: {Top}")
        elif refined_bound < Farn[Top[-1]]:
            Top.append(v)
            Top.sort(key=lambda node: Farn[node])
            Top = Top[:top_k]  # keep Top at length top_k
            print(f"Added node {v} to Top and trimmed. Current Top: {Top}")
        else:
            print(f"Node {v} not added to Top as its bound is not better than current top")

    print(f"\nFinal Top {top_k} nodes by refined lower centrality bound:")
    for i, node in enumerate(Top, 1):
        print(f"{i}. Node: {node}, Refined Lower Centrality: {Farn[node]:.10f}")

    end_time = time.time()
    total_time = end_time - start_time
    print(f"\nTotal time taken: {total_time:.4f} seconds")

    return graph

if __name__ == "__main__":
    # Script entry point: run the top-k search on the edge list named below.
    filepath = "com-youtube.ungraph.tsv"  # replace with the path to your file
    print("Creating graph...")
    graph_with_centrality = create_mention_graph_with_centrality(filepath)

In [None]:
import heapq
import csv
import re
import time
import cProfile
from collections import defaultdict
import networkx as nx
import itertools
import pandas as pd

def calculate_closeness_centrality(graph, top_k=10):
    """Return the ``top_k`` nodes with the highest closeness centrality.

    Args:
        graph (networkx.Graph): Input graph.
        top_k (int): Number of highest-scoring nodes to return.

    Returns:
        dict: Maps each selected node to its closeness centrality, in
        descending order of centrality.
    """
    # Closeness centrality of every node, as computed by networkx.
    scores = nx.closeness_centrality(graph)

    # Select the best (node, score) pairs directly and rebuild a dict from them.
    best_pairs = heapq.nlargest(top_k, scores.items(), key=lambda pair: pair[1])
    return dict(best_pairs)

def create_mention_graph_with_centrality(filepath, top_k=10):
    """Load an edge list and print the ``top_k`` nodes by closeness centrality.

    The tab-separated file is read without a header row, turned into a
    directed graph and then converted to an undirected one. The ranking is
    obtained from ``calculate_closeness_centrality`` and printed together with
    the elapsed wall-clock time.

    Args:
        filepath: Path of the tab-separated edge list.
        top_k: Number of nodes to report.

    Returns:
        The undirected graph that was built from the file.
    """
    start_time = time.time()

    data = pd.read_csv(filepath, sep="\t", header=None)
    directed_graph = nx.DiGraph()
    directed_graph.add_edges_from(data.values.tolist())
    graph = directed_graph.to_undirected()

    print(f"Undirected graph created with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges.")

    print("Calculating top k nodes by closeness centrality...")
    top_closeness = calculate_closeness_centrality(graph, top_k)

    # The heading reflects the requested size instead of a fixed "10".
    print(f"\nTop {top_k} nodes by closeness centrality:")
    for i, (node, centrality) in enumerate(top_closeness.items(), 1):
        print(f"{i}. Node: {node}, Closeness Centrality: {centrality:.10f}")

    end_time = time.time()
    total_time = end_time - start_time
    print(f"\nTotal time taken: {total_time:.4f} seconds")

    return graph

if __name__ == "__main__":
    # Script entry point: run the closeness-centrality ranking on the edge list named below.
    filepath = "com-youtube.ungraph.tsv"  # replace with the path to your file
    print("Creating graph...")
    graph_with_centrality = create_mention_graph_with_centrality(filepath)

In [None]:
import heapq
import csv
import re
import time
import cProfile
from collections import defaultdict
import networkx as nx
import itertools
import pandas as pd

def pagerank_lower_bound(graph):
    """Return the reciprocal of each node's PageRank score.

    Args:
        graph: networkx graph passed to ``nx.pagerank``.

    Returns:
        dict mapping every node of ``graph`` to ``1 / pagerank(node)``.
        NOTE(review): callers use these values as initial "lower bounds";
        whether they actually bound farness from below is not established
        here - confirm before relying on it.
    """
    scores = nx.pagerank(graph)

    # Invert each score so that highly ranked nodes get small values.
    inverted = {}
    for node in graph.nodes():
        inverted[node] = 1 / scores[node]
    return inverted

def updateBoundsBFSCut(v, graph, x):
    """Breadth-first search from ``v`` that gives up once a cut-off bound reaches ``x``.

    The search proceeds level by level. When level ``d >= 1`` is about to be
    expanded, the bound

        LCUT = ((d + 2) * (n - nd) + S - y) / (n - 1)

    is evaluated, where ``nd`` is the number of nodes discovered so far, ``S``
    the sum of their distances from ``v`` and ``y`` the degree total
    accumulated since the previous evaluation. Every edge that leads to an
    already visited node raises the bound by ``1 / (n - 1)``.

    Args:
        v: Start node.
        graph: Object exposing ``nodes()``, ``degree(node)`` and
            ``neighbors(node)`` (the callers in this file pass an undirected
            networkx graph).
        x: Threshold; the search is abandoned as soon as ``LCUT >= x``.

    Returns:
        ``float('inf')`` if the bound reached ``x``; otherwise the sum of the
        distances from ``v`` to every discovered node divided by ``n - 1``
        (0.0 when the graph has fewer than two nodes).
    """
    n = len(list(graph.nodes()))
    if n <= 1:
        # No other node to reach, and n - 1 would be a zero divisor.
        return 0.0

    visited = {v}
    frontier = [v]  # nodes at distance d from v
    d = 0
    S = 0
    y = graph.degree(v) - 1
    nd = 1
    # No bound is known before level 1 is reached; -inf keeps the back-edge
    # update below well defined (e.g. for a self-loop on v) without pruning.
    LCUT = float('-inf')

    while frontier:
        if d > 0:
            LCUT = ((d + 2) * (n - nd) + S - y) / (n - 1)
            if LCUT >= x:
                return float('inf')
            y = 0  # restart the degree total for the next level

        next_frontier = []
        for u in frontier:
            for w in graph.neighbors(u):
                if w not in visited:
                    visited.add(w)
                    next_frontier.append(w)
                    # w is first reached from level d, so d(v, w) = d + 1;
                    # no separate shortest-path query is needed.
                    S += d + 1
                    y += graph.degree(w)
                    nd += 1
                else:
                    LCUT += 1 / (n - 1)
                    if LCUT >= x:
                        return float('inf')

        frontier = next_frontier
        d += 1

    # Search completed without reaching the threshold.
    return S / (n - 1)


def create_mention_graph_with_centrality(filepath, top_k=10):
    """Load an edge list and rank nodes by farness, visiting them in PageRank order.

    The tab-separated file is read without a header row, turned into a
    directed graph and then converted to an undirected one. Nodes are taken
    in order of increasing ``pagerank_lower_bound`` value and refined with
    ``updateBoundsBFSCut``; the search stops at the first node whose refined
    value does not beat the current k-th entry. Progress and the final
    ranking are printed.

    NOTE(review): the queue priority is 1 / PageRank, which is not shown to
    be a bound on farness, so the early stop looks heuristic - confirm that
    an approximate ranking is acceptable.

    Args:
        filepath: Path of the tab-separated edge list.
        top_k: Number of nodes to keep in the ranking.

    Returns:
        The undirected graph that was built from the file.
    """
    start_time = time.time()

    data = pd.read_csv(filepath, sep="\t", header=None)
    directed_graph = nx.DiGraph()
    directed_graph.add_edges_from(data.values.tolist())
    graph = directed_graph.to_undirected()

    print(f"Undirected graph created with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges.")
    print("Calculating initial lower bounds using PageRank...")

    initial_lower_bounds = pagerank_lower_bound(graph)

    k_lowest_farness = heapq.nsmallest(top_k, initial_lower_bounds.items(), key=lambda item: item[1])
    print(f"\n{top_k} nodes with lowest farness values:")
    for node, farness in k_lowest_farness:
        print(f"Node: {node}, Farness: {farness}")

    Farn = initial_lower_bounds.copy()
    Top = []
    Q = [(Farn[node], node) for node in graph.nodes()]
    heapq.heapify(Q)

    initial_threshold = float('inf')

    # Every popped node is refined and considered for Top inside the loop, so
    # the last node of the queue is no longer dropped and an empty graph does
    # not crash on heappop.
    while Q and top_k > 0:
        _, v = heapq.heappop(Q)

        print(f"Processing node {v}")

        threshold = initial_threshold if len(Top) < top_k else Farn[Top[-1]]
        refined_bound = updateBoundsBFSCut(v, graph, threshold)

        print(f"Refined bound for node {v}: {refined_bound}")

        Farn[v] = refined_bound

        # Stop rule kept from the original: the first node that fails to beat
        # the current k-th value ends the search.
        if len(Top) == top_k and Farn[v] >= Farn[Top[-1]]:
            print(Farn[v])
            print(Farn[Top[-1]])
            print("Remaining nodes in Q cannot improve Top. Stopping.")
            break

        Top.append(v)
        Top.sort(key=lambda node: Farn[node])
        if len(Top) > top_k:
            Top = Top[:top_k]  # keep Top at length top_k
            print(f"Added node {v} to Top and trimmed. Current Top: {Top}")
        else:
            print(f"Added node {v} to Top. Current Top: {Top}")

    print(f"\nFinal Top {top_k} nodes by refined lower centrality bound:")
    for i, node in enumerate(Top, 1):
        print(f"{i}. Node: {node}, Refined Lower Centrality: {Farn[node]:.10f}")

    end_time = time.time()
    total_time = end_time - start_time
    print(f"\nTotal time taken: {total_time:.4f} seconds")

    return graph

if __name__ == "__main__":
    # Script entry point: run the PageRank-ordered top-k search on the edge list named below.
    filepath = "com-youtube.ungraph.tsv"  # replace with the path to your file
    print("Creating graph...")
    graph_with_centrality = create_mention_graph_with_centrality(filepath)