In [4]:
import csv
import re
import time
import cProfile
from collections import defaultdict, deque
import networkx as nx

def compute_scc_bounds(graph):
    """在 SCC 图上迭代计算 alpha 和 omega 值。"""
    if not isinstance(graph, nx.DiGraph) or not graph.nodes:
        return None

    # 1. 获取所有 SCC
    sccs = list(nx.strongly_connected_components(graph))

    # 2. 从 SCC 创建一个新图
    scc_graph = nx.DiGraph()
    scc_weights = {}
    scc_node_map = {}

    for i, scc in enumerate(sccs):
        scc_node = f"SCC_{i}"
        scc_graph.add_node(scc_node)
        scc_weights[scc_node] = len(scc)
        for node in scc:
            scc_node_map[node] = scc_node

    # 在 SCC 之间添加边
    for u in graph.nodes:
        for v in graph.neighbors(u):
            if scc_node_map[u] != scc_node_map[v]:
                scc_graph.add_edge(scc_node_map[u], scc_node_map[v])

    # 3. 初始化所有 SCC 节点的 alpha 和 omega
    scc_bounds = {}
    for scc_node in scc_graph.nodes:
        scc_bounds[scc_node] = (scc_weights[scc_node], scc_weights[scc_node])

    # 4. 反向拓扑排序（处理循环）
    remaining_nodes = set(scc_graph.nodes) #考虑SCC图中的所有节点进行拓扑排序
    reverse_topological_order = []
    if len(remaining_nodes) > 0:
        try:
            reverse_topological_order = list(nx.topological_sort(scc_graph.reverse().subgraph(remaining_nodes)))
        except nx.NetworkXUnfeasible:
            print("Warning: SCC 图中检测到循环。使用启发式排序。")
            reverse_topological_order = sorted(remaining_nodes, key=lambda node: scc_graph.degree(node), reverse=True)

    # 5. 迭代并更新 alpha 和 omega
    for scc_node in reversed(reverse_topological_order): #反向迭代
        for neighbor in scc_graph.neighbors(scc_node):
            scc_bounds[scc_node] = (max(scc_bounds[scc_node][0], scc_bounds[neighbor][0]),
                                    scc_bounds[scc_node][1] + scc_bounds[neighbor][1])

    # 6. 将边界分配给原始节点
    node_bounds = {}
    for node in graph.nodes:
        scc_node = scc_node_map[node]
        node_bounds[node] = scc_bounds[scc_node]

    return node_bounds


def neighborhood_lower_bound(graph):
    nodes = list(graph.nodes())
    n = len(nodes)
    lower_bounds = {}
    scc_bounds = compute_scc_bounds(graph)
    if scc_bounds is None:
        print("Error computing SCC bounds. Returning empty dictionary.")
        return {}
    reachable_nodes_dict = {node: bounds[0] for node, bounds in scc_bounds.items()}
    k = 2
    max_iterations = 10
    
    neighbors_dict = {node: list(graph.neighbors(node)) for node in nodes}
    
    Y = {}
    S_un = {} # 将列表改为字典
    nVisited = {}
    finished = {}

    while k <= max_iterations:
        for s in nodes:
            degree_s = graph.out_degree(s)
            Y[(k - 1, s)] = degree_s
            S_un[s] = degree_s # 使用节点名称（字符串）作为键
            nVisited[s] = degree_s + 1
            finished[s] = False

        nFinished = 0
        while nFinished < n:
            for s in nodes:
                if finished[s]:
                    continue
                r_v = reachable_nodes_dict[s]
                if k == 2:
                    Y[(k, s)] = sum(Y[(k - 1, w)] for w in neighbors_dict[s])
                elif k > 2:
                    Y[(k, s)] = sum(Y[(k - 1, w)] for w in neighbors_dict[s])
                else:
                    Y[(k, s)] = 0
                nVisited[s] += Y.get((k - 1, s), 0)
                if nVisited[s] < r_v:
                    S_un[s] = S_un[s] + k * Y.get((k - 1, s), 0) # 使用节点名称作为键
                else:
                    S_un[s] = S_un[s] + k * (r_v - (nVisited[s] - Y.get((k - 1, s), 0))) # 使用节点名称作为键
                    nFinished += 1
                    finished[s] = True
        k += 1
    for v in nodes:
        r_v = reachable_nodes_dict[v]
        print(S_un[v])
        #print(n)
        print(r_v)
        print('#####################')
        lower_bounds[v] = (n - 1) * S_un[v] / ((r_v - 1) ** 2) if r_v > 1 else 0
    return lower_bounds

def create_mention_graph_with_centrality(filepath):
    start_time = time.time()
    edge_weights = defaultdict(int)
    def process_chunk(chunk):
        for row in chunk:
            try:
                timestamp, user, tweet_text = row
                mentions = re.findall(r'@([a-zA-Z0-9_]+)', tweet_text)
                for mention in mentions:
                    edge_weights[(user, mention)] += 1
            except (ValueError, IndexError) as e:
                print(f"Error processing row: {row}. Skipping. Error: {e}")
    chunk_size = 100000
    current_chunk = []
    with open(filepath, 'r', encoding='utf-8') as file:
        reader = csv.reader(file, delimiter='\t')
        for row in reader:
            current_chunk.append(row)
            if len(current_chunk) == chunk_size:
                process_chunk(current_chunk)
                current_chunk = []
    if current_chunk:
        process_chunk(current_chunk)
    G = nx.DiGraph()
    for (u, v), weight in edge_weights.items():
        G.add_edge(u, v, weight=weight)
    print(f"Graph created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")
    print("Calculating centrality...")
    lower_bounds = neighborhood_lower_bound(G)
    nx.set_node_attributes(G, lower_bounds, 'lower_centrality')
    end_time = time.time()
    total_time = end_time - start_time
    print(f"\nTotal time taken: {total_time:.4f} seconds")
    sorted_centralities = sorted(
        nx.get_node_attributes(G, 'lower_centrality').items(),
        key=lambda x: x[1],
        reverse=True
    )
    print("\nTop 10 nodes by lower centrality bound:")
    for node, centrality in sorted_centralities[:100]:
        print(f"Node: {node}, Lower Centrality: {centrality:.10f}")
    return G

if __name__ == "__main__":
    filepath = "twitter-small.tsv"  # Replace with your file path
    print("Creating graph...")
    cProfile.run('create_mention_graph_with_centrality("twitter-small.tsv")')

Creating graph...
Graph created with 94861 nodes and 155574 edges.
Calculating centrality...
-9
1
#####################
0
1
#####################
13642
1385
#####################
0
1
#####################
-27
1
#####################
0
1
#####################
0
1
#####################
-9
1
#####################
-27
1
#####################
0
1
#####################
13138
1385
#####################
0
1
#####################
12220
1385
#####################
0
1
#####################
0
1
#####################
13588
1385
#####################
-9
1
#####################
0
1
#####################
0
1
#####################
0
1
#####################
-99
1
#####################
-9
1
#####################
0
1
#####################
13777
1385
#####################
0
1
#####################
0
1
#####################
13408
1385
#####################
0
1
#####################
12121
1385
#####################
0
1
#####################
-9
1
#####################
0
1
#####################
13615
1385
####