# In [11]:
import numpy as np

def load_graph(filename):
    """Load an edge list from a text file into a 2-D integer array.

    The file is parsed with ``np.loadtxt`` using its defaults, i.e. values
    separated by whitespace and ``#`` starting a comment.

    Args:
        filename: Path (or any other source ``np.loadtxt`` accepts) of the
            edge-list file, one edge per row.

    Returns:
        Integer array with one row per edge. The result is always
        two-dimensional, even when the file contains a single edge.
    """
    # ndmin=2 stops loadtxt from squeezing a one-row file into a 1-D array,
    # which would break column indexing such as ``edges[:, 0]`` and the
    # ``for src, dst in edges`` unpacking used by the rest of the pipeline.
    return np.loadtxt(filename, dtype=int, ndmin=2)

def find_sinks(edges, num_nodes):
    """Return the ids of nodes that never appear as the source of an edge.

    As a side effect the sink ids are also written to ``sinks.csv`` in the
    current working directory, one id per line.

    Args:
        edges: Iterable of ``(source, destination)`` pairs of node ids.
        num_nodes: Number of nodes; ids are used as indices into an array
            of this length.

    Returns:
        A set holding the id of every node whose out-degree is zero.
    """
    outgoing = np.zeros(num_nodes, dtype=int)
    for edge in edges:
        source, _target = edge
        outgoing[source] += 1

    # Positions with a zero count are exactly the nodes without out-links.
    sink_ids = np.nonzero(outgoing == 0)[0]
    np.savetxt('sinks.csv', sink_ids, fmt='%d')
    return set(sink_ids)

def matrix(edges, num_nodes, sinks, alpha=0.85):
    """Assemble the dense, damped transition matrix used by PageRank.

    Entry ``[dst, src]`` starts as the number of ``src -> dst`` edges. Every
    column that has at least one edge is scaled to sum to one, every column
    listed in ``sinks`` is set to the uniform value ``1 / num_nodes``, and
    the result is blended with a uniform term using ``alpha``.

    Args:
        edges: Iterable of ``(source, destination)`` pairs of node ids.
        num_nodes: Side length of the square matrix.
        sinks: Iterable of node ids whose columns are made uniform.
        alpha: Weight given to the link structure; ``1 - alpha`` is spread
            evenly over all entries.

    Returns:
        A ``(num_nodes, num_nodes)`` float array equal to
        ``alpha * M + (1 - alpha) / num_nodes``.
    """
    transition = np.zeros((num_nodes, num_nodes))
    # Columns are sources, rows are destinations; repeated edges accumulate.
    for edge in edges:
        source, target = edge
        transition[target, source] += 1

    # Divide each non-empty column by its total in place; columns whose total
    # is zero are skipped by the ``where`` mask and therefore stay all-zero.
    column_totals = transition.sum(axis=0)
    np.divide(transition, column_totals, out=transition,
              where=column_totals > 0)

    for sink in sinks:
        transition[:, sink] = 1 / num_nodes

    uniform_share = (1 - alpha) / num_nodes
    return alpha * transition + uniform_share

def pagerank(M, num_nodes, T=10):
    """Apply ``M`` to the uniform vector ``T`` times and return the result.

    Args:
        M: Square transition matrix with ``num_nodes`` rows and columns.
        num_nodes: Length of the rank vector.
        T: Number of multiplication steps to perform; there is no
            convergence check.

    Returns:
        The rank vector after ``T`` multiplications by ``M``.
    """
    # Start with every node holding an equal share.
    rank = np.divide(np.ones(num_nodes), num_nodes)
    for _step in range(T):
        rank = M @ rank
    return rank

def save_pagerank(PR, filename='PR_results.csv'):
    """Write node ids and their scores to ``filename``, highest score first.

    Each output line has the form ``<node id>,<score>`` with the score
    printed to six decimal places.

    Args:
        PR: 1-D NumPy array of scores indexed by node id.
        filename: Destination passed to ``np.savetxt``.
    """
    # Sorting the negated scores yields indices in descending score order.
    ranking = np.argsort(-PR)
    ordered_scores = PR[ranking]
    table = np.column_stack((ranking, ordered_scores))
    # ``fmt`` is a multi-format string, so it alone controls the row layout
    # and ``delimiter`` has no effect on the output.
    np.savetxt(filename, table, fmt='%d,%.6f', delimiter=',')

def main():
    """Compute PageRank for the Gnutella edge list and save the results.

    Reads ``p2p-Gnutella04.txt`` from the working directory, determines the
    sink nodes (``find_sinks`` also writes them to ``sinks.csv``), builds the
    damped transition matrix, runs the power iteration with its default
    number of steps and writes the ranking via ``save_pagerank`` to its
    default output file.
    """
    filename = 'p2p-Gnutella04.txt'
    # Load the edge list once and reuse it for every later step.
    edges = load_graph(filename)

    # Node ids are used directly as array indices, so the node count must
    # exceed the largest id seen on either end of an edge. 10876 is a
    # hard-coded lower bound -- presumably the dataset's node count; TODO
    # confirm against the data set description.
    max_node_id = max(np.max(edges[:, 0]), np.max(edges[:, 1]))
    num_nodes = max(10876, max_node_id + 1)

    sinks = find_sinks(edges, num_nodes)
    # No edge filtering is needed here: a sink has out-degree zero by
    # definition, so no edge can have a sink as its source.
    M = matrix(edges, num_nodes, sinks)
    PR = pagerank(M, num_nodes)
    save_pagerank(PR)
    
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
