In [None]:
import numpy as np
import math
from scipy.sparse import lil_matrix, csr_matrix

In [None]:
# Tunable parameters for the PageRank iteration
DAMPING_FACTOR = 0.85  # Weight on link-derived rank; (1 - DAMPING_FACTOR) is spread evenly over all vertices
EPSILON = 0.001  # Convergence threshold: iteration stops once the largest per-vertex rank change is below this
MAX_ITERATIONS = 100  # Upper bound on update passes, so the loop ends even without convergence

In [None]:
def abs_diff(a, b):
    """Return the magnitude of the gap between ``a`` and ``b``.

    The result is symmetric in its arguments: ``abs_diff(a, b)`` equals
    ``abs_diff(b, a)``.
    """
    gap = a - b
    return abs(gap)

In [None]:
def initialize_auth(initialval, num_vertices):
    """Translate an initialization code into the starting score of a vertex.

    Args:
        initialval: Initialization code. Supported values:
            ``0``  -> ``0.0``
            ``1``  -> ``1.0``
            ``-1`` -> ``1 / num_vertices``
            ``-2`` -> ``1 / sqrt(num_vertices)``
        num_vertices: Number of vertices in the graph; only used by the
            ``-1`` and ``-2`` codes.

    Returns:
        float: The initial score to assign to every vertex.

    Raises:
        ValueError: If ``initialval`` is not one of the supported codes.
            (Previously the function silently returned ``None``, which only
            surfaced later as an unrelated arithmetic error.)
    """
    if initialval == 0:
        return 0.0
    elif initialval == 1:
        return 1.0
    elif initialval == -1:
        return 1.0 / num_vertices
    elif initialval == -2:
        return 1.0 / math.sqrt(num_vertices)
    # Fail loudly instead of falling through and returning None.
    raise ValueError(
        f"Unsupported initialval {initialval!r}; expected one of 0, 1, -1, -2"
    )

In [None]:
def load_graph(filename, initialval):
    """Read a directed edge list and build its sparse adjacency matrix.

    The first line of the file is always skipped (it is treated as a header).
    Every remaining line must contain two whitespace-separated integers
    ``u v`` describing an edge ``u -> v``. Blank lines and lines starting
    with ``#`` are ignored, so edge lists that carry several comment lines
    load without error.

    Args:
        filename: Path of the edge-list file.
        initialval: Initialization code forwarded to ``initialize_auth``.

    Returns:
        tuple: ``(adj_matrix, rank)``.
            ``adj_matrix`` is a CSR matrix of shape ``(n, n)`` holding ``1``
            at ``[u, v]`` for every edge ``u -> v`` (repeated edges are stored
            once). ``rank`` is a length-``n`` array filled with the initial
            score. ``n`` is the largest vertex id seen plus one (``1`` when
            the file contains no edges).

    Raises:
        ValueError: If a data line does not hold exactly two integers, or if
            a vertex id is negative.
    """
    sources = []
    targets = []

    with open(filename, "r") as file:
        next(file, None)  # Skip the header line; the default avoids StopIteration on an empty file
        for line in file:
            text = line.strip()
            if not text or text.startswith("#"):
                continue  # Ignore blank lines and comment lines
            u, v = map(int, text.split())
            if u < 0 or v < 0:
                # Negative ids would otherwise wrap around to the end of the matrix.
                raise ValueError(f"Negative vertex id in edge line: {text!r}")
            sources.append(u)
            targets.append(v)

    max_index = max(max(sources, default=0), max(targets, default=0))
    num_vertices = max_index + 1

    # Build the CSR matrix in one shot instead of assigning element by element.
    rows = np.asarray(sources, dtype=np.int64)
    cols = np.asarray(targets, dtype=np.int64)
    adj_matrix = csr_matrix(
        (np.ones(len(rows)), (rows, cols)),
        shape=(num_vertices, num_vertices),
    )
    # The constructor sums duplicate (u, v) pairs; reset every stored entry
    # to 1 so a repeated edge still counts as a single edge.
    adj_matrix.data[:] = 1.0

    # Every vertex starts with the same score, so compute it once.
    rank = np.full(num_vertices, initialize_auth(initialval, num_vertices))

    return adj_matrix, rank

In [None]:
def page_rank(filename, initialval, damping_factor=None, epsilon=None, max_iterations=None):
    """Compute PageRank scores for the graph in ``filename`` and print them.

    Each pass applies, for every vertex ``i``::

        new_rank[i] = (1 - d) / N + d * sum(rank[j] / out_degree[j])

    where the sum runs over all vertices ``j`` that have an edge ``j -> i``,
    ``d`` is the damping factor and ``N`` the number of vertices. Vertices
    without outgoing edges (dangling nodes) never appear as a source ``j``,
    so their rank is not redistributed.

    The update is evaluated as one sparse matrix-vector product per pass
    rather than a Python loop over vertices; results may differ from an
    element-by-element summation only by floating-point rounding.

    Args:
        filename: Path of the edge-list file, passed to ``load_graph``.
        initialval: Initialization code, passed to ``load_graph``.
        damping_factor: Optional override for ``DAMPING_FACTOR``.
        epsilon: Optional override for ``EPSILON``; iteration stops once the
            largest per-vertex change in a pass is below this value.
        max_iterations: Optional override for ``MAX_ITERATIONS``.

    Returns:
        None. The convergence message and the final scores are printed.
    """
    # Resolve overrides at call time so the module constants stay the defaults.
    if damping_factor is None:
        damping_factor = DAMPING_FACTOR
    if epsilon is None:
        epsilon = EPSILON
    if max_iterations is None:
        max_iterations = MAX_ITERATIONS

    adj_matrix, rank = load_graph(filename, initialval)
    num_vertices = len(rank)

    # Out-degree of each vertex = number of stored entries in its CSR row
    # (load_graph stores exactly one entry, with value 1, per edge).
    out_degree = np.diff(adj_matrix.indptr)
    # Dangling vertices have out-degree 0; dividing by 1 instead is harmless
    # because they have no outgoing edges to carry the quotient anywhere.
    safe_out_degree = np.maximum(out_degree, 1)

    # Row i of the transpose lists the vertices that link to i.
    incoming = adj_matrix.T.tocsr()
    teleport = (1 - damping_factor) / num_vertices

    for iteration in range(max_iterations):
        new_rank = teleport + damping_factor * (incoming @ (rank / safe_out_degree))

        # Largest per-vertex change in this pass drives the convergence check.
        max_diff = np.abs(new_rank - rank).max()
        rank = new_rank

        if max_diff < epsilon:
            print(f"Converged after {iteration + 1} iterations")
            break
    else:
        print("Reached maximum iterations without full convergence")

    # Display the final PageRank scores.
    # Labels are one-based: the vertex with id i is printed as "Page i+1".
    print("PageRank scores:")
    for i in range(num_vertices):
        print(f"Page {i+1}: {rank[i]:.6f}")

In [None]:
# Run PageRank on the edge-list file and print the resulting scores
filename = "web-Google.txt"  # Path to the edge-list file; change it to point at your own data
initialval = -1  # Initialization code: 0 -> 0.0, 1 -> 1.0, -1 -> 1/N, -2 -> 1/sqrt(N), with N the vertex count
page_rank(filename, initialval)