In [2]:
from utilities import edges_to_adjacency_matrix, load_edges

from pagerank_algo import pagerank_algorithm

from simrank_algo import simrank_algorithm

from hits_algo import hits_algorithm

from utils import StopWatch

import numpy as np

import os


# >> CONSTANTS
# Tunable parameters shared by the link-analysis runs in the main section.

# Iteration cap handed to pagerank_algorithm, hits_algorithm and simrank_algorithm.
max_iterations = 30

# Threshold passed as `epsilon` to pagerank_algorithm and hits_algorithm.
epsilon = 1e-8

# Passed as `damping_factor` to pagerank_algorithm.
damping_factor = 0.10

# Passed as `decay_factor` to simrank_algorithm.
decay_factor = 0.70

# << CONSTANTS


if (__name__ == "__main__"):
    # Driver: for every input graph, run PageRank, HITS and (unless the graph
    # is excluded) SimRank, print how long each run took, and write the
    # results as text files under results/<graph basename>/.

    # Basenames of graphs for which SimRank is skipped.
    # NOTE(review): presumably excluded for run time -- graph_5 already needs
    # about a minute in the recorded output; confirm before changing.
    simrank_exception = {
        "graph_6", "ibm-5000"
    }

    input_data_graph = [
        "data/graph_1.txt",
        "data/graph_2.txt",
        "data/graph_3.txt",
        "data/graph_4.txt",
        "data/graph_5.txt",
        "data/graph_6.txt"
    ]

    input_data_ibm = "data/ibm-5000.txt"

    # graph_names and edges_list are built in the same order (IBM file first)
    # so they can be iterated in lockstep below.
    graph_names = [ input_data_ibm ] + input_data_graph

    # The IBM file is loaded with csv_format = False, the graph_*.txt files
    # with csv_format = True.
    edges_list = [ load_edges(input_data_ibm, csv_format = False), *[
        load_edges(input_data_g, csv_format = True) for input_data_g in input_data_graph
    ]]

    output_folder = "results"

    os.makedirs(output_folder, exist_ok = True)

    for filename, edges in zip(graph_names, edges_list):

        print(filename)

        # "data/graph_1.txt" -> "graph_1"; used for the per-graph folder name,
        # the output file prefix and the SimRank exclusion check.
        basename = os.path.basename(os.path.splitext(filename)[0])

        current_folder = os.path.join(output_folder, basename)

        os.makedirs(current_folder, exist_ok = True)

        (adjacency_matrix, mapping) = edges_to_adjacency_matrix(edges)

        with StopWatch("PageRank: {} Seconds"):
            pagerank = pagerank_algorithm(
                adjacency_matrix,
                damping_factor = damping_factor,
                max_iterations = max_iterations,
                epsilon        = epsilon
            )

        np.savetxt(
            os.path.join(current_folder, f"{basename}_PageRank.txt"),
            pagerank, fmt = "%1.3f"
        )

        with StopWatch("HITS: {} Seconds"):
            (authority, hubness) = hits_algorithm(
                adjacency_matrix,
                max_iterations = max_iterations,
                epsilon        = epsilon
            )

        np.savetxt(
            os.path.join(current_folder, f"{basename}_HITS_authority.txt"),
            authority, fmt = "%1.3f"
        )

        np.savetxt(
            os.path.join(current_folder, f"{basename}_HITS_hub.txt"),
            hubness, fmt = "%1.3f"
        )

        if (basename in simrank_exception):
            continue

        with StopWatch("SimRank: {} Seconds"):
            simrank = simrank_algorithm(
                adjacency_matrix,
                decay_factor   = decay_factor,
                max_iterations = max_iterations
            )

        # fmt = "%1.3f" already writes three decimals, so no separate
        # rounding of the matrix is needed.
        np.savetxt(
            os.path.join(current_folder, f"{basename}_SimRank.txt"),
            simrank, fmt = "%1.3f"
        )

data/ibm-5000.txt
PageRank: 0.606667 Seconds
HITS: 0.478551 Seconds
data/graph_1.txt
PageRank: 0.000995 Seconds
HITS: 0.000386 Seconds
SimRank: 0.00731 Seconds
data/graph_2.txt
PageRank: 0.000246 Seconds
HITS: 0.00035 Seconds
SimRank: 0.006712 Seconds
data/graph_3.txt
PageRank: 0.002136 Seconds
HITS: 0.002034 Seconds
SimRank: 0.005115 Seconds
data/graph_4.txt
PageRank: 0.003151 Seconds
HITS: 0.004535 Seconds
SimRank: 0.019891 Seconds
data/graph_5.txt
PageRank: 0.254019 Seconds
HITS: 0.241586 Seconds
SimRank: 57.060629 Seconds
data/graph_6.txt
PageRank: 0.543594 Seconds
HITS: 0.495639 Seconds
