In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx

In [15]:
# Reading the Dataset

data_file = open('../data/Wiki-Vote.txt', 'r')
lines = data_file.readlines()

In [16]:
#placing the pointer at the start of edge list in the list of lines read from dataset text file
start_ptr = 0
for i in range(len(lines)):
    if(lines[i][0] == '#'):
        start_ptr += 1

In [17]:
#forming the set of NodeID present in the graph
node_set = set()
for i in range(start_ptr, len(lines)):
    split1 = lines[i].split('\t')
    node1 = int(split1[0])
    node2 = int(split1[1][:(len(split1[1]) - 1)])
    node_set.add(node1)
    node_set.add(node2)

In [18]:
num_nodes = len(node_set) #number of nodes

In [19]:
#NodeIDs are not continuous so we assign a unique integer number to each node. The numbers are continuous and in range [0, (number_of_nodes - 1)]
node_num = 0
node_id_to_num = {} #nodeID to node_number mapping [each nodeID is assigned a unique node_number, node_number range from 0 to (number_of_nodes - 1)]
node_num_to_id = {} #node_number to nodeID mapping
for node_id in node_set:
    node_id_to_num[node_id] = node_num
    node_num_to_id[node_num] = node_id
    node_num += 1

In [20]:
#forming edge list, will use this to create graph using networkx
edge_list = [] #list of edges
for i in range(start_ptr, len(lines)): #reading the edges from txt file lines
    split1 = lines[i].split('\t')
    node1_id = int(split1[0]) #nodeID_1 (From NodeID)
    node2_id = int(split1[1][:(len(split1[1]) - 1)]) #nodeID_2 (To NodeID)
    node1_num = node_id_to_num[node1_id] #get node_num for the nodeID
    node2_num = node_id_to_num[node2_id] #get node_num for the nodeID
    edge_list.append([node1_num, node2_num]) #adding edge to edgelist

Forming Graph

In [21]:
G = nx.DiGraph() #making a directed graph from edge list
G.add_nodes_from(node_id_to_num.values()) #adding nodes to graph
G.add_edges_from(edge_list) #adding edges to graph

Pagerank Scores

In [22]:
pagerank_scores = nx.pagerank(G) #computing pagerank scores 

In [23]:
print(f"PageRank Scores :\n{pagerank_scores}")

PageRank Scores :
{0: 0.00020539498232448016, 1: 5.048782345863015e-05, 2: 5.048782345863015e-05, 3: 0.00031183250978437455, 4: 5.048782345863015e-05, 5: 0.00032663557615950425, 6: 5.048782345863015e-05, 7: 0.0004213996615598798, 8: 5.048782345863015e-05, 9: 5.048782345863015e-05, 10: 5.048782345863015e-05, 11: 5.048782345863015e-05, 12: 0.00368122072952927, 13: 5.048782345863015e-05, 14: 5.048782345863015e-05, 15: 5.048782345863015e-05, 16: 0.00013112179292607272, 17: 5.048782345863015e-05, 18: 5.048782345863015e-05, 19: 5.048782345863015e-05, 20: 0.00017122390637420328, 21: 5.048782345863015e-05, 22: 5.048782345863015e-05, 23: 5.048782345863015e-05, 24: 5.048782345863015e-05, 25: 0.0016986730322136924, 26: 0.00018490986415744417, 27: 0.00017349553934328335, 28: 5.048782345863015e-05, 29: 0.00019385458408414878, 30: 0.0003386160040196025, 31: 0.0002606886538476849, 32: 0.0007007673625519543, 33: 0.00013857302939342643, 34: 5.048782345863015e-05, 35: 0.00016519399800671061, 36: 0.00034

Hubs and Authorities Scores

In [24]:
#HITS algorithm
hubs_scores = hits_scores[0] #hubs_scores
authorities_scores = hits_scores[1] #authorities score

In [25]:
print(f"Hubs Scores :\n{hubs_scores}")

Hubs Scores :
{0: 4.021031639777644e-05, 1: 7.319607685824191e-05, 2: 3.5017884744336455e-05, 3: 0.0010539872861763623, 4: 8.200618013274952e-05, 5: 0.0003200123333087152, 6: 0.0002318196335517146, 7: 0.00018207047608178467, 8: 0.004921182063808106, 9: 0.0002882286245176333, 10: 3.120053955681936e-05, 11: 0.0004975871202612589, 12: 0.0004157326033809384, 13: 8.409812451935364e-05, 14: 0.00010084393686112615, 15: 3.617008656019808e-05, 16: 7.217473720148215e-05, 17: 0.0008868292175658347, 18: 0.0002928634622208908, 19: 6.217339200066013e-06, 20: 0.00018134382079422958, 21: 0.002645359109055595, 22: 0.00021311562589770795, 23: 0.0001458915638968585, 24: 0.0005196190515395972, 25: 0.00035693360951943707, 26: 0.0006751699797052777, 27: 7.925509536668044e-05, 28: 3.701485755755213e-05, 29: 1.3200870183843304e-05, 30: 6.99081608901082e-05, 31: 8.007226195926281e-06, 32: 0.00021942583528112783, 33: 0.0019031263078179547, 34: 0.00019759687828962253, 35: 4.947377401933716e-05, 36: 0.00010662223

In [26]:
print(f"Authorities Scores :\n{hubs_scores}")

Authorities Scores :
{0: 4.021031639777644e-05, 1: 7.319607685824191e-05, 2: 3.5017884744336455e-05, 3: 0.0010539872861763623, 4: 8.200618013274952e-05, 5: 0.0003200123333087152, 6: 0.0002318196335517146, 7: 0.00018207047608178467, 8: 0.004921182063808106, 9: 0.0002882286245176333, 10: 3.120053955681936e-05, 11: 0.0004975871202612589, 12: 0.0004157326033809384, 13: 8.409812451935364e-05, 14: 0.00010084393686112615, 15: 3.617008656019808e-05, 16: 7.217473720148215e-05, 17: 0.0008868292175658347, 18: 0.0002928634622208908, 19: 6.217339200066013e-06, 20: 0.00018134382079422958, 21: 0.002645359109055595, 22: 0.00021311562589770795, 23: 0.0001458915638968585, 24: 0.0005196190515395972, 25: 0.00035693360951943707, 26: 0.0006751699797052777, 27: 7.925509536668044e-05, 28: 3.701485755755213e-05, 29: 1.3200870183843304e-05, 30: 6.99081608901082e-05, 31: 8.007226195926281e-06, 32: 0.00021942583528112783, 33: 0.0019031263078179547, 34: 0.00019759687828962253, 35: 4.947377401933716e-05, 36: 0.0001