In [59]:
import networkx as nx
import itertools
import random
import numpy as np

with open('Pagerank/higgs-mention_network.edgelist','r') as f:
    lines1 = f.readlines()

with open('Pagerank/higgs-reply_network.edgelist','r') as f:
    lines2 = f.readlines()
    
with open('Pagerank/higgs-retweet_network.edgelist','r') as f:
    lines3 = f.readlines()
    
with open('Pagerank/higgs-social_network.edgelist','r') as f:
    lines4 = f.readlines()
    
lines = lines1 + lines2 + lines3 + lines4

nodes = []
edges = []

# Retrieve edges from input files
for line in lines:
    edges.append(tuple(line.split(' ')[:2]))

# Retrieve nodes from edges
nodes = set(itertools.chain(*edges))
count_nodes = len(nodes)

# Assign numeric node id to each node
node_ids = dict(zip(nodes,np.arange(0,count_nodes)))

# Create networkx graph
G = nx.DiGraph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)
nx.set_node_attributes(G, node_ids, "node_id")

# Node for which recommendations are generated
input_node = '20120'

# Generate personalization vector based on input node
personalization_vec = np.zeros(count_nodes)
personalization_vec[G.nodes[input_node]['node_id']] = 1

# Transition matrix
tsn_mx = np.zeros(shape=(count_nodes,count_nodes))
for e in G.edges():
    tsn_mx[G.nodes[e[1]]['node_id'], G.nodes[e[0]]['node_id']] = 1/np.where(G.out_degree(e[0]) > 0, G.out_degree(e[0]), 1)

# Handling dangling nodes 
row_sum = tsn_mx.sum(axis=0)
dangling_weights = personalization_vec
is_dangling = np.where(row_sum == 0)[0]
    
# Pagerank vector, prev_pr to keep track of previous value to test convergence
pr = np.ones(count_nodes)
prev_pr = np.zeros(count_nodes)

beta = 0.85
tolerance = 1e-6

# Iterate pagerank calculation until convergence
while ~np.all(np.abs(pr-prev_pr) < tolerance):
    prev_pr = pr
    pr = beta*(np.matmul(tsn_mx,pr) + sum(pr[is_dangling])*dangling_weights) + (1-beta)*personalization_vec
    pr = pr/np.linalg.norm(pr,ord=1)

# Set pagerank values as node attributes
pagerank = dict(zip(list(node_ids.keys()),pr))
nx.set_node_attributes(G, pagerank, "pagerank")

# Print top 20 recommendations
print(sorted(G.nodes(), key=lambda n: G.nodes[n]['pagerank'],reverse=True)[:20])

['22832', '20120', '92293', '13813', '4741', '3369', '7274', '2164', '7756', '18319', '310', '18262', '6644', '11339', '12389', '390301', '81', '10836', '10844', '18356']


References:

https://github.com/networkx/networkx/blob/main/networkx/algorithms/link_analysis/pagerank_alg.py
<br>
https://towardsdatascience.com/pagerank-algorithm-fully-explained-dc794184b4af
<br>
https://www.youtube.com/watch?v=RVIr8Y5isek
<br>
Visualization
https://stellasia.github.io/blog/2020-03-07-page-rank-animation-with-networkx-numpy-and-matplotlib/

### Testing on smaller graph

In [58]:
import networkx as nx
import itertools
import random
import numpy as np

G = nx.DiGraph()
G.add_node(1)
G.add_node(2)
G.add_node(3)
G.add_node(4)
G.add_node(5)
G.add_edge(1,2)
G.add_edge(2,1)
G.add_edge(1,3)
G.add_edge(3,1)
G.add_edge(2,4)
G.add_edge(3,4)
G.add_edge(4,3)
G.add_edge(1,4)
G.add_edge(4,1)
G.add_edge(4,1)
G.add_edge(2,3)
G.add_edge(3,2)
G.add_edge(4,5)

nodes = {1,2,3,4,5}
count_nodes = len(nodes)

node_ids = dict(zip(nodes,np.arange(0,count_nodes)))

nx.set_node_attributes(G, node_ids, "node_id")

input_node = 3
personalization_vec = np.zeros(count_nodes)
personalization_vec[G.nodes[input_node]['node_id']] = 1


tsn_mx = np.zeros(shape=(count_nodes,count_nodes))
for e in G.edges():
    tsn_mx[G.nodes[e[1]]['node_id'], G.nodes[e[0]]['node_id']] = 1/np.where(G.out_degree(e[0]) > 0, G.out_degree(e[0]), 1)


s = tsn_mx.sum(axis=0)
dw = personalization_vec
is_dangling = np.where(s == 0)[0]


pr = np.ones(5)
prev_pr = np.zeros(5)
i=0
while ~np.all(np.abs(pr-prev_pr) < 1e-6):
    prev_pr = pr
    pr = 0.85*(np.matmul(tsn_mx,pr) + sum(pr[is_dangling]) * dw) + 0.15*personalization_vec
    pr = pr/np.linalg.norm(pr,ord=1)
    i=i+1
print(pr/np.linalg.norm(pr,ord=1))

nx.pagerank(G,alpha=0.85,personalization={1:0,2:0,3:1,4:0,5:0})

[0.20778166 0.16190772 0.36365756 0.20778166 0.05887141]


{1: 0.20778175890379338,
 2: 0.16190771686152597,
 3: 0.36365743071544715,
 4: 0.2077817589037934,
 5: 0.05887133461543995}