In [1]:
import numpy as np
from sklearn.preprocessing import normalize
import time
import pandas as pd

### Initializing constants

In [2]:
alpha = 0.85
epsilon = 0.00001
file = "links.txt"

### Reading the links.txt file

In [3]:
start_time = time.time()
df = pd.read_csv(file,sep=",",header=None,names=['jrnl','jrnl_cited','count'])
if df['jrnl'].nunique() > df['jrnl_cited'].nunique():
    nodes = df['jrnl'].nunique()
else:
    nodes = df['jrnl_cited'].nunique()
print("Number of nodes:", nodes)
print("Shape of the dataframe:",df.shape)

Number of nodes: 10748
Shape of the dataframe: (4283119, 3)


### Adjacency matrix (Z)

In [4]:
# Example raw adjacency matrix (Z)
# raw_data = np.matrix('1 0 2 0 4 3; 3 0 1 1 0 0; 2 0 4 0 1 0; 0 0 1 0 0 1; 8 0 3 0 5 2; 0 0 0 0 0 0')
# raw_data


# Initialise an adj. matrix with 0
adj_matrix = np.zeros((nodes,nodes))
# File read    
with open(file) as f:
    for index, lines in enumerate(f):
        # File split on commas
        line = lines.split(',')
        # Citing journal 
        jrnl = int(line[0])
        # Journal that is cited
        jrnl_cited = int(line[1])
        # Number of time the journal is cited from the citing journal
        citations = int(line[2])
            
    #Creating adjacency matrix - Intersection of jrnl(column) and jrnl_cited (row) contains the number of citations
        adj_matrix[jrnl_cited][jrnl] = citations
print("Shape:",adj_matrix.shape)

Shape: (10748, 10748)


### Set the diagonals of the Adjacency matrix to 0

In [5]:
# Set the diagonal to zero
# np.fill_diagonal(raw_data, 0)
# raw_data

np.fill_diagonal(adj_matrix, 0)
print("Adjacency matrix after diagonals are 0:")
print(adj_matrix)
print("Shape:",adj_matrix.shape)

Adjacency matrix after diagonals are 0:
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Shape: (10748, 10748)


### Normalizing adjacency matrix (H)

In [6]:
# Normalize the columns. This matrix is H.
# h = normalize(raw_data, axis=0, norm='l1')

h = normalize(adj_matrix, axis=0, norm='l1')
print("Normalized matrix:")
print(h)
print("Shape:",h.shape)

Normalized matrix:
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Shape: (10748, 10748)


### Dangling nodes matrix (d)

In [7]:
# Dangling Nodes
# d = np.matrix('0 1 0 0 0 0')

dangling_node = np.zeros((nodes))
for i in range(nodes):
    count = 0
    for j in range(nodes):
        if h[j][i]==0:
            count+=1
    if count == nodes:
        dangling_node[i] = 1
print(dangling_node)
print(dangling_node.shape)


[0. 0. 0. ... 0. 0. 0.]
(10748,)


### Article vector (a)

In [8]:
# Article vector
# a = np.array([3/14, 2/14, 5/14, 1/14, 2/14, 1/14]).reshape(-1, 1)

a = np.full((1, nodes), 1)
a = a.T

article_matrix_sum = 0

for i in range(nodes):
    article_matrix_sum = article_matrix_sum + a[i][0]
a = a/article_matrix_sum
print(a)
print(a.shape)

[[9.30405657e-05]
 [9.30405657e-05]
 [9.30405657e-05]
 ...
 [9.30405657e-05]
 [9.30405657e-05]
 [9.30405657e-05]]
(10748, 1)


### Initial start vector

In [9]:
# Initial start vector
# pi = np.array([1/6, 1/6, 1/6, 1/6, 1/6, 1/6]).reshape(-1, 1)

pi = np.full((nodes, 1), 1/nodes)
print(pi)
print("Shape:",pi.shape)

[[9.30405657e-05]
 [9.30405657e-05]
 [9.30405657e-05]
 ...
 [9.30405657e-05]
 [9.30405657e-05]
 [9.30405657e-05]]
Shape: (10748, 1)


### Calculating influence matrix

In [10]:
influence_vector = np.full((1, nodes), 0) #Creating influence vector of shape (1,n), where all elements = 0
pi_initial_matrix = pi 
condition = 0.1 #providing initial condition to enter while loop

iter = 0 # number of iterations
while condition >= epsilon:
    # alpha x H . pi + [alpha x d . pi + (1-alpha)].a
    influence_vector = alpha*np.dot(h,pi_initial_matrix) + np.dot(a,[alpha*np.dot(dangling_node,pi_initial_matrix)+(1-alpha)])
    array_temp = np.subtract(influence_vector,pi_initial_matrix)
    condition = np.linalg.norm(array_temp, ord=1)
    pi_initial_matrix=influence_vector
    iter+=1
stop_time = time.time()
print("Influnce vector:")
print(influence_vector)

Influnce vector:
[[4.38729652e-05]
 [2.74427015e-05]
 [1.47081207e-04]
 ...
 [4.08557473e-05]
 [1.56821263e-05]
 [3.67818515e-05]]


### Calculating Eigenfactor

In [11]:
# H . (influence_vector)
Eigenfactor = np.dot(h, influence_vector)
EF_sum =0

# sum(H . (influence_vector))
for i in range(nodes):
    EF_sum = EF_sum + Eigenfactor[i]

# Normalizing EF matrix [Eigenfactor/sum of Eigenfactor] and multiplying by 100
Eigenfactor = (Eigenfactor/EF_sum)*100

print("Eigen Factor:")
print(Eigenfactor)

Eigen Factor:
[[0.00346107]
 [0.00150851]
 [0.0157262 ]
 ...
 [0.00310253]
 [0.00011092]
 [0.00261838]]


### a. Scores for the top 20 journals

In [12]:
Eigenfactor = Eigenfactor[Eigenfactor[:, 0].argsort()][::-1][:20]
print(Eigenfactor)

[[1.44811869]
 [1.41271864]
 [1.23503457]
 [0.67950236]
 [0.66487912]
 [0.63463484]
 [0.57723297]
 [0.48081512]
 [0.47777265]
 [0.4397348 ]
 [0.42971775]
 [0.38620652]
 [0.38512026]
 [0.3795776 ]
 [0.37278901]
 [0.33030628]
 [0.3275079 ]
 [0.31927167]
 [0.31677903]
 [0.31125705]]


### b. Time taken to run the code on the real network:

In [13]:
time_taken = (stop_time - start_time)
# print(time_taken)
time_secs = round(time_taken%60)
time_mins = int(time_taken/60)
print("Time taken for the iterations: ~",time_mins,"mins",time_secs, "seconds.")

Time taken for the iterations: ~ 1 mins 34 seconds.


### c. Number of iterations to get to the answer

In [14]:
print("Iterations:",iter)

Iterations: 32
