In [354]:
#import libraries
import pandas as pd 
import numpy as np
import scipy.sparse as sparse
import time
start_time= time.time()

Step 1. Load links dataset 

In [355]:
#load the links data
links = np.loadtxt('/Users/ruizhewang/Downloads/links.txt', delimiter=",", unpack=True)

#unpack your array
Journal_ID_1, Journal_ID_2, Number_of_citations = links[::]

In [356]:
#zip the array
links_1= np.array(list(zip(Journal_ID_1, Journal_ID_2, Number_of_citations)))

#convert the data to integer
links_1 = links_1.astype(int)

links_1

array([[ 758, 1476,    5],
       [ 758,  758,  150],
       [ 758, 5938,    3],
       ...,
       [9742, 7940,    1],
       [9742, 7744,    1],
       [9742, 5130,    0]])

Step 2. Creating an Adjacency Matrix

In [357]:
#source: https://stackoverflow.com/questions/29146892/numpy-scipy-build-adjacency-matrix-from-weighted-edgelist?rq=1
#create adjacency matrix for links data
shape = tuple(links_1.max(axis=0)[:2]+1)
coo = sparse.coo_matrix((links_1[:, 2], (links_1[:, 1], links_1[:, 0])), shape=shape,
                        dtype=links_1.dtype)

adj_matrix= coo.todense()

#links data adjacency matrix output
adj_matrix

matrix([[  34,    0,    0, ...,    0,    0,    0],
        [   0,   21,    0, ...,    0,    0,    0],
        [   0,    0, 1594, ...,    0,    0,    0],
        ...,
        [   0,    0,    0, ...,   20,    0,    0],
        [   0,    0,    0, ...,    0,    0,    0],
        [   0,    0,    0, ...,    0,    0,   40]])

Step 3. Modifying the Adjacency Matrix

In [358]:
#set the diagonal to zero
np.fill_diagonal(adj_matrix,0)

#links data adjacency matrix output
adj_matrix

matrix([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]])

In [359]:
#source: https://stackoverflow.com/questions/40200070/what-does-axis-0-do-in-numpys-sum-function
#create function to normalize the adjacency matrix
def normalize(matrix):
    col_sum= matrix.sum(axis=0)
    matrix= matrix/col_sum
    return (matrix)

old_H= normalize(adj_matrix)

#replace all nan with 0
H= np.nan_to_num(old_H)

#links data normalized adjacency matrix output
H



  matrix= matrix/col_sum


matrix([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

Step 4. Identifying the Dangling Nodes

In [360]:
#identify the dangling nodes
def dandlingNodes(matrix):
    col_sum= np.array(matrix.sum(axis=0)) #get the sum for each column
    col_sum= col_sum.flatten() #collapse into 1 dimension
    d=[]
    for i in range(len(col_sum)): #use for loop to replace the 0 with 1 and non-zeros with 0
        if col_sum[i]== 0:
            col_sum[i]= 1
        else:
            col_sum[i]= 0
        d.append(col_sum[i])
    return(d)

#get the dangling nodes for matrix H
d= np.array(dandlingNodes(H))

##links data dangling nodes output

d



array([0., 0., 0., ..., 0., 0., 0.])

Step 5. Calculating the Stationary Vector

In [361]:
#create the article vector 
def article(matrix):
    length= len(matrix)
    matrix= np.ones((length, 1))
    matrix= matrix/matrix.sum(axis=0)
    return(matrix)

#convert into one dimension
a= article(H).flatten()

##links data article vector output
a


array([9.30405657e-05, 9.30405657e-05, 9.30405657e-05, ...,
       9.30405657e-05, 9.30405657e-05, 9.30405657e-05])

In [362]:
#create initial start vector
def initialStartVector(matrix):
    length= len(matrix) #get the length of matrix
    array= np.full((length,1), 1) #fill the array with 1 by the number of times
    pi_0= array/length #get pi 0 by dividing the array by length
    return(pi_0)

pi_0= np.array(initialStartVector(H))

#links data pi_0 output
pi_0


array([[9.30405657e-05],
       [9.30405657e-05],
       [9.30405657e-05],
       ...,
       [9.30405657e-05],
       [9.30405657e-05],
       [9.30405657e-05]])

In [363]:
#source: https://stackoverflow.com/questions/47934252/valueerror-shapes-1-1000-and-1-1000-not-aligned-1000-dim-1-1-dim-0/47934275
#calculate the number of iterations to converge
alpha= 0.85
Epsilon= 0.00001
iteration= 1
residual= 3
while (residual > Epsilon):
    #calculate pi_1
    #reshape the second part to the dimension same as the first part
    pi_1 = (alpha * H * pi_0) + np.reshape((alpha * np.dot(d,pi_0) + (1 - alpha))*a, (10748,1), order= 'F')
    #calculate the residual and apply L1 normalization
    residual= np.linalg.norm (pi_1 - pi_0)
    #count the number of iterations
    iteration += 1
    #convergence with pi_0 and pi_1
    pi_0= pi_1

#number of iternations to convert
print('Number of Iterations to Converge:', iteration)

Number of Iterations to Converge: 22


Step 6. Calculationg the EigenFactor (EF) Score

In [364]:
#calculate the eigenfactor scores
def calEigenfactor(H, pi):
    #calculate the eigenfactor scores and convert into array
    EF= np.array(100*(np.dot(H, pi)/np.sum(np.dot(H,pi))))
    return(EF)

EigenfactorScores= calEigenfactor(H,pi_0)

#links data eigenfactor scores output
EigenfactorScores




array([[0.00346127],
       [0.00150941],
       [0.01572399],
       ...,
       [0.00310084],
       [0.00011093],
       [0.00261783]])

In [365]:
#source: https://stackoverflow.com/questions/6910641/how-do-i-get-indices-of-n-maximum-values-in-a-numpy-array
#get the index for the 10 largest elements
index= np.argpartition(EigenfactorScores.flatten(), -20)[-20:]

#use the index to find the top 10 eigenfactor scores 
top20Scores= EigenfactorScores.flatten()[index]
print(top20Scores)

[0.31121248 0.31659069 0.37262531 0.33019386 0.31919523 0.32730623
 0.37952447 0.47758936 0.38598353 0.4396224  0.38504837 0.42962702
 0.48060872 1.23460582 0.63425277 0.67933464 0.57686694 0.66469197
 1.4475384  1.41203757]


In [366]:
#get the total time to run the code
print("Time taken: " + str(round(time.time() - start_time,2)) + " seconds")

Time taken: 50.84 seconds
