# PageRank algorithm

In [1]:
import numpy as np
from scipy.sparse import dok_matrix
import scipy.linalg
from numba import njit, prange

In [2]:
np.loadtxt('matrix.txt', skiprows=1)

array([[0., 7.],
       [1., 0.],
       [3., 0.],
       [3., 2.],
       [3., 6.],
       [4., 0.],
       [4., 5.],
       [4., 6.],
       [5., 0.],
       [5., 6.],
       [6., 0.],
       [7., 0.]])

### Problem 1

In [3]:
def adjacency_matrix(filename, N):
    A = np.zeros((N, N))
    nodes = []
    with open(filename, 'r') as f:
        for c in f:
            try:
                node = list(map(int, c.strip().split()))
                nodes.append(node)
            except:
                pass
            
    for i in range(N):
        for j in range(N):
            if [i, j] in nodes:
                A[i, j] = 1
    return dok_matrix(A)

In [4]:
A = adjacency_matrix('matrix.txt', 8)

In [5]:
A.toarray()

array([[0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 1., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 1., 1., 0.],
       [1., 0., 0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.]])

### Problem 2

In [6]:
@njit
def calculate_K(A, N):
    A[A.sum(axis=1) == 0, :] = np.ones(N)
    D = A.sum(axis=1)
    return (A.T / D)

In [7]:
calculate_K(A.toarray(), 8)

array([[0.        , 1.        , 0.125     , 0.33333333, 0.33333333,
        0.5       , 1.        , 1.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.33333333, 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.33333333,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.33333333, 0.33333333,
        0.5       , 0.        , 0.        ],
       [1.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ]])

### Problem 3

In [8]:
@njit
def pagerank(A, N=None, d=0.85, tol=1e-5, max_iter=500):
    
    if N is None:
        N = A.shape[0]
    A = A[:N, :N]
    
    K = calculate_K(A, N)
    
    p = np.ones(N)
    p = p / p.sum()
    
    diff = 1e3
    i = 0
    
    while diff > tol and i < max_iter:
        p_new = d * K @ p + ((1 - d) / N) * np.ones(N)
        diff = np.linalg.norm(p - p_new)
        p = p_new
        i += 1
    
    return p

In [9]:
pagerank(A.toarray())

array([0.43868966, 0.02171029, 0.02786154, 0.02171029, 0.02171029,
       0.02786154, 0.04585394, 0.39460246])

### Problem 4

In [10]:
def pagerank_eigen(A, N=None, d=0.85, tol=1e-5, max_iter=500):
    
    if N is None:
        N = A.shape[0]
    A = A[:N, :N]
    
    K = calculate_K(A, N)
    
    B = d * K + ((1 - d) / N) * np.ones((N, N))
    
    eigs, eigvecs = scipy.linalg.eig(B)
    max_eig = eigs.argmax()
    print(eigs[max_eig])
    
    return eigvecs[max_eig]

In [11]:
pagerank_eigen(A.toarray())

(0.999999999999999+0j)


array([-7.38129111e-01, -7.07106781e-01,  2.36517595e-01, -9.45845122e-17,
       -1.29826927e-01, -2.77839441e-09,  2.77839403e-09, -1.86179804e-16])

### Problem 5

In [62]:
win_lose = []
with open('ncaa2013.csv', 'r') as file:
    file.readline()
    for line in file:
        teams = line.strip().split(',')
        win_lose.append(teams)
        
win_lose = np.array(win_lose)
teams = np.unique(win_lose.flatten())
N = len(teams)
team_id = dict(zip(teams, range(N)))
win_lose_id = np.array([[team_id[win], team_id[lose]] for win, lose in win_lose[:, ]])
win_lose_id

array([[168,   4],
       [ 52, 104],
       [122,  30],
       ...,
       [213, 340],
       [273, 317],
       [170,  88]])

In [63]:
A = np.zeros((N, N))

for game in win_lose_id:
    i, j = game
    if A[i, j] == 0:
        A[i, j] = 1

In [64]:
A

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [86]:
p = pagerank(A, d=0.7)
rank_id = p.argsort()[::-1]