# PageRank

Alberto Quaini

### Import libraries

In [1]:
import numpy as np
import pandas as pd
import scipy.linalg as la
import scipy.sparse as sp

## Problem 1

In [37]:
def graph_adj(file, N):
    """Build an N x N sparse adjacency matrix from an edge-list text file.

    Every usable line of ``file`` starts with two whitespace-separated
    integers ``i j``; entry ``(i, j)`` of the result is set to 1.  Lines that
    do not fit this pattern (blank lines, lines with fewer than two tokens,
    lines whose first two tokens are not integers, e.g. a header) are skipped.

    Parameters
    ----------
    file : str or path-like
        Path of the text file to read.
    N : int
        Number of nodes; the returned matrix has shape ``(N, N)``.

    Returns
    -------
    scipy.sparse.dok_matrix
        Matrix holding 1.0 at each ``(i, j)`` pair listed in the file.

    Notes
    -----
    Indices outside the matrix are not handled here; whatever error the
    sparse matrix raises on assignment propagates to the caller.
    """
    Adj = sp.dok_matrix((N, N))
    with open(file, 'r') as myfile:
        for line in myfile:
            tokens = line.split()
            # A blank or one-token line carries no edge; indexing tokens[1]
            # on it would raise IndexError, so skip it explicitly.
            if len(tokens) < 2:
                continue
            try:
                src, dst = int(tokens[0]), int(tokens[1])
            except ValueError:
                # Non-numeric line (such as a header): ignore it.
                continue
            Adj[src, dst] = 1

    return Adj

In [40]:
# Read the edge list in 'matrix.txt' as an 8-node graph and print the stored entries.
print(graph_adj('matrix.txt', 8))

  (0, 7)	1.0
  (1, 0)	1.0
  (3, 0)	1.0
  (3, 2)	1.0
  (3, 6)	1.0
  (4, 0)	1.0
  (4, 5)	1.0
  (4, 6)	1.0
  (5, 0)	1.0
  (5, 6)	1.0
  (6, 0)	1.0
  (7, 0)	1.0


In [99]:
# Dense 8 x 8 adjacency matrix of the example graph: A[i, j] = 1 marks the pair (i, j).
# Row 2 is all zeros, i.e. node 2 has no outgoing entries.
A = np.array([[ 0,  0,  0,  0,  0,  0,  0,  1],
              [ 1,  0,  0,  0,  0,  0,  0,  0],
              [ 0,  0,  0,  0,  0,  0,  0,  0],
              [ 1,  0,  1,  0,  0,  0,  1,  0],
              [ 1,  0,  0,  0,  0,  1,  1,  0],
              [ 1,  0,  0,  0,  0,  0,  1,  0],
              [ 1,  0,  0,  0,  0,  0,  0,  0],
              [ 1,  0,  0,  0,  0,  0,  0,  0]])

## Problem 2

In [100]:
def produce_K(A):
    """Return the column-normalised transition matrix K for adjacency matrix A.

    Rows of ``A`` that are entirely zero (nodes without outgoing links) are
    treated as linking to every node.  ``K[i, j]`` is then ``A[j, i]`` divided
    by the sum of row ``j`` of ``A``, so every column of ``K`` sums to 1.

    Parameters
    ----------
    A : array_like, shape (n, n)
        Dense adjacency matrix.  It is not modified.

    Returns
    -------
    numpy.ndarray, shape (n, n)
        Float transition matrix.
    """
    # Work on a private float copy so the caller's matrix is left untouched.
    A = np.array(A, dtype=float)

    # Rows with no non-zero entry are sinks; replace them by rows of ones.
    sinks = ~A.any(axis=1)
    A[sinks, :] = 1

    D = A.sum(axis=1)
    # Broadcasting divides column j of A.T (that is, row j of A) by D[j].
    return A.T / D

In [102]:
# Display the transition matrix computed from the example matrix A.
produce_K(A)

array([[0.        , 1.        , 0.125     , 0.33333333, 0.33333333,
        0.5       , 1.        , 1.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.33333333, 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.33333333,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.33333333, 0.33333333,
        0.5       , 0.        , 0.        ],
       [1.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ]])

## Problem 3

In [108]:
def steady_graph(A, N = None, d = .85, tol = 1e-5):
    """Approximate the PageRank steady state of A by fixed-point iteration.

    Starting from the uniform vector, the update
    ``p <- d * K @ p + (1 - d) / n * 1`` is repeated until the Euclidean
    distance between successive iterates is at most ``tol``, where ``K`` is
    the matrix returned by ``produce_K``.

    Parameters
    ----------
    A : numpy.ndarray, shape (n, n)
        Dense adjacency matrix.  It is not modified.
    N : int, optional
        If given, only the leading ``N x N`` block of ``A`` is used.
    d : float, optional
        Damping factor used in the update.
    tol : float, optional
        Stopping threshold on the distance between successive iterates.

    Returns
    -------
    numpy.ndarray, shape (n, 1)
        The last iterate, as a column vector.

    Raises
    ------
    ValueError
        If the (possibly truncated) matrix is not square.
    """
    if N is not None:
        A = A[:N, :N]
    m, n = A.shape
    if m != n:
        raise ValueError('A must be a square matrix.')

    # A may be the caller's array or a view of it; pass a copy so that the
    # normalisation step can never write into the caller's data.
    K = produce_K(A.copy())

    # Constant term of the update, computed once instead of on every pass.
    teleport = (1 - d) / n * np.ones((m, 1))

    # Deterministic start: the uniform vector makes repeated runs give
    # identical results (no dependence on the random number generator).
    p = np.full((m, 1), 1 / n)
    dist = tol + 1
    while dist > tol:
        new_p = d * K @ p + teleport
        dist = la.norm(new_p - p)
        p = new_p

    return p

In [109]:
# Display the iteratively computed steady-state vector for the example matrix A.
steady_graph(A)

array([[0.43869797],
       [0.02171029],
       [0.02786154],
       [0.02171029],
       [0.02171029],
       [0.02786154],
       [0.04585394],
       [0.39461031]])

## Problem 4

In [126]:
def Steady_Graph(A, N = None, d = .85, tol = 1e-5):
    """Compute the PageRank steady state of A by solving a linear system.

    Solves ``(I - d * K) p = (1 - d) / n * 1`` directly, where ``K`` is the
    matrix returned by ``produce_K``.

    Parameters
    ----------
    A : numpy.ndarray, shape (n, n)
        Dense adjacency matrix.  It is not modified.
    N : int, optional
        If given, only the leading ``N x N`` block of ``A`` is used.
    d : float, optional
        Damping factor.
    tol : float, optional
        Unused by the direct solve; kept so the signature matches
        ``steady_graph``.

    Returns
    -------
    numpy.ndarray, shape (n,)
        Solution of the linear system, as a 1-D array.

    Raises
    ------
    ValueError
        If the (possibly truncated) matrix is not square.
    """
    if N is not None:
        A = A[:N, :N]
    m, n = A.shape
    if m != n:
        raise ValueError('A must be a square matrix.')

    # A may be the caller's array or a view of it; pass a copy so that the
    # normalisation step can never write into the caller's data.
    K = produce_K(A.copy())
    rhs = ((1 - d) / n) * np.ones(m)
    p = la.solve(np.eye(m) - d * K, rhs)

    return p

In [127]:
# Display the steady-state vector obtained from the direct linear solve for A.
Steady_Graph(A)

array([0.43869288, 0.02171029, 0.02786154, 0.02171029, 0.02171029,
       0.02786154, 0.04585394, 0.39459924])

## Problem 5

In [178]:
# Load the game table as a plain array.  `.values` is used because
# `DataFrame.as_matrix()` no longer exists in pandas >= 1.0.
ncaa = pd.read_csv('ncaa2013.csv', header = 0).values
m, n = ncaa.shape

# Map every distinct team name to an integer id; idx has the same layout as
# ncaa but holds ids instead of names.
teams, idx = np.unique(ncaa, return_inverse = True)
idx = idx.reshape((m, n))

t = len(teams)
Adj = np.zeros((t,t))

# One edge per game, from the team in column 1 to the team in column 0
# (presumably loser -> winner; TODO confirm against the csv header).
Adj[idx[:, 1], idx[:, 0]] = 1

ranks = steady_graph(Adj, d = 0.7)
# Sort team names by ascending score, then reverse to get best-first;
# the result is a (t, 1) column of names.
ranks = teams[np.argsort(ranks.T)].T[::-1]

print('The top 5 ranked teams are, in order:')
for i in range(5):
    print(ranks[i][0])

The top 5 ranked teams are, in order:
Duke
Butler
Louisville
Illinois
Indiana
