## PageRank

## Exercise 1

In [113]:
import numpy as np
from scipy.sparse import dok_matrix
from scipy import linalg as la
import pandas as pd

In [76]:
def readmatfromtext(filename):
    '''
    Accepts a .txt file with nodes listed in each line and
    creates an adjacency matrix from them.
    '''
    nodes = []
    N = 0
    with open(filename, 'r') as myfile:
        for line in myfile:
            try:
                line = list(map(int, line.strip().split()))
                nodes.append(line)
            except:
                pass
    #Grab highest node
    N = np.amax(nodes) + 1
    A = np.zeros((N,N))
    for i in range(N):
        for j in range(N):
            if[i, j] in nodes:
                A[i, j] = 1
    return dok_matrix(A)

readmatfromtext("Data/matrix.txt").toarray()

array([[0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 1., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 1., 1., 0.],
       [1., 0., 0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.]])

## Exercise 2

In [77]:
def Kmat(A):
    '''
    Accepts an adjacency matrix and returns the K matrix
    '''
    #Compute modified adjacency matrix
    N = A.shape[0]
    A[A.sum(axis=1) == 0, :] = np.ones(N)
    # Get diagonals
    D = A.sum(axis = 1)
    K = A.T / D
    return(K)

Data = readmatfromtext("Data/matrix.txt").toarray()

Kmat(Data)

array([[0.        , 1.        , 0.125     , 0.33333333, 0.33333333,
        0.5       , 1.        , 1.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.33333333, 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.        , 0.33333333,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.125     , 0.33333333, 0.33333333,
        0.5       , 0.        , 0.        ],
       [1.        , 0.        , 0.125     , 0.        , 0.        ,
        0.        , 0.        , 0.        ]])

## Exercise 3

In [79]:
def steadystate(A, N = 'none', d =.85, tol = 1e-5, maxiter=1000):
    '''
    Input A is the adjacency matrix - we can find it by using
    for example the function above. Output will be a probability
    density vector for each of the nodes of the graph.
    '''
    # Restrict to relevant section of A
    if N != 'none':
        A = A[:N,:N]
    n = A.shape[0]
    #Initialize random vector for p_t
    p_t = np.random.rand(n)
    #normalize
    p_t = p_t / np.sum(p_t)
    #Initialize distance and iter counters
    pdist = 5
    iter = 0
    while (pdist > tol) & (iter < maxiter):
        pnext = d * A @ p_t + ((1 - d)/ n)
        pdist = la.norm(pnext - p_t)
        p_t = pnext
        print("At iter:", iter, "Distance was", pdist)
        iter += 1
    return(p_t)

Data = readmatfromtext("Data/matrix.txt").toarray()
A = Kmat(Data)
steadystate(A)
    

At iter: 0 Distance was 0.4124925690034114
At iter: 1 Distance was 0.28485637604362485
At iter: 2 Distance was 0.07430087821591272
At iter: 3 Distance was 0.07255765968259882
At iter: 4 Distance was 0.056304368893569566
At iter: 5 Distance was 0.049018648073046345
At iter: 6 Distance was 0.041391709425534266
At iter: 7 Distance was 0.03524791738852704
At iter: 8 Distance was 0.029945627971558707
At iter: 9 Distance was 0.025457347663465794
At iter: 10 Distance was 0.021637912452949926
At iter: 11 Distance was 0.0183924214006226
At iter: 12 Distance was 0.01563351230747961
At iter: 13 Distance was 0.013288496231360379
At iter: 14 Distance was 0.011295219271076634
At iter: 15 Distance was 0.009600936972980194
At iter: 16 Distance was 0.008160796288042666
At iter: 17 Distance was 0.006936676877442695
At iter: 18 Distance was 0.005896175338177678
At iter: 19 Distance was 0.005011749039245306
At iter: 20 Distance was 0.004259986682937641
At iter: 21 Distance was 0.00362098868059575
At iter:

array([0.43869576, 0.02171029, 0.02786154, 0.02171029, 0.02171029,
       0.02786154, 0.04585394, 0.39459636])

## Problem 4 Incomplete

In [111]:
def sseigen(A, N='none', d = .85):
    n = len(A)
    #Define B matrix
    K = Kmat(A)
    # Everything good except K - fix!!!
    E = np.ones((n, n))
    B = (d * K + ((1 - d) / n) * E)
    #Get eigenvalues
    eigs, eigvecs = la.eig(B)
    p_ss = eigvecs[:, 0]/ np.sum(eigvecs[:,0])
    return p_ss

Data = readmatfromtext("Data/matrix.txt").toarray()
A = Kmat(Data)
#print(steadystate(A))
print(sseigen(A))

[[0.         0.         0.         0.         0.         0.
  0.         0.88888889]
 [0.23300971 0.         0.         0.         0.         0.
  0.         0.        ]
 [0.02912621 1.         0.27272727 1.         1.         0.27272727
  0.09677419 0.11111111]
 [0.0776699  0.         0.72727273 0.         0.         0.
  0.25806452 0.        ]
 [0.0776699  0.         0.         0.         0.         0.72727273
  0.25806452 0.        ]
 [0.11650485 0.         0.         0.         0.         0.
  0.38709677 0.        ]
 [0.23300971 0.         0.         0.         0.         0.
  0.         0.        ]
 [0.23300971 0.         0.         0.         0.         0.
  0.         0.        ]]
[0.03870928 0.02641669 0.48031139 0.32401996 0.04643401 0.03127529
 0.02641669 0.02641669]


## Exercise 5

In [136]:
raw = pd.read_csv('Data/ncaa2013.csv')
winners = raw['WINNING_TEAM']
# NOTE THAT THERE IS A SPACE BEFORE LOSING TEAM!!!
losers = raw[' LOSING_TEAM']
names = np.unique(np.concatenate((winners, losers)))
n = len(names)
# Create adjacency matrix (P from link predictor)
adj = np.zeros((n, n))
for i in raw.index:
    name1, name2 = winners[i], losers[i]
    name1loc = np.where(name1 == names)
    name2loc = np.where(name2 == names)
    '''
    Note - this is where 
    '''
    adj[name2loc, name1loc] = 1
np.where('Cleveland St'==names), np.where('Grambling'==names)


1.0 0.0
