In [2]:
import numpy as np
import re

In [69]:
def read_adj_file(file = './dummy_adjascency.txt'):
    ''' DESCRIPTION: Reads a whitespace-separated numeric matrix from a text file.
                     The characters '(', ')' and '|' are stripped from every line and
                     lines that contain no tokens are skipped.
        ARGUMENTS: file (str): path of the text file to read.
        RETURNS: (np array): float array with one row per non-empty line of the file.
    '''
    rows = []
    with open(file , 'r', encoding = 'utf-8') as f:
        for line in f:
            cleaned = re.sub('[(|)|\n]', '', line) # remove (,) and \n characters
            # split() with no argument tokenizes on any run of whitespace and never yields ''
            tokens = cleaned.split()
            if tokens:
                rows.append(tokens)
    # np.float was only an alias of the builtin float and was removed in NumPy 1.24
    return np.asarray(rows, dtype = float)

# Load the matrix from the default file path and echo its shape and contents.
rawA = read_adj_file()
print(rawA.shape, rawA)

(6, 6) [[0. 1. 1. 0. 0. 0.]
 [1. 0. 1. 0. 1. 1.]
 [0. 1. 0. 0. 1. 1.]
 [0. 0. 1. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]


In [128]:
def to_stochastic(A):
    ''' DESCRIPTION: This function turns adjacency matrix A into a row-stochastic matrix.
                     Every row with a non-zero sum is divided by that sum. Rows that sum to 0
                     (dangling pages) are replaced by a uniform row of 1 / A.shape[0].
                     The input array is not modified.
        ARGUMENTS: A (np 2D array): adjacency matrix; any numeric dtype.
        RETURNS: A_ (np 2D array): normalized matrix with dangling rows made uniform.
                 initA (np 2D array): normalized matrix with dangling rows left as zeros.
    '''
    # Work on a float copy: the in-place division below cannot store fractions in an integer array
    A_ = np.array(A, dtype = float)
    numRows = A_.shape[0]
    # Sum all line elements to see which line is 0 aka dangling page
    rowSums = np.sum(A_, axis = 1)
    dangling = rowSums == 0
    # Dangling rows are divided by the number of pages, every other row by its own sum.
    # Shaped as a column vector so each row of A is divided by its own entry.
    normalizer = np.where(dangling, numRows, rowSums).reshape(numRows, 1)
    # Existing non-null transition probs only (dangling rows stay all-zero)
    initA = A_ / normalizer
    # Add a uniform probability to all dangling lines
    A_[dangling, :] = 1
    A_ /= normalizer
    return A_, initA

def to_irreducible(A, d=0.9):
    ''' DESCRIPTION: Transposes A, scales it by d and adds the constant (1-d) to every entry.
        ARGUMENTS: A (np 2D array): matrix to transform.
                   d (float): scaling factor applied to the transposed matrix.
        RETURNS: (np 2D array): d * A^T + (1-d), element-wise.
    '''
    offset = 1 - d
    scaled = d * A.T
    return offset + scaled

# Normalize the raw adjacency matrix, then build the scaled/transposed variant.
A, initA = to_stochastic(rawA)
irredA = to_irreducible(A)

print("Initial matrix: ", initA)
print("\nStochastic A: ", A)
# Print the variable assigned above; the previous name `A_irred` is never defined
# in this notebook and raises NameError on a fresh kernel.
print("\nIrreducible A: ", irredA)

Initial matrix:  [[0.         0.5        0.5        0.         0.         0.        ]
 [0.25       0.         0.25       0.         0.25       0.25      ]
 [0.         0.33333333 0.         0.         0.33333333 0.33333333]
 [0.         0.         0.5        0.         0.5        0.        ]
 [0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.        ]]

Stochastic A:  [[0.         0.5        0.5        0.         0.         0.        ]
 [0.25       0.         0.25       0.         0.25       0.25      ]
 [0.         0.33333333 0.         0.         0.33333333 0.33333333]
 [0.         0.         0.5        0.         0.5        0.        ]
 [0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667]
 [0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667]]

Irreducible A:  [[0.1   0.325 0.1   0.1   0.25  0.25 ]
 [0.55  0.1   0.4   0.1   0.25  0.25 ]
 [0.55  0.325 0.1   0.55  0.25  0.25 ]
 [0

In [145]:
def power_iterate(A, d= 0.9, epsilon = 0.0001, maxIters = 2):
    ''' DESCRIPTION: Iterates P <- (1-d) + d * A^T P starting from the uniform vector 1/N,
                     until the L1 distance between successive vectors is <= epsilon or
                     maxIters updates have been performed. Prints the initial vector and
                     the shapes involved before iterating.
        ARGUMENTS: A (np 2D array): transition matrix with N = A.shape[0] pages.
                   d (float): damping factor.
                   epsilon (float): convergence threshold on the L1 norm of the update.
                   maxIters (int): maximum number of updates.
        RETURNS: (np 1D array): the last computed vector; the initial uniform vector
                 if no update was performed (e.g. maxIters < 1).
    '''
    # Init page rank as 1 over the num of pages
    numPages = A.shape[0]
    P_k = np.ones(numPages) / numPages
    print(P_k, P_k.shape, A.shape)
    k = 1
    delta = 1
    while (delta > epsilon) and (k <= maxIters):
        P_k_1 = (1-d) + d* np.matmul(A.transpose(), P_k)
        k += 1
        delta = np.linalg.norm(P_k_1 - P_k, 1)
        P_k = P_k_1
    # P_k always holds the latest vector; P_k_1 would be unbound if the loop never ran
    return P_k

def compute_rank(A, P, d = 0.9):
    ''' DESCRIPTION: Computes PR[i] = (1-d) + d * sum_j A[j,i] * P[j] for every entry i of P.
                     Prints the shapes of P, A and of one column of A before computing.
        ARGUMENTS: A (np 2D array): transition matrix.
                   P (np 1D array): current rank vector.
                   d (float): damping factor.
        RETURNS: PR (np 1D array): updated rank vector with the same shape as P.
    '''
    PR = np.zeros((P.shape))
    # A.shape[:1] is the shape of a single column of a 2D A; indexing a fixed column
    # (A[:,2]) for this message would fail on matrices with fewer than 3 columns
    print(P.shape,A.shape, A.shape[:1])
    for i in range(P.shape[0]):
        PR[i] = (1-d) + d * np.sum(A[:,i]* P)
    return PR

# Run the power iteration with its defaults, then apply one more rank update.
P = power_iterate(A)
PR = compute_rank(A,P)
# Every entry already ends with '\n'; sep='' stops print from inserting a space
# at the start of each line after the first.
print(*['Page: {}, Rank: {:.2f}\n'.format(i, r) for i,r in enumerate(PR)], sep='')
# Full-precision values, with the closing parenthesis that was previously missing.
print('(', *PR, ')')

[0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667] (6,) (6, 6)
(6,) (6, 6) (6,)
Page: 0, Rank: 0.30
 Page: 1, Rank: 0.44
 Page: 2, Rank: 0.49
 Page: 3, Rank: 0.21
 Page: 4, Rank: 0.50
 Page: 5, Rank: 0.42

( 0.2966406249999999 0.4426562499999999 0.48873437499999994 0.21381249999999993 0.49764062499999984 0.4155156249999999
1


In [155]:
def write_to_file(initA, A, irredA, PR, path = './assignment4_results.txt'):
    ''' DESCRIPTION: Writes the four arrays, in the order initA, A, irredA, PR, to a text file.
                     Each array is enclosed in parentheses and separated from the previous
                     one by a newline; values are formatted with 2 decimals. Rows of a 2D
                     array are separated by newlines and also echoed to stdout; entries
                     of a 1D array are separated by single spaces.
        ARGUMENTS: initA, A, irredA (np 2D array): matrices to write.
                   PR (np 1D array): vector to write.
                   path (str): output file path; the file is overwritten.
        RETURNS: None
    '''
    with open(path, 'w', encoding ='utf-8') as out:
        for position, arr in enumerate((initA, A, irredA, PR)):
            # Every array after the first starts on a new line
            out.write('(' if position == 0 else '\n(')
            isMatrix = len(arr.shape) > 1
            lastRow = arr.shape[0] - 1
            for row in range(arr.shape[0]):
                if isMatrix:
                    cells = ['{:.2f}'.format(value) for value in arr[row, :]]
                    print(*cells)
                    out.write(' '.join(cells))
                    closing = ')' if row == lastRow else '\n'
                else:
                    out.write('{:.2f}'.format(arr[row]))
                    closing = ')' if row == lastRow else ' '
                out.write(closing)
        
        
# Write the three matrices and the rank vector to the default output path.
write_to_file(initA, A, irredA, PR)

0.00 0.50 0.50 0.00 0.00 0.00
0.25 0.00 0.25 0.00 0.25 0.25
0.00 0.33 0.00 0.00 0.33 0.33
0.00 0.00 0.50 0.00 0.50 0.00
0.00 0.00 0.00 0.00 0.00 0.00
0.00 0.00 0.00 0.00 0.00 0.00
0.00 0.50 0.50 0.00 0.00 0.00
0.25 0.00 0.25 0.00 0.25 0.25
0.00 0.33 0.00 0.00 0.33 0.33
0.00 0.00 0.50 0.00 0.50 0.00
0.17 0.17 0.17 0.17 0.17 0.17
0.17 0.17 0.17 0.17 0.17 0.17
0.10 0.32 0.10 0.10 0.25 0.25
0.55 0.10 0.40 0.10 0.25 0.25
0.55 0.32 0.10 0.55 0.25 0.25
0.10 0.10 0.10 0.10 0.25 0.25
0.10 0.32 0.40 0.55 0.25 0.25
0.10 0.32 0.40 0.10 0.25 0.25
