In [26]:
import numpy as np
from scipy import sparse

In [27]:
# Damping factor of the rank update: the link-following term is scaled by
# beta and the remaining (1 - beta) is spread uniformly over all nodes.
beta = 0.8
# Convergence threshold on the L1 difference between successive rank vectors.
tolerance = 10**-10

In [28]:
def getGraphData(file):
    """Read a directed edge list and return it as an adjacency dictionary.

    Every data line holds a source node id followed by a destination node id,
    separated by whitespace (tab-separated in the original data set). Blank
    lines and comment lines starting with '#' are skipped, so files with a
    descriptive header or a trailing empty line load cleanly.

    Args:
        file: Path of the edge-list file to read.

    Returns:
        dict mapping each source node id (int) to the list of its destination
        node ids (int), in file order. Repeated edges are kept as repeated
        list entries.

    Raises:
        ValueError: If a field on a data line is not an integer.
        IndexError: If a data line has fewer than two fields.
    """
    graphEdges = dict()
    with open(file) as fp:
        for rawLine in fp:
            line = rawLine.strip()
            # Skip empty lines and '#' header/comment lines instead of
            # failing on int('') or int('# ...').
            if not line or line.startswith('#'):
                continue
            lineL = [int(node) for node in line.split()]
            graphEdges.setdefault(lineL[0], []).append(lineL[1])
    return graphEdges

# Load the edge list as {source node id: [destination node ids]}.
graphEdges = getGraphData('web-Stanford.txt')


In [29]:
# Node ids double as 1-based matrix indices, so the matrix dimension is the
# largest id seen anywhere in the edge list. Destinations are included because
# a node without outgoing links never appears as a key of graphEdges.
numberOfNodes = max(
    max(source, max(targets)) for source, targets in graphEdges.items()
)
print("Number of nodes using is", numberOfNodes)

Number of nodes using is 281903


In [30]:
def createMatrixFromGraph(graph, size):
    """Build the dense link matrix of a graph.

    For every source node, each of its outgoing links carries a weight of
    1 / (number of outgoing links). The entry at [destination - 1, source - 1]
    holds that weight multiplied by how many times the link is repeated.

    Args:
        graph: dict mapping a source node id to the list of its destination
            node ids (node ids are one more than their matrix index).
        size: Number of rows and columns of the returned matrix.

    Returns:
        A (size, size) numpy array; entries without a link stay 0.
    """
    transition = np.zeros((size, size))
    for source, targets in graph.items():
        share = float(1 / len(targets))
        # Count repeated links so a duplicate adds weight instead of
        # overwriting the same cell.
        multiplicity = {}
        for target in targets:
            multiplicity[target] = multiplicity.get(target, 0) + 1
        for target, times in multiplicity.items():
            # Shift by one: node ids are 1-based, matrix indices 0-based.
            transition[target - 1, source - 1] = times * share
    return transition

# NOTE(review): this builds a dense size x size float64 array (on the order of
# 636 GB of address space for 281903 nodes). None of the later cells shown
# here read `matrix` -- confirm it is still needed.
matrix = createMatrixFromGraph(graphEdges, numberOfNodes)

In [31]:
def createMatrixFromGraphSparse(graph, size):
    """Build the link matrix of a graph as a scipy CSR sparse matrix.

    For every source node, each of its outgoing links carries a weight of
    1 / (number of outgoing links). The entry at [destination - 1, source - 1]
    holds that weight multiplied by how many times the link is repeated.

    Args:
        graph: dict mapping a source node id to the list of its destination
            node ids (node ids are one more than their matrix index).
        size: Number of rows and columns of the returned matrix.

    Returns:
        A scipy.sparse.csr_matrix of shape (size, size).
    """
    rows, cols, weights = [], [], []
    for source, targets in graph.items():
        share = float(1 / len(targets))
        # Count repeated links so each (destination, source) pair is emitted
        # once with its accumulated weight.
        occurrences = {}
        for target in targets:
            occurrences[target] = occurrences.get(target, 0) + 1
        for target, times in occurrences.items():
            # Shift by one: node ids are 1-based, matrix indices 0-based.
            rows.append(target - 1)
            cols.append(source - 1)
            weights.append(times * share)
    return sparse.csr_matrix((weights, (rows, cols)), shape=(size, size))

# Sparse link matrix: column j holds the out-link weights of node j + 1.
sparseMatrix = createMatrixFromGraphSparse(graphEdges, numberOfNodes)


In [32]:
# Number of entries of the sparse link matrix whose value is non-zero.
numberOfNonZeroEdges = sparseMatrix.count_nonzero()
print('Number of non zero points is ', numberOfNonZeroEdges)

Number of non zero points is  2312497


In [33]:
# `one` is the all-ones column vector with numberOfNodes x 1 entries.
one = np.ones(shape=(numberOfNodes, 1))

# Initial rank vector r0: every entry equals 1 / numberOfNodes.
r = one * (1.0 / numberOfNodes)

In [34]:
# Power iteration: repeat
#     rnew = (1 - beta) / numberOfNodes * one + beta * sparseMatrix . r
# until the L1 change between successive rank vectors drops below `tolerance`.
count = 0
while True:
    count += 1
    rnew = ((1-beta)/numberOfNodes) * one + beta * sparseMatrix.dot(r)
    # For an (n, 1) array, ord=1 is the maximum absolute column sum, which for
    # a single column is the sum of absolute differences (the L1 norm).
    l1Norm = np.linalg.norm((rnew - r), ord=1)
    # Adopt the new iterate before testing convergence so that `r` holds the
    # final rank vector after the loop instead of lagging one step behind.
    r = rnew
    if l1Norm < tolerance:
        break
print('Number of iterations it took is ', count)
    

Number of iterations it took is  87
