### Importing required packages

In [1]:
import random

### Function to read graph contents

In [2]:
def read_graph(fname):
    '''
    This function reads the file input and returns the nodes and its neighbours 
    as dictionary, and set of all unique nodes of the input graph.
    '''
    f = open(fname, "r")
    dic = {}
    tot = []
    for line in f:
        s = line.split()
        if(len(s)==1):
            dic[s[0]] = []
        else:
            dic[s[0]] = s[1:]
        tot.append(s)
    total = [item for sublist in tot for item in sublist]
    total = list(set(total))
    return dic,total

### Function for random_walk

In [3]:
def random_walk(graph, walk_len, beta):
    '''
    This function reads graph, walk_len, and beta to calculate the current_page we are on.
    returns the current page.
    '''
    total = list(graph.keys())
    p = random.randint(0,len(total)-1)
    curr_page = total[p]
    for i in range(0,walk_len):
        r = random.random()
        lst = graph[curr_page]
        if (r<= beta) and (lst != []):
            lst = graph[curr_page]
            curr_page = lst[random.randint(0,len(lst)-1)]
        else:
            curr_page = total[random.randint(0,len(total)-1)]
    return curr_page

### Function for simulate_pagerank driver function

In [4]:
def simulate_pagerank(fname, walk_len, N, beta):
    '''
    This function is the driver that makes calls to read_graph and random_walk, and calculates 
    relative frequency of each page i.e page rank of each page. 
    Prints the output of each node and page rank of corresponding node
    '''
    random.seed(1)
    graph,content = read_graph(fname)
    print(graph)
    print(content)
    dic = dict((t,0) for t in content)
    for i in range(0,N):
        curr_page = random_walk(graph, walk_len, beta)
        dic[curr_page] += 1
    print(dic)
    dic = {k: v / N for k, v in dic.items()}
    keys = sorted(dic)
    for k in keys:
        print(k,' ',dic[k])

In [5]:
#code to execute graph-1.txt 
simulate_pagerank("graph-1.txt", walk_len=1000, N=1000, beta=0.85)

{'A': ['B', 'C'], 'B': ['C'], 'C': ['A'], 'D': ['C']}
['A', 'C', 'D', 'B']
{'A': 379, 'C': 370, 'D': 45, 'B': 206}
A   0.379
B   0.206
C   0.37
D   0.045


In [6]:
#code to execute graph-2.txt 
simulate_pagerank("graph-2.txt", walk_len=1000, N=1000, beta=0.85)

{'A': ['B', 'C'], 'B': ['C', 'D', 'E'], 'C': ['A'], 'D': ['C', 'E'], 'E': ['A']}
['E', 'C', 'B', 'A', 'D']
{'E': 128, 'C': 270, 'B': 169, 'A': 362, 'D': 71}
A   0.362
B   0.169
C   0.27
D   0.071
E   0.128


In [7]:
#code to wikipedia-example.txt graph-1.txt 
simulate_pagerank("wikipedia-example.txt", walk_len=1000, N=10000, beta=0.85)

{'A': [], 'B': ['C'], 'C': ['B'], 'D': ['B', 'A'], 'E': ['B', 'D', 'F'], 'F': ['B', 'E'], 'G': ['B', 'E'], 'H': ['B', 'E'], 'I': ['B', 'E'], 'J': ['E'], 'K': ['E']}
['I', 'E', 'C', 'F', 'K', 'H', 'J', 'B', 'G', 'A', 'D']
{'I': 161, 'E': 781, 'C': 3495, 'F': 363, 'K': 169, 'H': 163, 'J': 128, 'B': 3859, 'G': 165, 'A': 324, 'D': 392}
A   0.0324
B   0.3859
C   0.3495
D   0.0392
E   0.0781
F   0.0363
G   0.0165
H   0.0163
I   0.0161
J   0.0128
K   0.0169
