## shikha Goel - shikhag

In [1]:
import random
def read_graph(fname):
    '''
    Read a graph description from the text file fname and return it as a dict.

    Each non-blank line is split on whitespace: the first token is a page
    and the remaining tokens are the pages it links to. Blank or
    whitespace-only lines are skipped.

    Returns a dict mapping each page to the list of its link targets (an
    empty list when the line holds only the page). If a page appears on
    more than one line, the last line's links are kept.
    '''
    graph = {}
    # The with-block guarantees the file is closed even if parsing fails.
    with open(fname,'r') as fin:
        for line in fin:
            tokens = line.split()
            if not tokens:
                # A blank line has no page token to unpack; ignore it.
                continue
            page, *links = tokens
            graph[page] = links
    return graph


In [11]:
def random_walk(graph, walk_len = 1000, beta = 0.85):
    '''
    Perform one random walk over graph and return the page it ends on.

    graph maps each page to the list of pages it links to. The walk starts
    on a uniformly chosen page and takes walk_len steps. At each step, with
    probability beta it follows a uniformly chosen link of the current page;
    otherwise it jumps to a uniformly chosen page of the whole graph.

    A page with no links is treated as if it linked to every other page.
    If there is no other page (single-page graph), the walk stays put.

    Raises IndexError if graph is empty (there is no page to start from).
    For a dict graph, raises KeyError if the walk follows a link to a page
    that is not itself a key of graph.
    '''
    pages = list(graph.keys())
    current_page = random.choice(pages)
    for step in range(walk_len):
        if random.random() <= beta:
            links = graph[current_page]
            if not links:
                # Dangling page: every *other* page counts as a neighbour.
                links = [page for page in pages if page != current_page]
            if links:
                current_page = random.choice(links)
            # else: the only page in the graph has no links; stay on it
            # instead of calling random.choice on an empty list.
        else:
            current_page = random.choice(pages)
    return current_page


In [3]:
def simulate_pagerank(fname,walk_len = 1000,N=1000,beta=0.85):
    '''
    Estimate page ranks by simulation and print them.

    Seeds the random module with 1, loads the graph from fname with
    read_graph, then runs random_walk N times (each of walk_len steps with
    follow-link probability beta) and tallies the page each walk ends on.

    Prints one line per page that was landed on at least once, in sorted
    page order: the page followed by its share of the N walks. Pages that
    were never landed on are not printed. Nothing is returned.
    '''
    random.seed(1)  # fixed seed so repeated runs print the same numbers
    web = read_graph(fname)

    # landings[page] = number of walks that finished on that page
    landings = {}
    for _ in range(N):
        final_page = random_walk(web,walk_len,beta)
        landings[final_page] = landings.get(final_page, 0) + 1

    for page in sorted(landings):
        print (page,landings[page]/N)
    
        

In [4]:
# Run the simulation on graph-1.txt with the default walk_len, N and beta;
# the estimated rank of each visited page is printed below.
simulate_pagerank("graph-1.txt")


A 0.379
B 0.206
C 0.37
D 0.045


In [5]:
# Run the simulation on graph-2.txt with the default walk_len, N and beta.
simulate_pagerank("graph-2.txt")

A 0.362
B 0.169
C 0.27
D 0.071
E 0.128


In [13]:
# Run the simulation on wikipedia-example.txt; the keyword arguments spell out
# the same values as the function's defaults.
simulate_pagerank("wikipedia-example.txt",walk_len = 1000,N=1000,beta=0.85)   

A 0.032
B 0.393
C 0.345
D 0.027
E 0.084
F 0.038
G 0.011
H 0.023
I 0.014
J 0.019
K 0.014
