In [151]:
#Done in collaboration with Moises Baly, Claire Opila, and Omri Sass

import math
import operator

In [2]:
#class to create nodes
class Node:
    def __init__(self, ID, score):
        self.ID = ID
        self.score = score
        self.adjacents = set()
        
    def out_degree(self):
        return len(self.adjacents)
    
    def __eq__(self, other):
        """Override the default Equals behavior"""
        if isinstance(other, self.__class__):
            return self.ID == other.ID
        return False
    
    def __ne__(self, other):
        """Define a non-equality test"""
        return not self.__eq__(other)
    
    def __str__(self):
        return '(%s, %s)' % (self.ID, self.score)
    
    def __repr__(self):
        return self.__str__()
    
    def __hash__(self):
        return self.ID

In [8]:
#class to create undirected graph
class Graph:
    def __init__(self, N, fully_undirected=False):
        self.nodes = []
        self.N = N
        self.fully_undirected = fully_undirected
        for i in range(N):
            # initializes page rank algorithm.
            self.nodes.append(Node(i, 1.0 / float(N)))
            
    def add_edge(self,i,j):
        self.nodes[i].adjacents.add(self.nodes[j])
        
    def add_edge_und(self,i,j):
        self.add_edge(i, j)
        self.add_edge(j, i)
    
    # this is the key function! setting ∈ = 1/7 and calculating new score for each round
    def page_rank(self, epsilon = 0.0000000001, rounds = 100000000, e = 1.0/7):
        _break = False
        _round = 0
        while _round <= rounds and not _break:
            _break = True
            for node in self.nodes:
                new_score = self.__page_rank_score(node.ID)
                if (abs(new_score - node.score) > epsilon):
                    _break = False
                node.score = new_score
            _round += 1
    
    #Little trick for the facebook graph. performance related
    def __get_nodes_in(self, i):
        if self.fully_undirected:
            return self.nodes[i].adjacents
        else:
            return [node for node in self.nodes if self.nodes[i] in node.adjacents]
    
    #calculating sum of page_rank_score for all the nodes
    def __page_rank_score(self, i):
        in_nodes = self.__get_nodes_in(i)
        scores = [ (node.score / node.out_degree()) for node in in_nodes]
        return sum(scores)
    
    #creating dictionary to store the scores
    def scores(self):
        d = {}
        for node in self.nodes:
            d[node.ID] = node.score
        return d
    
    def __str__(self):
        s = ""
        for i in range(len(self.nodes)):
            s+='%s: %s \n' %(i, self.nodes[i].adjacents)
        return s
    
    def __repr__(self):
        return self.__str__()

In [9]:
#creating a function to read the input file and format it correctly
def read_graph(filepath, undirected = False):
    with open(filepath) as fileIn:
        N = int((fileIn.readline()))
        g = Graph(N, fully_undirected=undirected)
        for line in fileIn:
            i, j = (int(s) for s in line.split())
            if undirected:
                g.add_edge_und(i,j)
            else:
                g.add_edge(i,j)
        return g

Running Figure 11.1

In [11]:
#now it's time to bring in figure 11.1
g = read_graph('f_11_1_1.txt')
print(g)
g.page_rank()
print(g.scores())


0: {(1, 0.25), (3, 0.25)} 
1: {(2, 0.25)} 
2: {(0, 0.25)} 
3: {(3, 0.25)} 

{0: 5.820766091346741e-11, 1: 2.9103830456733704e-11, 2: 2.9103830456733704e-11, 3: 0.49999999997089617}


Running Figure 11.2

In [156]:
#now it's time to bring in figure 11.2 
g = read_graph('figure_11_2.txt')
g.page_rank()
print(g.scores())


IndexError: list index out of range

# 7. a) & b) & c)

In [131]:
#calculating the average scores for 10 rounds but first calling the function to make the undirected graph directed
for i in range(2, 12, 2):
    fb_g = read_graph('facebook_combined.txt', undirected=True) 
    fb_g.page_rank(rounds = i)
    scores = fb_g.scores()
    print('Avg score for %s rounds : %s ' % (i, sum(scores.values()) / len(scores)))

Avg score for 2 rounds : 0.000256740039662 
Avg score for 4 rounds : 0.000252549169981 
Avg score for 6 rounds : 0.000252549169981 
Avg score for 8 rounds : 0.000252549169981 
Avg score for 10 rounds : 0.000252549169981 


In [None]:
for i in range(2, 12, 2):
    fb_g = read_graph('facebook_combined.txt', undirected=True) 
    fb_g.page_rank(rounds = i)
    scores = fb_g.scores()
    print('Avg score for %s rounds : %s ' % (i, sum(scores.values()) / len(scores)))

# 7. d)

In [132]:
#here we are sorting all the nodes by their scores
sorted(scores.items(), key=operator.itemgetter(1), reverse=True)



[(3437, 0.007856054818887988),
 (107, 0.007030568695334819),
 (0, 0.006527541967903311),
 (1684, 0.006470322387009363),
 (1912, 0.003921062784627191),
 (348, 0.002410108916710299),
 (3980, 0.0023070763207021783),
 (686, 0.0022915348414082825),
 (414, 0.0018493302918218863),
 (698, 0.0013643004768346249),
 (483, 0.0013469588705058684),
 (3830, 0.0012160652463880117),
 (376, 0.0009394636101450625),
 (2047, 0.0008583918038054017),
 (56, 0.000850555981355952),
 (25, 0.0008419460014036578),
 (322, 0.0008249408174388533),
 (828, 0.0008228602745397247),
 (67, 0.0008179354265076881),
 (475, 0.0008178532205401038),
 (428, 0.0008133396821946039),
 (3596, 0.0007930578822037973),
 (271, 0.000787463276292453),
 (713, 0.0007802268792871754),
 (119, 0.0007684627419430487),
 (563, 0.0007593949059080996),
 (277, 0.0007549284914241192),
 (3545, 0.0007539682889028224),
 (2313, 0.0007537139756014439),
 (3938, 0.0007537024049331141),
 (917, 0.000750295132725704),
 (26, 0.0007335994794915794),
 (21, 0.00073