# 6

In [28]:
import math
import operator

In [3]:
class Node:
    def __init__(self, ID, score):
        self.ID = ID
        self.score = score
        self.adjacents = set()
        
    def out_degree(self):
        return len(self.adjacents)
    
    def __eq__(self, other):
        """Override the default Equals behavior"""
        if isinstance(other, self.__class__):
            return self.ID == other.ID
        return False
    
    def __ne__(self, other):
        """Define a non-equality test"""
        return not self.__eq__(other)
    
    def __str__(self):
        return '(%s, %s)' % (self.ID, self.score)
    
    def __repr__(self):
        return self.__str__()
    
    def __hash__(self):
        return self.ID

In [20]:
class Graph:
    def __init__(self, N, fully_undirected=False):
        self.nodes = []
        self.N = N
        self.fully_undirected = fully_undirected
        for i in range(N):
            # initializes page rank algorithm.
            self.nodes.append(Node(i, 1.0 / float(N)))
            
    def add_edge(self,i,j):
        self.nodes[i].adjacents.add(self.nodes[j])
        
    def add_edge_und(self,i,j):
        self.add_edge(i, j)
        self.add_edge(j, i)
    
    # it can terminate based on epsilon change or number of iterations
    def page_rank(self, epsilon = 0.00001, rounds = math.inf, e = 1.0/7):
        _break = False
        _round = 0
        while _round <= rounds and not _break:
            _break = True
            for node in self.nodes:
                new_score = (e / self.N) + (1 - e) * self.__page_rank_score(node.ID)
                if abs(new_score - node.score > epsilon):
                    _break = False
                node.score = new_score
            _round += 1
    
    #Little trick for the facebook graph. performance related
    def __get_nodes_in(self, i):
        if self.fully_undirected:
            return self.nodes[i].adjacents
        else:
            return [node for node in self.nodes if self.nodes[i] in node.adjacents]
        
    def __page_rank_score(self, i):
        in_nodes = self.__get_nodes_in(i)
        scores = [ node.score / node.out_degree() for node in in_nodes]
        return sum(scores)
        
    def scores(self):
        d = {}
        for node in self.nodes:
            d[node.ID] = node.score
        return d
    
    def __str__(self):
        s = ""
        for i in range(len(self.nodes)):
            s+='%s: %s \n' %(i, self.nodes[i].adjacents)
        return s
    
    def __repr__(self):
        return self.__str__()

In [17]:
def read_graph(filepath, undirected = False):
    with open(filepath) as fileIn:
        N = int(fileIn.readline())
        g = Graph(N, fully_undirected=undirected)
        for line in fileIn:
            i, j = (int(s) for s in line.split())
            if undirected:
                g.add_edge_und(i,j)
            else:
                g.add_edge(i,j)
        return g

 #### We can use `n` to stop the iterations or it will stop when the change in scores is very small.

##### Fig 11_2

In [8]:
g = read_graph('f_11_2.txt')
g.page_rank()
print(g.scores())

{0: 0.16666666666666666, 1: 0.16666666666666666, 2: 0.16666666666666666, 3: 0.16666666666666666, 4: 0.16666666666666666, 5: 0.16666666666666666}


##### Fig 11_1

In [9]:
g = read_graph('f_11_1.txt')
g.page_rank()
print(g.scores())

{0: 0.13510638297872343, 1: 0.09361702127659577, 2: 0.11595744680851067, 3: 0.6552630928589758}


# 7

## a

In [10]:
#this is already taken care of by the way the graph is constructed.
fb_g = read_graph('facebook_combined.txt', undirected=True)
assert(len(fb_g.nodes[0].adjacents) == 347)

## b

#### We are looking at the average score as we iterate through the number of rounds. That should give us an idea of the tendency. We can see that after 4 rounds we converge.

In [24]:
for i in range(2, 12, 2):
    fb_g = read_graph('facebook_combined.txt', undirected=True) 
    fb_g.page_rank(rounds = i)
    scores = fb_g.scores()
    print('Avg score for %s rounds : %s ' % (i, sum(scores.values()) / len(scores)))

Avg score for 2 rounds : 0.0002567400396618219 
Avg score for 4 rounds : 0.00025254916998110517 
Avg score for 6 rounds : 0.00025254916998110517 
Avg score for 8 rounds : 0.00025254916998110517 
Avg score for 10 rounds : 0.00025254916998110517 


#### And without rounds stop condition

In [25]:
fb_g = read_graph('facebook_combined.txt', undirected=True) 
fb_g.page_rank()
scores = fb_g.scores()
print('Avg score: %s ' % (sum(scores.values()) / len(scores)))

Avg score: 0.00025254916998110517 


## C

#### Lets first look at sorted scores to see if there are repetitions or tendencies in score. There doesn't seem to be a lot of bucketing among the scores, although only a few nodes have the highest values before scores dropping heavily.

In [31]:
# There doesn't seem to be a particular bucketing in scores.
sorted(scores.items(), key=operator.itemgetter(1), reverse=True)

[(3437, 0.007856054818887988),
 (107, 0.007030568695334819),
 (0, 0.006527541967903309),
 (1684, 0.006470322387009362),
 (1912, 0.00392106278462719),
 (348, 0.002410108916710299),
 (3980, 0.0023070763207021783),
 (686, 0.0022915348414082825),
 (414, 0.0018493302918218863),
 (698, 0.0013643004768346247),
 (483, 0.0013469588705058682),
 (3830, 0.0012160652463880117),
 (376, 0.0009394636101450623),
 (2047, 0.0008583918038054019),
 (56, 0.0008505559813559518),
 (25, 0.0008419460014036577),
 (322, 0.0008249408174388531),
 (828, 0.0008228602745397243),
 (67, 0.0008179354265076882),
 (475, 0.0008178532205401037),
 (428, 0.0008133396821946039),
 (3596, 0.0007930578822037972),
 (271, 0.0007874632762924529),
 (713, 0.0007802268792871753),
 (119, 0.0007684627419430485),
 (563, 0.0007593949059080993),
 (277, 0.0007549284914241192),
 (3545, 0.0007539682889028225),
 (2313, 0.0007537139756014437),
 (3938, 0.000753702404933114),
 (917, 0.0007502951327257039),
 (26, 0.0007335994794915794),
 (21, 0.0007

## d
#### This is definitely a measure of popularity. High scoring nodes are highly connected, which implies that they are very relevant or popular. However, there is a difference between been highly connected and highly influencial: one person can have many friends but  have very little influence on these friends.