-
Notifications
You must be signed in to change notification settings - Fork 3
/
pageRank.py
60 lines (44 loc) · 1.71 KB
/
pageRank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/python
from mrjob.job import MRJob
from mrjob.protocol import JSONProtocol
class MRPageRank(MRJob):
    """Iterative PageRank expressed as a chain of identical map/reduce steps.

    Each input/output record is a ``(node_id, node)`` pair where ``node`` is
    a dict.  The keys this job reads or writes are:

    * ``'score'``      -- the node's current rank (read by the mapper,
      rewritten by the reducer);
    * ``'links'``      -- optional iterable of ``(dest_id, weight)`` pairs;
    * ``'prev_score'`` -- set by the reducer to the score the node had
      before the current iteration.
    """

    INPUT_PROTOCOL = JSONProtocol  # read the same format we write

    def configure_options(self):
        """Register the ``--iterations`` and ``--damping-factor`` options."""
        super(MRPageRank, self).configure_options()

        self.add_passthrough_option(
            '--iterations', dest='iterations', default=10, type='int',
            help='number of iterations to run')

        self.add_passthrough_option(
            '--damping-factor', dest='damping_factor', default=0.85,
            type='float',
            help='probability a web surfer will continue clicking on links')

    def map_task(self, node_id, node):
        """Re-emit the node and send a score contribution along each link.

        Yields:
            ``(node_id, ('node', node))`` once, so the reducer can rebuild
            the node, then ``(dest_id, ('score', contribution))`` for every
            outgoing link, where ``contribution`` is ``node['score']``
            multiplied by the link weight.
        """
        yield node_id, ('node', node)

        # A missing (or null) 'links' entry simply means no outgoing edges.
        for dest_id, weight in node.get('links') or ():
            # A weight may arrive as a list; in that case the numeric
            # weight is taken from its second element.
            if isinstance(weight, list):
                weight = weight[1]
            yield dest_id, ('score', node['score'] * weight)

    def reduce_task(self, node_id, typed_values):
        """Combine a node record with its incoming contributions.

        Args:
            node_id: key shared by all values in ``typed_values``.
            typed_values: iterable of ``(tag, value)`` pairs as produced by
                ``map_task``; ``tag`` is ``'node'`` or ``'score'``.

        Yields:
            ``(node_id, node)`` with ``node['score']`` replaced by
            ``1 - d + d * total`` where ``d`` is the damping factor and
            ``total`` is the sum of incoming contributions.

        Raises:
            ValueError: if a value carries a tag other than ``'node'`` or
                ``'score'``.
        """
        # Stays an empty dict when no 'node' record arrives for this key
        # (a node that is only ever a link destination).
        node = {}
        total_score = 0
        prev_score_set = False

        for value_type, value in typed_values:
            if value_type == 'node':
                node = value
                # Remember the pre-iteration score, but only for the first
                # 'node' record seen for this key.
                if not prev_score_set:
                    node['prev_score'] = node['score']
                    prev_score_set = True
            elif value_type == 'score':
                total_score += value
            else:
                # The original raised a misspelled name here (NameError);
                # ValueError is the intended, catchable failure.
                raise ValueError("Fishy business!!")

        d = self.options.damping_factor
        node['score'] = 1 - d + d * total_score

        yield node_id, node

    def steps(self):
        """Return the same map/reduce step repeated ``--iterations`` times."""
        # NOTE(review): self.mr() is the legacy step constructor; newer
        # mrjob releases use MRStep instead -- confirm the targeted version.
        return ([self.mr(mapper=self.map_task, reducer=self.reduce_task)] *
                self.options.iterations)
# Script entry point: only launch the job when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    MRPageRank.run()