Skip to content
Browse files

Closes #30

  • Loading branch information...
1 parent 73fe86a · commit d5551dbf0c9a691fb120907769b6bc994c757821 · @klbostee committed May 15, 2009
Showing 4 changed files with 67 additions and 4 deletions.
  1. +18 −4 dumbo/lib.py
  2. +15 −0 examples/eno.txt
  3. +23 −0 examples/multicount.py
  4. +11 −0 tests/testexamples.py
View
22 dumbo/lib.py
@@ -16,9 +16,12 @@
import heapq
import os
+import types
from itertools import chain, imap, izip
from math import sqrt
+from dumbo.core import MapRedBase
+
def identitymapper(key, value):
yield (key, value)
@@ -96,10 +99,21 @@ def __new__(cls):
return object.__new__(cls)
def __init__(self):
- self.mappers = []
+ self._mappers = []
+
+ def itermappers(self):
+ for pattern, mapper in self._mappers:
+ if type(mapper) in (types.ClassType, type):
+ mappercls = type('DumboMapper', (mapper, MapRedBase), {})
+ if hasattr(mappercls, 'map'):
+ yield (pattern, mappercls().map)
+ else:
+ yield (pattern, mappercls())
+ else:
+ yield (pattern, mapper)
def __call__normalkey(self, data):
- mappers = self.mappers
+ mappers = list(self.itermappers())
for key, value in data:
path, key = key
for pattern, mapper in mappers:
@@ -108,7 +122,7 @@ def __call__normalkey(self, data):
yield output
def __call__joinkey(self, data):
- mappers = self.mappers
+ mappers = list(self.itermappers())
for key, value in data:
path = key.body[0]
key.body = key.body[1]
@@ -118,4 +132,4 @@ def __call__joinkey(self, data):
yield output
def add(self, pattern, mapper):
- self.mappers.append((pattern, mapper))
+ self._mappers.append((pattern, mapper))
View
15 examples/eno.txt
@@ -0,0 +1,15 @@
+He had arms and legs and hands and feet
+This boy whose name was Eno
+And he grew, grew, grew and grew
+Grew up to be
+Yes he grew up to be
+A teenager called Eno
+A teenager called Eno
+And his face became spotty
+Yes his face became spotty
+And his voice dropped down low
+And things started to grow
+On young Eno and show
+He was certainly no
+No girl named Eno
+Not a girl named Eno
View
23 examples/multicount.py
@@ -0,0 +1,23 @@
+"""
+Illustrates MultiMapper.
+"""
+
+from dumbo import main, MultiMapper, sumreducer
+
+def mapper1(key, value):
+ for word in value.split():
+ yield ("A", word), 1
+
+class Mapper2:
+ def __call__(self, key, value):
+ for word in value.split():
+ yield ("B", word), 1
+
+def runner(job):
+ mapper = MultiMapper()
+ mapper.add("brian", mapper1)
+ mapper.add("eno", Mapper2)
+ job.additer(mapper, sumreducer, combiner=sumreducer)
+
+if __name__ == "__main__":
+ main(runner)
View
11 tests/testexamples.py
@@ -62,6 +62,17 @@ def testjoin(self):
output = dict(util.loadcode(open(self.outfile)))
self.assertEqual(5, int(output['node1']))
+ def testmulticount(self):
+ opts = [('input', self.exdir+'brian.txt'),
+ ('input', self.exdir+'eno.txt'),
+ ('output', self.outfile)]
+ retval = cmd.start(self.exdir+'multicount.py', opts,
+ stdout=self.logfile, stderr=self.logfile)
+ self.assertEqual(0, retval)
+ output = dict(util.loadcode(open(self.outfile)))
+ self.assertEqual(6, int(output[('A', 'Brian')]))
+ self.assertEqual(6, int(output[('B', 'Eno')]))
+
if __name__ == "__main__":
suite = unittest.TestLoader().loadTestsFromTestCase(TestExamples)

0 comments on commit d5551db

Please sign in to comment.
Something went wrong with that request. Please try again.