-
Notifications
You must be signed in to change notification settings - Fork 146
Example programs
klbostee edited this page Nov 24, 2010
·
15 revisions
These example programs don’t illustrate all of Dumbo’s nifty features, but you should be able to get the basics from them. Some of these examples can also be found here with additional info in the form of docstrings.
def mapper(key, value):
    """Yield a ``(word, 1)`` pair for each whitespace-separated word in ``value``.

    ``key`` is accepted but not used.
    """
    for token in value.split():
        pair = (token, 1)
        yield pair


def reducer(key, values):
    """Yield a single ``(key, total)`` pair, where total is ``sum(values)``."""
    total = sum(values)
    yield key, total


if __name__ == "__main__":
    # dumbo is only needed when the file is executed as a script.
    import dumbo

    dumbo.run(mapper, reducer)
class Mapper:
    """Word-count mapper that skips every word listed in ``excludes.txt``."""

    def __init__(self):
        """Load the excluded words from ``excludes.txt``, one word per line.

        The file is opened by relative path; each line is stripped of
        surrounding whitespace before being stored in ``self.excludes``.
        """
        # ``with`` guarantees the file is closed even if reading raises,
        # which the manual open()/close() pair did not.
        with open("excludes.txt", "r") as excludes_file:
            self.excludes = set(line.strip() for line in excludes_file)

    def __call__(self, key, value):
        """Yield ``(word, 1)`` for each word in ``value`` that is not excluded.

        ``key`` is accepted but not used.
        """
        for word in value.split():
            if word not in self.excludes:
                yield word, 1


def reducer(key, values):
    """Yield a single ``(key, total)`` pair, where total is ``sum(values)``."""
    yield key, sum(values)


if __name__ == "__main__":
    import dumbo

    # The class itself (not an instance) is passed as the mapper; reducer is
    # passed twice -- presumably the third argument is the combiner, verify
    # against the dumbo.run documentation.
    dumbo.run(Mapper, reducer, reducer)
def mapper1(key, value):
    """Yield ``(word, 1)`` for each whitespace-separated word in ``value``.

    ``key`` is accepted but not used.
    """
    for token in value.split():
        yield token, 1


def mapper2(key, value):
    """Yield ``(letter, 1)`` for each element obtained by iterating ``key``.

    ``value`` is accepted but not used.
    """
    for char in key:
        yield char, 1


def reducer1(key, values):
    """Yield ``(key, total)`` only when the sum of ``values`` exceeds 1."""
    total = sum(values)
    if total > 1:
        result = (key, total)
        yield result


def reducer2(key, values):
    """Yield a single ``(key, total)`` pair, where total is ``sum(values)``."""
    total = sum(values)
    yield key, total


if __name__ == "__main__":
    # dumbo is only needed when the file is executed as a script.
    import dumbo

    job = dumbo.Job()
    # First iteration: mapper1 with reducer1; reducer2 is the third argument.
    job.additer(mapper1, reducer1, reducer2)
    # Second iteration: mapper2 with reducer2 in both remaining positions.
    job.additer(mapper2, reducer2, reducer2)
    job.run()
def mapper(key, value):
    """Pass ``(key, value)`` through only if ``value`` contains "playground.last.fm".

    Records whose value lacks that substring produce no output.
    """
    if "playground.last.fm" not in value:
        return
    yield key, value


if __name__ == "__main__":
    # dumbo is only needed when the file is executed as a script.
    import dumbo

    # Only a mapper is supplied to dumbo.run here.
    dumbo.run(mapper)