Permalink
Browse files

Closes #10

  • Loading branch information...
1 parent 4f7c74c commit 50eb428dfbeaf4e1714fbe44320b409be0aeb84c @klbostee committed Mar 22, 2009
@@ -1,15 +1,6 @@
"""
Counts how many times each word occurs, using the alternative
-(more low-level) interface to mappers/reducers:
-
->>> from dumbo import cmd, util
->>> opts = [('input','brian.txt'),('output','counts.txt')]
->>> logfile = open('log.txt','a')
->>> cmd.start('wordcount.py',opts,stdout=logfile,stderr=logfile)
-0
->>> output = dict(util.loadcode(open('counts.txt')))
->>> int(output['Brian'])
-6
+(more low-level) interface to mappers/reducers.
"""
def mapper(data):
View
@@ -1,14 +1,5 @@
"""
-Example of two iterations in one Dumbo program:
-
->>> from dumbo import cmd, util
->>> opts = [('input','brian.txt'),('output','counts.txt')]
->>> logfile = open('log.txt','a')
->>> cmd.start('itertwice.py',opts,stdout=logfile,stderr=logfile)
-0
->>> output = dict(util.loadcode(open('counts.txt')))
->>> int(output['e'])
-14
+Example of two iterations in one Dumbo program.
"""
def mapper1(key,value):
View
@@ -1,15 +1,5 @@
"""
-Joins hostnames with logs and counts number of logs per host:
-
->>> from dumbo import cmd, util
->>> opts = [('input','hostnames.code'),('input','logs.code')]
->>> opts += [('inputformat','code'),('output','counts.code')]
->>> logfile = open('log.txt','a')
->>> cmd.start('join.py',opts,stdout=logfile,stderr=logfile)
-0
->>> output = dict(util.loadcode(open('counts.code')))
->>> int(output['node1'])
-5
+Joins hostnames with logs and counts number of logs per host.
"""
def mapper1(key, value):
@@ -1,14 +1,5 @@
"""
-Counts how many times each non-excluded word occurs:
-
->>> from dumbo import cmd, util
->>> opts = [('excludes','excludes.txt'),('output','counts.txt')]
->>> logfile = open('log.txt','a')
->>> cmd.start('oowordcount.py',opts,stdout=logfile,stderr=logfile)
-0
->>> output = dict(util.loadcode(open('counts.txt')))
->>> int(output['Brian'])
-6
+Counts how many times each non-excluded word occurs.
"""
class Mapper:
@@ -29,7 +20,6 @@ def runner(job):
def starter(prog):
excludes = prog.delopt("excludes")
if excludes: prog.addopt("param","excludes="+excludes)
- prog.addopt("input","brian.txt")
if __name__ == "__main__":
import dumbo
View
@@ -1,14 +1,5 @@
"""
-Counts how many times each word occurs:
-
->>> from dumbo import cmd, util
->>> opts = [('input','brian.txt'),('output','counts.txt')]
->>> logfile = open('log.txt','a')
->>> cmd.start('wordcount.py',opts,stdout=logfile,stderr=logfile)
-0
->>> output = dict(util.loadcode(open('counts.txt')))
->>> int(output['Brian'])
-6
+Counts how many times each word occurs.
"""
def mapper(key,value):
View
@@ -0,0 +1,79 @@
+import os
+import sys
+import unittest
+from dumbo import cmd, util
+
+class TestExamples(unittest.TestCase):
+    """Runs each Dumbo example locally and checks one known count.
+
+    Each test starts an example script through cmd.start, expects a zero
+    return value, and then looks up a single key in the output that
+    util.loadcode reads back from the output file.
+    """
+
+    def setUp(self):
+        # Locate both directories relative to this file rather than by
+        # splitting the path on a literal "tests/", so the lookup also
+        # works for "./thisfile.py" or a differently named directory.
+        self.tstdir = (os.path.dirname(__file__) or ".") + "/"
+        self.exdir = self.tstdir + "../examples/"
+        # Mode "w" truncates the log at the start of every test.
+        self.logfile = open(self.tstdir + "log.txt", "w")
+        self.outfile = self.tstdir + "output.code"
+
+    def tearDown(self):
+        self.logfile.close()
+        # The output file does not exist when the example itself failed;
+        # cleanup must not raise and mask the real test failure.
+        if os.path.exists(self.outfile):
+            os.remove(self.outfile)
+
+    def _run_example(self, script, opts):
+        """Run an example script and return its output as a dict.
+
+        script is a file name inside the examples directory and opts is the
+        option list handed to cmd.start.  Fails the test when cmd.start
+        does not return 0.
+        """
+        retval = cmd.start(self.exdir + script, opts,
+                           stdout=self.logfile, stderr=self.logfile)
+        self.assertEqual(0, retval)
+        # Close the output file explicitly instead of leaking the handle.
+        outfile = open(self.outfile)
+        try:
+            return dict(util.loadcode(outfile))
+        finally:
+            outfile.close()
+
+    def testwordcount(self):
+        opts = [('input', self.exdir+'brian.txt'), ('output', self.outfile)]
+        output = self._run_example('wordcount.py', opts)
+        self.assertEqual(6, int(output['Brian']))
+
+    def testoowordcount(self):
+        opts = [('excludes', self.exdir+'excludes.txt'),
+                ('input', self.exdir+'brian.txt'), ('output', self.outfile)]
+        output = self._run_example('oowordcount.py', opts)
+        self.assertEqual(6, int(output['Brian']))
+
+    def testaltwordcount(self):
+        opts = [('input', self.exdir+'brian.txt'), ('output', self.outfile)]
+        output = self._run_example('altwordcount.py', opts)
+        self.assertEqual(6, int(output['Brian']))
+
+    def testitertwice(self):
+        opts = [('input', self.exdir+'brian.txt'), ('output', self.outfile)]
+        output = self._run_example('itertwice.py', opts)
+        self.assertEqual(14, int(output['e']))
+
+    def testjoin(self):
+        opts = [('input', self.exdir+'hostnames.code'),
+                ('input', self.exdir+'logs.code'),
+                ('inputformat', 'code'), ('output', self.outfile)]
+        output = self._run_example('join.py', opts)
+        self.assertEqual(5, int(output['node1']))
+
+
+if __name__ == "__main__":
+    suite = unittest.TestLoader().loadTestsFromTestCase(TestExamples)
+    unittest.TextTestRunner(verbosity=2).run(suite)

0 comments on commit 50eb428

Please sign in to comment.