Skip to content

Commit

Permalink
Closes #21
Browse files Browse the repository at this point in the history
  • Loading branch information
elias committed Apr 29, 2009
1 parent 81cae43 commit 3069d0e
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
8 changes: 4 additions & 4 deletions dumbo/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@ def combiner(key_, values):

def statsreducer(key, values):
    """Merge partial statistics for one key into final summary statistics.

    Args:
        key: The key the partial statistics belong to; passed through as-is.
        values: Iterable of 5-tuples ``(n, sum(x), sum(x**2), min, max)``,
            one tuple per partial result (presumably emitted by the matching
            stats combiner -- verify against the caller).

    Yields:
        A single ``(key, (n, mean, std, minimum, maximum))`` pair, where
        ``std`` is the sample standard deviation (``n - 1`` denominator).
        With a single observation the sample deviation is undefined, so
        ``0.0`` is reported instead of dividing by zero.
    """
    # One aggregate per column, in the order the partial tuples are laid out.
    aggregates = (sum, sum, sum, min, max)
    s0, s1, s2, minimum, maximum = [
        aggregate(column)
        for aggregate, column in iizip(aggregates, iizip(*values))
    ]
    mean = float(s1) / s0
    if s0 > 1:
        # float() keeps Python 2 from truncating when every input is an int.
        variance = (s2 - float(s1) ** 2 / s0) / (s0 - 1)
        # Rounding can push a near-zero variance slightly negative, which
        # would make sqrt() raise; clamp it.
        std = sqrt(max(variance, 0.0))
    else:
        std = 0.0
    yield (key, (s0, mean, std, minimum, maximum))


Expand Down
7 changes: 4 additions & 3 deletions tests/testlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
class TestLib(unittest.TestCase):
    """Tests for the helper mappers/combiners/reducers in ``lib``."""

    def teststats(self):
        """Feed 0, 1, 2 through the stats combiner and reducer; check n, mean, std."""
        records = [('testkey', i) for i in xrange(3)]
        combined = core.itermapred(records, lib.identitymapper, lib.statscombiner)
        output = dict(core.itermapred(combined, lib.identitymapper, lib.statsreducer))
        stats = output['testkey']
        self.assertEqual(stats[0], 3)  # n
        self.assertEqual(stats[1], 1)  # mean
        # std is a computed float, so compare with a tolerance.
        self.assertAlmostEqual(stats[2], 1.0)  # sample standard deviation

if __name__ == "__main__":
suite = unittest.TestLoader().loadTestsFromTestCase(TestMapReduce)
Expand Down

0 comments on commit 3069d0e

Please sign in to comment.