From a72aa813ce4c0299b51b6ec11d8c1b3ee1f30134 Mon Sep 17 00:00:00 2001 From: Klaas Bosteels Date: Thu, 15 Jan 2009 20:15:21 +0100 Subject: [PATCH 1/2] added statscombiner --- src/python/dumbo.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/python/dumbo.py b/src/python/dumbo.py index 3049220..cfc4498 100644 --- a/src/python/dumbo.py +++ b/src/python/dumbo.py @@ -458,20 +458,31 @@ def sumsreducer(key, values): def statsmapper(key, value): - yield (key, (1, value, value**2)) + yield (key, (1, value, value**2, value, value)) def statsreducer(key, values): columns = izip(*values) s0 = sum(columns.next()) - column = columns.next() - (s1, minimum, maximum) = (sum(column), min(column), max(column)) + s1 = sum(columns.next()) s2 = sum(columns.next()) + minimum = min(columns.next()) + maximum = max(columns.next()) mean = float(s1) / s0 std = sqrt(s0 * s2 - s1**2) / s0 yield (key, (mean, std, minimum, maximum)) +def statscombiner(key, values): + columns = izip(*values) + s0 = sum(columns.next()) + s1 = sum(columns.next()) + s2 = sum(columns.next()) + minimum = min(columns.next()) + maximum = max(columns.next()) + yield (key, (s0, s1, s2, minimum, maximum)) + + def incrcounter(group, counter, amount): print >> sys.stderr, 'reporter:counter:%s,%s,%s' % (group, counter, amount) From c116d3989450329b0c6b1b6679e2b880777bdb4f Mon Sep 17 00:00:00 2001 From: Klaas Bosteels Date: Thu, 15 Jan 2009 20:16:55 +0100 Subject: [PATCH 2/2] version bump --- src/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/setup.py b/src/python/setup.py index 987265f..1b0f747 100644 --- a/src/python/setup.py +++ b/src/python/setup.py @@ -1,7 +1,7 @@ from distutils.core import setup setup(name='dumbo', - version='0.20.19', + version='0.20.20', py_modules=['dumbo'], author='Klaas Bosteels', author_email='klaas@last.fm',