Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
add mapper and reducers to illustrate map/reduce in python
Browse files Browse the repository at this point in the history
  • Loading branch information
sdpython committed Apr 10, 2018
1 parent 6b71c16 commit 6b3440b
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 0 deletions.
10 changes: 10 additions & 0 deletions _doc/sphinxdoc/source/api/fctmr.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

Mapper and Reducer
==================

Those functions are mostly used to illustrate mappers and
reducers in map/reduce languages.

.. autosignature:: sparkouille.fctmr.simplefctmr.mapper

.. autosignature:: sparkouille.fctmr.simplefctmr.reducer
73 changes: 73 additions & 0 deletions _unittests/ut_fctmr/test_fctmr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#-*- coding: utf-8 -*-
"""
@brief test log(time=20s)
"""

import sys
import os
import unittest


try:
import src
except ImportError:
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..")))
if path not in sys.path:
sys.path.append(path)
import src

try:
import pyquickhelper as skip_
except ImportError:
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..",
"..",
"pyquickhelper",
"src")))
if path not in sys.path:
sys.path.append(path)
import pyquickhelper as skip_

from pyquickhelper.pycode import ExtTestCase
from src.sparkouille.fctmr import mapper, reducer


class TestFctMr(ExtTestCase):
    """Unit tests for the *mapper* and *reducer* helpers."""

    def test_mapper(self):
        """The mapper does not return a list and applies the function to each item."""
        incremented = mapper(lambda x: x + 1, [4, 5])
        self.assertNotIsInstance(incremented, list)
        self.assertEqual(list(incremented), [5, 6])

    def test_reducer(self):
        """Checks every combination of *asiter* and *sort*."""
        def first(pair):
            return pair[0]

        rows = [('a', 1), ('b', 2), ('a', 3)]
        # Expected output when rows are sorted by key before grouping.
        grouped = [('a', [('a', 1), ('a', 3)]), ('b', [('b', 2)])]
        # Expected output when only consecutive equal keys are grouped.
        runs = [('a', [('a', 1)]), ('b', [('b', 2)]), ('a', [('a', 3)])]

        # Groups materialized as lists by the reducer itself.
        self.assertEqual(list(reducer(first, rows, asiter=False)), grouped)
        self.assertEqual(
            list(reducer(first, rows, asiter=False, sort=False)), runs)

        # Groups returned as iterators: each one is converted into a list
        # while walking through the result.
        lazy_runs = reducer(first, rows, asiter=True, sort=False)
        self.assertEqual(
            [(key, list(grp)) for key, grp in lazy_runs], runs)
        lazy_grouped = reducer(first, rows, asiter=True, sort=True)
        self.assertEqual(
            [(key, list(grp)) for key, grp in lazy_grouped], grouped)


if __name__ == "__main__":
unittest.main()
6 changes: 6 additions & 0 deletions src/sparkouille/fctmr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""
@file
@brief Shortcuts to *fctmr*
"""

from .simplefctmr import mapper, reducer
44 changes: 44 additions & 0 deletions src/sparkouille/fctmr/simplefctmr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#-*- coding: utf-8 -*-
"""
@file
@brief Simple *mapper* and *reducer* implemented in :epkg:`Python`
"""
from itertools import groupby


def mapper(fct, gen):
    """
    Applies *fct* to every element produced by *gen*.

    @param      fct     function taking a single element
    @param      gen     iterable (generator, list, ...) of elements
    @return             iterator over ``fct(element)``, in input order
    """
    mapped = map(fct, gen)
    return mapped


def reducer(fctkey, gen, asiter=True, sort=True):
    """
    Implements a reducer: groups the elements of *gen* sharing the same key.

    @param      fctkey  function which returns the key of an element
    @param      gen     iterable of elements
    @param      asiter  if True, each group is the iterator produced by
                        :func:`itertools.groupby` (it must be consumed
                        before moving to the next group), otherwise each
                        group is returned as a list
    @param      sort    sort elements by key before grouping; if False,
                        only consecutive elements sharing the same key
                        end up in the same group
    @return             generator of tuples *(key, group)*

    When *sort* is True, only the keys are compared: the elements
    themselves do not need to be orderable and elements sharing a key
    keep their input order.
    """
    if sort:
        # Sort on the key alone. Sorting (key, element) tuples would fall
        # back to comparing the elements whenever two keys are equal, which
        # raises TypeError for non-orderable elements such as dictionaries.
        gen = sorted(gen, key=fctkey)
    groups = groupby(gen, fctkey)
    if asiter:
        # This function is a generator, so the groups are re-yielded
        # rather than returned.
        yield from groups
    else:
        for key, group in groups:
            yield key, list(group)

0 comments on commit 6b3440b

Please sign in to comment.