automatically sort before GroupBy

tandav · May 13, 2023 · 695c833 · 695c833
1 parent b38dc0c
commit 695c833
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 23 deletions.
diff --git a/docs/reference.md b/docs/reference.md
@@ -215,15 +215,34 @@ useful for objects that don't have `__len__` method:
 
 ```
 
+## Sorted
+
+```py
+>>> '3510' | Sorted()
+['0', '1', '3', '5']
+
+>>> '3510' | Sorted(reverse=True)
+['5', '3', '1', '0']
+
+>>> '!*&)#' | Sorted(key=ord)
+['!', '#', '&', ')', '*']
+
+>>> '!*&)#' | Sorted(key=ord, reverse=True)
+['*', ')', '&', '#', '!']
+
+```
+
 ## GroupBy
 
+Note: `GroupBy` sorts the iterable before grouping, so items with equal keys do not need to be adjacent in the input. If you pass a key function, e.g. `GroupBy(len)`, it is also used as the sorting key.
+
 ```py
 >>> import operator
->>> [(0, 'a'), (0, 'b'), (1, 'c'), (2, 'd')] | GroupBy(operator.itemgetter(0)) | MapValues(list) | Pipe(list)
+>>> [(0, 'a'), (1, 'c'), (0, 'b'), (2, 'd')] | GroupBy(operator.itemgetter(0)) | MapValues(list) | Pipe(list)
 [(0, [(0, 'a'), (0, 'b')]), (1, [(1, 'c')]), (2, [(2, 'd')])]
 
 >>> ['ab', 'cd', 'e', 'f', 'gh', 'ij'] | GroupBy(len) | MapValues(list) | Pipe(list)
-[(2, ['ab', 'cd']), (1, ['e', 'f']), (2, ['gh', 'ij'])]
+[(1, ['e', 'f']), (2, ['ab', 'cd', 'gh', 'ij'])]
 
 ```
 
@@ -282,23 +301,6 @@ False
 
 ```
 
-## Sorted
-
-```py
->>> '3510' | Sorted()
-['0', '1', '3', '5']
-
->>> '3510' | Sorted(reverse=True)
-['5', '3', '1', '0']
-
->>> '!*&)#' | Sorted(key=ord)
-['!', '#', '&', ')', '*']
-
->>> '!*&)#' | Sorted(key=ord, reverse=True)
-['*', ')', '&', '#', '!']
-
-```
-
 ## Unique
 
 ```py

diff --git a/pipe21.py b/pipe21.py
@@ -34,11 +34,11 @@ class Count        (B): __ror__ = lambda self, it: sum(1 for _ in it)
 class Slice        (B): __ror__ = lambda self, it: itertools.islice(it, self.f, *self.args)
 class Take         (B): __ror__ = lambda self, it: it | Slice(self.f) | Pipe(tuple)
 class Chunked      (B): __ror__ = lambda self, it: iter(functools.partial(lambda n, i: i | Take(n), self.f, iter(it)), ())
-class GroupBy      (B): __ror__ = lambda self, it: itertools.groupby(it, key=self.f)
+class Sorted       (B): __ror__ = lambda self, it: sorted(it, **self.kw)
+class GroupBy      (B): __ror__ = lambda self, it: itertools.groupby(it | Sorted(key=self.f), key=self.f)
 class PipeArgs     (B): __ror__ = lambda self, x: self.f(*x)
 class StarMap      (B): __ror__ = lambda self, x: x | Map(lambda y: y | PipeArgs(self.f))
 class IsUnique     (B): __ror__ = lambda self, seq: len(seq) == len(set(seq if self.f is None else map(self.f, seq)))
-class Sorted       (B): __ror__ = lambda self, it: sorted(it, **self.kw)
 class MapApply     (B): __ror__ = lambda self, it: it | Map(lambda x: x | Apply(self.f))
 class ReduceByKey  (B): __ror__ = lambda self, it: it | Sorted(lambda kv: kv[0]) | GroupBy(lambda kv: kv[0]) | MapValues(lambda kv: kv | Values() | Reduce(self.f)) | Pipe(list)
 

diff --git a/tests/pipe_test.py b/tests/pipe_test.py
@@ -222,8 +222,8 @@ def test_chunked(it, n, expected):
 
 @pytest.mark.parametrize(
     ('it', 'f', 'expected'), [
-        ([(0, 'a'), (0, 'b'), (1, 'c'), (2, 'd')], operator.itemgetter(0), [(0, [(0, 'a'), (0, 'b')]), (1, [(1, 'c')]), (2, [(2, 'd')])]),
-        (['ab', 'cd', 'e', 'f', 'gh', 'ij'], len, [(2, ['ab', 'cd']), (1, ['e', 'f']), (2, ['gh', 'ij'])]),
+        ([(0, 'a'), (1, 'c'), (0, 'b'), (2, 'd')], operator.itemgetter(0), [(0, [(0, 'a'), (0, 'b')]), (1, [(1, 'c')]), (2, [(2, 'd')])]),
+        (['ab', 'cd', 'e', 'f', 'gh', 'ij'], len, [(1, ['e', 'f']), (2, ['ab', 'cd', 'gh', 'ij'])]),
     ],
 )
 def test_groupby(it, f, expected):