Skip to content

Commit

Permalink
automatically sort before GroupBy
Browse files Browse the repository at this point in the history
  • Loading branch information
tandav committed May 13, 2023
1 parent b38dc0c commit 695c833
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 23 deletions.
40 changes: 21 additions & 19 deletions docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,15 +215,34 @@ useful for objects that don't have `__len__` method:

```

## Sorted

```py
>>> '3510' | Sorted()
['0', '1', '3', '5']

>>> '3510' | Sorted(reverse=True)
['5', '3', '1', '0']

>>> '!*&)#' | Sorted(key=ord)
['!', '#', '&', ')', '*']

>>> '!*&)#' | Sorted(key=ord, reverse=True)
['*', ')', '&', '#', '!']

```

## GroupBy

Note: `GroupBy` sorts the iterable before grouping. If you pass a key function, e.g. `GroupBy(len)`, it will also be used as the sorting key.

```py
>>> import operator
>>> [(0, 'a'), (0, 'b'), (1, 'c'), (2, 'd')] | GroupBy(operator.itemgetter(0)) | MapValues(list) | Pipe(list)
>>> [(0, 'a'), (1, 'c'), (0, 'b'), (2, 'd')] | GroupBy(operator.itemgetter(0)) | MapValues(list) | Pipe(list)
[(0, [(0, 'a'), (0, 'b')]), (1, [(1, 'c')]), (2, [(2, 'd')])]

>>> ['ab', 'cd', 'e', 'f', 'gh', 'ij'] | GroupBy(len) | MapValues(list) | Pipe(list)
[(2, ['ab', 'cd']), (1, ['e', 'f']), (2, ['gh', 'ij'])]
[(1, ['e', 'f']), (2, ['ab', 'cd', 'gh', 'ij'])]

```

Expand Down Expand Up @@ -282,23 +301,6 @@ False

```

## Sorted

```py
>>> '3510' | Sorted()
['0', '1', '3', '5']

>>> '3510' | Sorted(reverse=True)
['5', '3', '1', '0']

>>> '!*&)#' | Sorted(key=ord)
['!', '#', '&', ')', '*']

>>> '!*&)#' | Sorted(key=ord, reverse=True)
['*', ')', '&', '#', '!']

```

## Unique

```py
Expand Down
4 changes: 2 additions & 2 deletions pipe21.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ class Count (B): __ror__ = lambda self, it: sum(1 for _ in it)
class Slice(B):
    """Pipe step that lazily slices the piped iterable with ``itertools.islice``."""

    def __ror__(self, it):
        # ``self.f`` is the first islice argument; any extra positional
        # arguments stored on the instance follow it unchanged.
        bounds = (self.f, *self.args)
        return itertools.islice(it, *bounds)
class Take(B):
    """Pipe step that sends the iterable through ``Slice(self.f)`` and then ``Pipe(tuple)``."""

    def __ror__(self, it):
        sliced = it | Slice(self.f)
        return sliced | Pipe(tuple)
class Chunked(B):
    """Pipe step that yields successive ``Take(self.f)`` chunks of the iterable.

    Iteration stops as soon as a chunk equals the empty tuple.
    """

    def __ror__(self, it):
        # Bind the chunk size and a single shared iterator up front so every
        # call to ``next_chunk`` continues where the previous one stopped.
        size = self.f
        source = iter(it)

        def next_chunk():
            return source | Take(size)

        # Two-argument iter(): call ``next_chunk`` until it returns ``()``.
        return iter(next_chunk, ())
class GroupBy(B):
    """Pipe step that groups the iterable with ``itertools.groupby`` keyed by ``self.f``.

    The iterable is handed to ``groupby`` as-is, in its incoming order.
    """

    def __ror__(self, it):
        keyfunc = self.f
        return itertools.groupby(it, key=keyfunc)
class Sorted(B):
    """Pipe step that returns ``sorted(it)`` using the instance's keyword arguments."""

    def __ror__(self, it):
        # Only keyword arguments (e.g. ``key=``, ``reverse=``) reach sorted().
        options = self.kw
        return sorted(it, **options)
class GroupBy(B):
    """Pipe step that sorts the iterable by ``self.f`` and then groups it by ``self.f``.

    Sorting first means equal keys are adjacent when ``itertools.groupby``
    walks the items.
    """

    def __ror__(self, it):
        ordered = it | Sorted(key=self.f)
        return itertools.groupby(ordered, key=self.f)
class PipeArgs(B):
    """Pipe step that calls ``self.f`` with the piped value unpacked as positional arguments."""

    def __ror__(self, x):
        func = self.f
        return func(*x)
class StarMap(B):
    """Pipe step that maps ``PipeArgs(self.f)`` over every item of the piped value."""

    def __ror__(self, x):
        def call_unpacked(item):
            # ``self.f`` is looked up when each item is processed.
            return item | PipeArgs(self.f)

        return x | Map(call_unpacked)
class IsUnique(B):
    """Pipe step that reports whether all items (or their ``self.f`` images) are distinct.

    Compares ``len(seq)`` with the size of the set built from the items, so
    ``seq`` must support ``len()``.
    """

    def __ror__(self, seq):
        # Take the length first, before the items are iterated.
        total = len(seq)
        if self.f is None:
            distinct = set(seq)
        else:
            distinct = set(map(self.f, seq))
        return total == len(distinct)
class Sorted(B):
    """Pipe step that sorts the piped iterable, forwarding ``self.kw`` to ``sorted()``."""

    def __ror__(self, it):
        sort_kwargs = self.kw
        return sorted(it, **sort_kwargs)
class MapApply(B):
    """Pipe step that maps ``Apply(self.f)`` over every item of the iterable."""

    def __ror__(self, it):
        def apply_to(item):
            # ``self.f`` is looked up when each item is processed.
            return item | Apply(self.f)

        return it | Map(apply_to)
class ReduceByKey(B):
    """Pipe step that reduces the values of ``(key, value)`` pairs per key.

    Pairs are ordered by key, grouped by key, and each group's values are
    folded with ``Reduce(self.f)``; the result is passed through ``Pipe(list)``.
    """

    def __ror__(self, it):
        def pair_key(kv):
            return kv[0]

        def reduce_values(pairs):
            return pairs | Values() | Reduce(self.f)

        # The key function must be passed as ``key=``: Sorted forwards only its
        # keyword arguments to sorted(), so a positional callable would be
        # ignored and pairs would be ordered by the whole (key, value) tuple
        # (failing on values that cannot be compared with each other).
        by_key = it | Sorted(key=pair_key)
        return by_key | GroupBy(pair_key) | MapValues(reduce_values) | Pipe(list)

Expand Down
4 changes: 2 additions & 2 deletions tests/pipe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ def test_chunked(it, n, expected):

@pytest.mark.parametrize(
('it', 'f', 'expected'), [
([(0, 'a'), (0, 'b'), (1, 'c'), (2, 'd')], operator.itemgetter(0), [(0, [(0, 'a'), (0, 'b')]), (1, [(1, 'c')]), (2, [(2, 'd')])]),
(['ab', 'cd', 'e', 'f', 'gh', 'ij'], len, [(2, ['ab', 'cd']), (1, ['e', 'f']), (2, ['gh', 'ij'])]),
([(0, 'a'), (1, 'c'), (0, 'b'), (2, 'd')], operator.itemgetter(0), [(0, [(0, 'a'), (0, 'b')]), (1, [(1, 'c')]), (2, [(2, 'd')])]),
(['ab', 'cd', 'e', 'f', 'gh', 'ij'], len, [(1, ['e', 'f']), (2, ['ab', 'cd', 'gh', 'ij'])]),
],
)
def test_groupby(it, f, expected):
Expand Down

0 comments on commit 695c833

Please sign in to comment.