diff --git a/.binstar.yml b/.binstar.yml index 43606529..86ea8b8d 100644 --- a/.binstar.yml +++ b/.binstar.yml @@ -6,9 +6,7 @@ platform: - win-64 - win-32 engine: - - python=2.6 - python=2.7 - - python=3.3 - python=3.4 script: - conda build conda.recipe diff --git a/.gitignore b/.gitignore index 3d4bfdf9..6e6f4b7d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ build/ dist/ *.egg-info/ +bench/shakespeare.txt +.coverage diff --git a/.travis.yml b/.travis.yml index c6e2098f..6d8404eb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,22 +1,23 @@ sudo: false language: python python: - - "2.6" - "2.7" - - "3.3" - "3.4" - "3.5" - "3.6" - "3.7-dev" - "pypy" +# Enable 3.7 without globally enabling sudo and dist: xenial for other build jobs +matrix: + include: + - python: 3.7 + dist: xenial + sudo: true + env: - PEP8_IGNORE="E731,W503,E402" -matrix: - allow_failures: - - python: "3.7-dev" - # command to install dependencies install: - pip install coverage pep8 pytest diff --git a/README.rst b/README.rst index ee1628c7..099c3ff8 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ Toolz ===== -|Build Status| |Coverage Status| |Version Status| |Downloads| +|Build Status| |Coverage Status| |Version Status| A set of utility functions for iterators, functions, and dictionaries. @@ -30,7 +30,7 @@ Structure and Heritage ``unique``, ``interpose``, |literal functoolz|_, for higher-order functions. Examples: ``memoize``, -``curry``, ``compose`` +``curry``, ``compose``, |literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``, ``update-in``, ``merge``. @@ -73,11 +73,11 @@ This builds a standard wordcount function from pieces within ``toolz``: Dependencies ------------ -``toolz`` supports Python 2.6+ and Python 3.3+ with a common codebase. +``toolz`` supports Python 2.7 and Python 3.4+ with a common codebase. It is pure Python and requires no dependencies beyond the standard library. -It is, in short, a light weight dependency. 
+It is, in short, a lightweight dependency. CyToolz @@ -86,21 +86,21 @@ CyToolz The ``toolz`` project has been reimplemented in `Cython `__. The ``cytoolz`` project is a drop-in replacement for the Pure Python implementation. -See `CyToolz Github Page `__ for more +See `CyToolz GitHub Page `__ for more details. See Also -------- -- `Underscore.js `__: A similar library for +- `Underscore.js `__: A similar library for JavaScript -- `Enumerable `__: A +- `Enumerable `__: A similar library for Ruby -- `Clojure `__: A functional language whose +- `Clojure `__: A functional language whose standard library has several counterparts in ``toolz`` -- `itertools `__: The +- `itertools `__: The Python standard library for iterator tools -- `functools `__: The +- `functools `__: The Python standard library for function tools Contributions Welcome @@ -129,6 +129,4 @@ We're friendly. .. |Coverage Status| image:: https://coveralls.io/repos/pytoolz/toolz/badge.svg?branch=master :target: https://coveralls.io/r/pytoolz/toolz .. |Version Status| image:: https://badge.fury.io/py/toolz.svg - :target: http://badge.fury.io/py/toolz -.. |Downloads| image:: https://img.shields.io/pypi/dm/toolz.svg - :target: https://pypi.python.org/pypi/toolz/ + :target: https://badge.fury.io/py/toolz diff --git a/doc/source/api.rst b/doc/source/api.rst index f53ff19d..86ac4c0b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -34,6 +34,7 @@ Itertoolz partition partition_all peek + peekn pluck random_sample reduceby diff --git a/doc/source/heritage.rst b/doc/source/heritage.rst index c66afea5..f2fc587a 100644 --- a/doc/source/heritage.rst +++ b/doc/source/heritage.rst @@ -39,13 +39,13 @@ philosophy but mimics declarative database languages rather than functional ones. Enumerable_ is is the closest project in Ruby. Other excellent projects also exist within the Python ecosystem, most notably Fn.py_ and Funcy_. -.. [itertools] http://docs.python.org/2/library/itertools.html -.. 
[functools] http://docs.python.org/2/library/functools.html -.. [itertoolz] http://github.com/pytoolz/itertoolz -.. [functoolz] http://github.com/pytoolz/functoolz -.. [Underscore.js] http://underscorejs.org -.. [cheatsheet] http://clojure.org/cheatsheet -.. [Guido] http://python-history.blogspot.com/2009/04/origins-of-pythons-functional-features.html -.. [Enumerable] http://ruby-doc.org/core-2.0.0/Enumerable.html +.. [itertools] https://docs.python.org/2/library/itertools.html +.. [functools] https://docs.python.org/2/library/functools.html +.. [itertoolz] https://github.com/mrocklin/itertoolz +.. [functoolz] https://github.com/mrocklin/functoolz +.. [Underscore.js] https://underscorejs.org +.. [cheatsheet] https://clojure.org/cheatsheet +.. [Guido] https://python-history.blogspot.com/2009/04/origins-of-pythons-functional-features.html +.. [Enumerable] https://ruby-doc.org/core-2.0.0/Enumerable.html .. [funcy] https://github.com/suor/funcy/ .. [fn.py] https://github.com/kachayev/fn.py diff --git a/doc/source/install.rst b/doc/source/install.rst index 07051b17..cfba80aa 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -11,4 +11,4 @@ three ways: 1. Toolz is pure Python 2. Toolz relies only on the standard library -3. Toolz simultaneously supports Python versions 2.6, 2.7, 3.3, 3.4, 3.5, 3.6, PyPy +3. Toolz simultaneously supports Python versions 2.7, 3.4+, PyPy diff --git a/doc/source/streaming-analytics.rst b/doc/source/streaming-analytics.rst index 9a8cdd06..0cd73af5 100644 --- a/doc/source/streaming-analytics.rst +++ b/doc/source/streaming-analytics.rst @@ -38,7 +38,7 @@ These functions correspond to the SQL commands ``SELECT`` and ``WHERE``. ... map(get([1, 2])), ... list) -*note: this uses the curried_ versions of ``map`` and ``filter``.* +Note: this uses the `curried`_ versions of ``map`` and ``filter``. Of course, these operations are also well supported with standard list/generator comprehension syntax. 
This syntax is more often used and @@ -81,8 +81,8 @@ groups. .. code:: - >>> from toolz import groupby, valmap, compose - >>> from toolz.curried import get, pluck + >>> from toolz import compose + >>> from toolz.curried import get, pluck, groupby, valmap >>> groupby(get(3), accounts) {'F': [(1, 'Alice', 100, 'F'), (5, 'Edith', 300, 'F')], diff --git a/setup.py b/setup.py index 7ae76e18..9577b7fe 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup(name='toolz', version=toolz.__version__, description='List processing tools and functional utilities', - url='http://github.com/pytoolz/toolz/', + url='https://github.com/pytoolz/toolz/', author='https://raw.github.com/pytoolz/toolz/master/AUTHORS.md', maintainer='Matthew Rocklin', maintainer_email='mrocklin@gmail.com', @@ -21,16 +21,17 @@ long_description=(open('README.rst').read() if exists('README.rst') else ''), zip_safe=False, + python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", classifiers=[ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: BSD License", "Programming Language :: Python", - "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy"]) diff --git a/tlz/_build_tlz.py b/tlz/_build_tlz.py index 34eac604..3c017a54 100644 --- a/tlz/_build_tlz.py +++ b/tlz/_build_tlz.py @@ -1,15 +1,15 @@ import sys import types import toolz -from toolz.compatibility import import_module +from importlib import import_module class TlzLoader(object): """ Finds and loads ``tlz`` modules when added to sys.meta_path""" def __init__(self): - self.always_from_toolz = 
set([ + self.always_from_toolz = { toolz.pipe, - ]) + } def _load_toolz(self, fullname): rv = {} diff --git a/toolz/_signatures.py b/toolz/_signatures.py index 28aa95ce..b9d87012 100644 --- a/toolz/_signatures.py +++ b/toolz/_signatures.py @@ -16,8 +16,9 @@ import inspect import itertools import operator +from importlib import import_module -from .compatibility import PY3, import_module +from .compatibility import PY3 from .functoolz import (is_partial_args, is_arity, has_varargs, has_keywords, num_required_args) @@ -640,8 +641,8 @@ def get_exclude_keywords(num_pos_only, sigspec): def signature_or_spec(func): try: return inspect.signature(func) - except (ValueError, TypeError) as e: - return e + except (ValueError, TypeError): + return None else: # pragma: py3 no cover def num_pos_args(sigspec): @@ -662,8 +663,8 @@ def get_exclude_keywords(num_pos_only, sigspec): def signature_or_spec(func): try: return inspect.getargspec(func) - except TypeError as e: - return e + except TypeError: + return None def expand_sig(sig): @@ -697,7 +698,7 @@ def expand_sig(sig): num_pos_only = num_pos_args(sigspec) keyword_only = () keyword_exclude = get_exclude_keywords(num_pos_only, sigspec) - return (num_pos_only, func, keyword_only + keyword_exclude, sigspec) + return num_pos_only, func, keyword_only + keyword_exclude, sigspec signatures = {} diff --git a/toolz/compatibility.py b/toolz/compatibility.py index 67635026..51e3673f 100644 --- a/toolz/compatibility.py +++ b/toolz/compatibility.py @@ -1,13 +1,12 @@ import operator import sys PY3 = sys.version_info[0] > 2 -PY33 = sys.version_info[0] == 3 and sys.version_info[1] == 3 PY34 = sys.version_info[0] == 3 and sys.version_info[1] == 4 PYPY = hasattr(sys, 'pypy_version_info') __all__ = ('map', 'filter', 'range', 'zip', 'reduce', 'zip_longest', 'iteritems', 'iterkeys', 'itervalues', 'filterfalse', - 'PY3', 'PY34', 'PYPY', 'import_module') + 'PY3', 'PY34', 'PYPY') if PY3: map = map @@ -20,6 +19,7 @@ iteritems = 
operator.methodcaller('items') iterkeys = operator.methodcaller('keys') itervalues = operator.methodcaller('values') + from collections.abc import Sequence else: range = xrange reduce = reduce @@ -31,10 +31,4 @@ iteritems = operator.methodcaller('iteritems') iterkeys = operator.methodcaller('iterkeys') itervalues = operator.methodcaller('itervalues') - -try: - from importlib import import_module -except ImportError: - def import_module(name): - __import__(name) - return sys.modules[name] + from collections import Sequence diff --git a/toolz/curried/__init__.py b/toolz/curried/__init__.py index 43aeffd4..20b6e2a0 100644 --- a/toolz/curried/__init__.py +++ b/toolz/curried/__init__.py @@ -26,9 +26,11 @@ import toolz from . import operator from toolz import ( + apply, comp, complement, compose, + compose_left, concat, concatv, count, @@ -59,6 +61,7 @@ assoc_in = toolz.curry(toolz.assoc_in) cons = toolz.curry(toolz.cons) countby = toolz.curry(toolz.countby) +dissoc = toolz.curry(toolz.dissoc) do = toolz.curry(toolz.do) drop = toolz.curry(toolz.drop) excepts = toolz.curry(toolz.excepts) @@ -80,6 +83,7 @@ partition = toolz.curry(toolz.partition) partition_all = toolz.curry(toolz.partition_all) partitionby = toolz.curry(toolz.partitionby) +peekn = toolz.curry(toolz.peekn) pluck = toolz.curry(toolz.pluck) random_sample = toolz.curry(toolz.random_sample) reduce = toolz.curry(toolz.reduce) diff --git a/toolz/curried/operator.py b/toolz/curried/operator.py index 6af55e5d..8bc9e523 100644 --- a/toolz/curried/operator.py +++ b/toolz/curried/operator.py @@ -11,8 +11,8 @@ def should_curry(f): locals().update( - dict((name, curry(f) if should_curry(f) else f) - for name, f in vars(operator).items() if callable(f)), + {name: curry(f) if should_curry(f) else f + for name, f in vars(operator).items() if callable(f)}, ) # Clean up the namespace. 
diff --git a/toolz/dicttoolz.py b/toolz/dicttoolz.py index f840e7fc..91bff23c 100644 --- a/toolz/dicttoolz.py +++ b/toolz/dicttoolz.py @@ -1,4 +1,3 @@ -import copy import operator from toolz.compatibility import (map, zip, iteritems, iterkeys, itervalues, reduce) @@ -11,8 +10,8 @@ def _get_factory(f, kwargs): factory = kwargs.pop('factory', dict) if kwargs: - raise TypeError("{0}() got an unexpected keyword argument " - "'{1}'".format(f.__name__, kwargs.popitem()[0])) + raise TypeError("{}() got an unexpected keyword argument " + "'{}'".format(f.__name__, kwargs.popitem()[0])) return factory @@ -192,11 +191,12 @@ def assoc(d, key, value, factory=dict): {'x': 1, 'y': 3} """ d2 = factory() + d2.update(d) d2[key] = value - return merge(d, d2, factory=factory) + return d2 -def dissoc(d, *keys): +def dissoc(d, *keys, **kwargs): """ Return a new dict with the given key(s) removed. New dict has d[key] deleted for each supplied key. @@ -209,10 +209,19 @@ def dissoc(d, *keys): >>> dissoc({'x': 1}, 'y') # Ignores missing keys {'x': 1} """ - d2 = copy.copy(d) - for key in keys: - if key in d2: - del d2[key] + factory = _get_factory(dissoc, kwargs) + d2 = factory() + + if len(keys) < len(d) * .6: + d2.update(d) + for key in keys: + if key in d2: + del d2[key] + else: + remaining = set(d) + remaining.difference_update(keys) + for k in remaining: + d2[k] = d[k] return d2 @@ -265,15 +274,28 @@ def update_in(d, keys, func, default=None, factory=dict): >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0) {1: 'foo', 2: {3: {4: 1}}} """ - assert len(keys) > 0 - k, ks = keys[0], keys[1:] - if ks: - return assoc(d, k, update_in(d[k] if (k in d) else factory(), - ks, func, default, factory), - factory) + ks = iter(keys) + k = next(ks) + + rv = inner = factory() + rv.update(d) + + for key in ks: + if k in d: + d = d[k] + dtemp = factory() + dtemp.update(d) + else: + d = dtemp = factory() + + inner[k] = inner = dtemp + k = key + + if k in d: + inner[k] = func(d[k]) else: - innermost = func(d[k]) 
if (k in d) else func(default) - return assoc(d, k, innermost, factory) + inner[k] = func(default) + return rv def get_in(keys, coll, default=None, no_default=False): diff --git a/toolz/functoolz.py b/toolz/functoolz.py index 408d73f3..a68f26f8 100644 --- a/toolz/functoolz.py +++ b/toolz/functoolz.py @@ -2,14 +2,17 @@ import inspect import operator from operator import attrgetter +from importlib import import_module from textwrap import dedent +from types import MethodType -from .compatibility import PY3, PY33, PY34, PYPY, import_module +from .compatibility import PY3, PY34, PYPY from .utils import no_default -__all__ = ('identity', 'thread_first', 'thread_last', 'memoize', 'compose', - 'pipe', 'complement', 'juxt', 'do', 'curry', 'flip', 'excepts') +__all__ = ('identity', 'apply', 'thread_first', 'thread_last', 'memoize', + 'compose', 'compose_left', 'pipe', 'complement', 'juxt', 'do', + 'curry', 'flip', 'excepts') def identity(x): @@ -21,6 +24,22 @@ def identity(x): return x +def apply(*func_and_args, **kwargs): + """ Applies a function and returns the results + >>> def double(x): return 2*x + >>> def inc(x): return x + 1 + >>> apply(double, 5) + 10 + + >>> tuple(map(apply, [double, inc, double], [10, 500, 8000])) + (20, 501, 16000) + """ + if not func_and_args: + raise TypeError('func argument is required') + func, args = func_and_args[0], func_and_args[1:] + return func(*args, **kwargs) + + def thread_first(val, *forms): """ Thread value through a sequence of functions/forms @@ -351,7 +370,7 @@ def __reduce__(self): if k not in ('_partial', '_sigspec')) state = (type(self), func, self.args, self.keywords, userdict, is_decorated) - return (_restore_curry, state) + return _restore_curry, state def _restore_curry(cls, func, args, kwargs, userdict, is_decorated): @@ -498,11 +517,50 @@ def composed_doc(*fs): def __name__(self): try: return '_of_'.join( - f.__name__ for f in reversed((self.first,) + self.funcs) + (f.__name__ for f in reversed((self.first,) + 
self.funcs)) ) except AttributeError: return type(self).__name__ + def __repr__(self): + return '{.__class__.__name__}{!r}'.format( + self, tuple(reversed((self.first, ) + self.funcs))) + + def __eq__(self, other): + if isinstance(other, Compose): + return other.first == self.first and other.funcs == self.funcs + return NotImplemented + + def __ne__(self, other): + equality = self.__eq__(other) + return NotImplemented if equality is NotImplemented else not equality + + def __hash__(self): + return hash(self.first) ^ hash(self.funcs) + + # Mimic the descriptor behavior of python functions. + # i.e. let Compose be called as a method when bound to a class. + if PY3: # pragma: py2 no cover + # adapted from + # docs.python.org/3/howto/descriptor.html#functions-and-methods + def __get__(self, obj, objtype=None): + return self if obj is None else MethodType(self, obj) + else: # pragma: py3 no cover + # adapted from + # docs.python.org/2/howto/descriptor.html#functions-and-methods + def __get__(self, obj, objtype=None): + return self if obj is None else MethodType(self, obj, objtype) + + # introspection with Signature is only possible from py3.3+ + if PY3: # pragma: py2 no cover + @instanceproperty + def __signature__(self): + base = inspect.signature(self.first) + last = inspect.signature(self.funcs[-1]) + return base.replace(return_annotation=last.return_annotation) + + __wrapped__ = instanceproperty(attrgetter('first')) + def compose(*funcs): """ Compose functions to operate in series. @@ -519,6 +577,7 @@ def compose(*funcs): '4' See Also: + compose_left pipe """ if not funcs: @@ -529,6 +588,27 @@ def compose(*funcs): return Compose(funcs) +def compose_left(*funcs): + """ Compose functions to operate in series. + + Returns a function that applies other functions in sequence. + + Functions are applied from left to right so that + ``compose_left(f, g, h)(x, y)`` is the same as ``h(g(f(x, y)))``. 
+ + If no arguments are provided, the identity function (f(x) = x) is returned. + + >>> inc = lambda i: i + 1 + >>> compose_left(inc, str)(3) + '4' + + See Also: + compose + pipe + """ + return compose(*reversed(funcs)) + + def pipe(data, *funcs): """ Pipe a value through a sequence of functions @@ -758,11 +838,6 @@ def _check_sigspec(sigspec, func, builtin_func, *builtin_args): and (( hasattr(func, '__signature__') and hasattr(func.__signature__, '__get__') - ) or ( - PY33 - and hasattr(func, '__wrapped__') - and hasattr(func.__wrapped__, '__get__') - and not callable(func.__wrapped__) )) ): # pragma: no cover (not covered in Python 3.4) val = builtin_func(*builtin_args) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index a25eea3c..e71f1eee 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -5,7 +5,7 @@ from functools import partial from random import Random from toolz.compatibility import (map, filterfalse, zip, zip_longest, iteritems, - filter) + filter, Sequence) from toolz.utils import no_default @@ -14,7 +14,7 @@ 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', - 'join', 'tail', 'diff', 'topk', 'peek', 'random_sample') + 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample') def remove(predicate, seq): @@ -56,7 +56,13 @@ def accumulate(binop, seq, initial=no_default): itertools.accumulate : In standard itertools for Python 3.2+ """ seq = iter(seq) - result = next(seq) if initial == no_default else initial + if initial == no_default: + try: + result = next(seq) + except StopIteration: + return + else: + result = initial yield result for elem in seq: result = binop(result, elem) @@ -83,6 +89,8 @@ def groupby(key, seq): 'M': [{'gender': 'M', 'name': 'Bob'}, {'gender': 'M', 'name': 'Charlie'}]} + Not to be confused with ``itertools.groupby`` + See Also: countby """ @@ -374,7 +382,9 @@ def 
second(seq): >>> second('ABC') 'B' """ - return next(itertools.islice(seq, 1, None)) + seq = iter(seq) + next(seq) + return next(seq) def nth(n, seq): @@ -383,7 +393,7 @@ def nth(n, seq): >>> nth(1, 'ABC') 'B' """ - if isinstance(seq, (tuple, list, collections.Sequence)): + if isinstance(seq, (tuple, list, Sequence)): return seq[n] else: return next(itertools.islice(seq, n, None)) @@ -449,7 +459,7 @@ def get(ind, seq, default=no_default): if len(ind) > 1: return operator.itemgetter(*ind)(seq) elif ind: - return (seq[ind[0]],) + return seq[ind[0]], else: return () else: @@ -720,7 +730,23 @@ def partition_all(n, seq): yield prev prev = item if prev[-1] is no_pad: - yield prev[:prev.index(no_pad)] + try: + # If seq defines __len__, then + # we can quickly calculate where no_pad starts + yield prev[:len(seq) % n] + except TypeError: + # Get first index of no_pad without using .index() + # https://github.com/pytoolz/toolz/issues/387 + # Binary search from CPython's bisect module, + # modified for identity testing. + lo, hi = 0, n + while lo < hi: + mid = (lo + hi) // 2 + if prev[mid] is no_pad: + hi = mid + else: + lo = mid + 1 + yield prev[:lo] else: yield prev @@ -792,6 +818,8 @@ def join(leftkey, leftseq, rightkey, rightseq, This is a semi-streaming operation. The LEFT sequence is fully evaluated and placed into memory. The RIGHT sequence is evaluated lazily and so can be arbitrarily large. + (Note: If right_default is defined, then unique keys of rightseq + will also be stored in memory.) >>> friends = [('Alice', 'Edith'), ... ('Alice', 'Zhao'), @@ -834,7 +862,10 @@ def join(leftkey, leftseq, rightkey, rightseq, Usually the key arguments are callables to be applied to the sequences. If the keys are not obviously callable then it is assumed that indexing was - intended, e.g. the following is a legal change + intended, e.g. the following is a legal change. + The join is implemented as a hash join and the keys of leftseq must be + hashable. 
Additionally, if right_default is defined, then keys of rightseq + must also be hashable. >>> # result = join(second, friends, first, cities) >>> result = join(1, friends, 0, cities) # doctest: +SKIP @@ -845,22 +876,47 @@ def join(leftkey, leftseq, rightkey, rightseq, rightkey = getter(rightkey) d = groupby(leftkey, leftseq) - seen_keys = set() - left_default_is_no_default = (left_default == no_default) - for item in rightseq: - key = rightkey(item) - seen_keys.add(key) - try: - left_matches = d[key] - for match in left_matches: - yield (match, item) - except KeyError: - if not left_default_is_no_default: + if left_default == no_default and right_default == no_default: + # Inner Join + for item in rightseq: + key = rightkey(item) + if key in d: + for left_match in d[key]: + yield (left_match, item) + elif left_default != no_default and right_default == no_default: + # Right Join + for item in rightseq: + key = rightkey(item) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: yield (left_default, item) + elif right_default != no_default: + seen_keys = set() + seen = seen_keys.add + + if left_default == no_default: + # Left Join + for item in rightseq: + key = rightkey(item) + seen(key) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: + # Full Join + for item in rightseq: + key = rightkey(item) + seen(key) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: + yield (left_default, item) - if right_default != no_default: - for key, matches in d.items(): + for key, matches in iteritems(d): if key not in seen_keys: for match in matches: yield (match, right_default) @@ -942,7 +998,25 @@ def peek(seq): """ iterator = iter(seq) item = next(iterator) - return item, itertools.chain([item], iterator) + return item, itertools.chain((item,), iterator) + + +def peekn(n, seq): + """ Retrieve the next n elements of a sequence + + Returns a tuple of the first n elements and an iterable equivalent + to 
the original, still having the elements retrieved. + + >>> seq = [0, 1, 2, 3, 4] + >>> first_two, seq = peekn(2, seq) + >>> first_two + (0, 1) + >>> list(seq) + [0, 1, 2, 3, 4] + """ + iterator = iter(seq) + peeked = tuple(take(n, iterator)) + return peeked, itertools.chain(iter(peeked), iterator) def random_sample(prob, seq, random_state=None): diff --git a/toolz/sandbox/core.py b/toolz/sandbox/core.py index 359fc3fa..915f06c2 100644 --- a/toolz/sandbox/core.py +++ b/toolz/sandbox/core.py @@ -105,7 +105,7 @@ def unzip(seq): [1, 2] Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this - implementation can handle a finite sequence of infinite sequences. + implementation can handle an infinite sequence ``seq``. Caveats: @@ -113,7 +113,8 @@ def unzip(seq): of auxiliary storage if the resulting iterators are consumed at different times. - * The top level sequence cannot be infinite. + * The inner sequence cannot be infinite. In Python 3 ``zip(*seq)`` can be + used if ``seq`` is a finite sequence of infinite sequences. 
""" diff --git a/toolz/sandbox/tests/test_core.py b/toolz/sandbox/tests/test_core.py index 96a83094..14e3847f 100644 --- a/toolz/sandbox/tests/test_core.py +++ b/toolz/sandbox/tests/test_core.py @@ -20,8 +20,8 @@ def test_EqualityHashKey_default_key(): data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1,)])) data2.extend([T0, T1, (), (1,)]) set3 = set(data2) - assert set3 == set([(), (1,), EqualityHashDefault(()), - EqualityHashDefault((1,))]) + assert set3 == {(), (1,), EqualityHashDefault(()), + EqualityHashDefault((1,))} assert len(set3) == 4 assert EqualityHashDefault(()) in set3 assert EqualityHashDefault((1,)) in set3 diff --git a/toolz/sandbox/tests/test_parallel.py b/toolz/sandbox/tests/test_parallel.py index e645dfdd..e22c3de2 100644 --- a/toolz/sandbox/tests/test_parallel.py +++ b/toolz/sandbox/tests/test_parallel.py @@ -17,8 +17,8 @@ def setadd(s, item): s.add(item) return s - assert fold(setadd, [1, 2, 3], set()) == set((1, 2, 3)) + assert fold(setadd, [1, 2, 3], set()) == {1, 2, 3} assert (fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union) - == set((1, 2, 3))) + == {1, 2, 3}) assert fold(add, range(10), default=no_default2) == fold(add, range(10)) diff --git a/toolz/tests/test_curried.py b/toolz/tests/test_curried.py index 2efb0001..5db4d079 100644 --- a/toolz/tests/test_curried.py +++ b/toolz/tests/test_curried.py @@ -2,8 +2,8 @@ import toolz.curried from toolz.curried import (take, first, second, sorted, merge_with, reduce, merge, operator as cop) -from toolz.compatibility import import_module from collections import defaultdict +from importlib import import_module from operator import add @@ -62,7 +62,7 @@ def test_curried_operator(): ) # Make sure this isn't totally empty. 
- assert len(set(vars(cop)) & set(['add', 'sub', 'mul'])) == 3 + assert len(set(vars(cop)) & {'add', 'sub', 'mul'}) == 3 def test_curried_namespace(): @@ -79,10 +79,10 @@ def should_curry(func): def curry_namespace(ns): - return dict( - (name, toolz.curry(f) if should_curry(f) else f) + return { + name: toolz.curry(f) if should_curry(f) else f for name, f in ns.items() if '__' not in name - ) + } from_toolz = curry_namespace(vars(toolz)) from_exceptions = curry_namespace(vars(exceptions)) diff --git a/toolz/tests/test_dicttoolz.py b/toolz/tests/test_dicttoolz.py index 0226e49b..d7b78648 100644 --- a/toolz/tests/test_dicttoolz.py +++ b/toolz/tests/test_dicttoolz.py @@ -90,16 +90,16 @@ def test_assoc(self): def test_dissoc(self): D, kw = self.D, self.kw - assert dissoc(D({"a": 1}), "a") == D({}) - assert dissoc(D({"a": 1, "b": 2}), "a") == D({"b": 2}) - assert dissoc(D({"a": 1, "b": 2}), "b") == D({"a": 1}) - assert dissoc(D({"a": 1, "b": 2}), "a", "b") == D({}) - assert dissoc(D({"a": 1}), "a") == dissoc(dissoc(D({"a": 1}), "a"), "a") + assert dissoc(D({"a": 1}), "a", **kw) == D({}) + assert dissoc(D({"a": 1, "b": 2}), "a", **kw) == D({"b": 2}) + assert dissoc(D({"a": 1, "b": 2}), "b", **kw) == D({"a": 1}) + assert dissoc(D({"a": 1, "b": 2}), "a", "b", **kw) == D({}) + assert dissoc(D({"a": 1}), "a", **kw) == dissoc(dissoc(D({"a": 1}), "a", **kw), "a", **kw) # Verify immutability: d = D({'x': 1}) oldd = d - d2 = dissoc(d, 'x') + d2 = dissoc(d, 'x', **kw) assert d is oldd assert d2 is not oldd diff --git a/toolz/tests/test_functoolz.py b/toolz/tests/test_functoolz.py index deda6068..753926dc 100644 --- a/toolz/tests/test_functoolz.py +++ b/toolz/tests/test_functoolz.py @@ -1,7 +1,10 @@ +import inspect import platform from toolz.functoolz import (thread_first, thread_last, memoize, curry, - compose, pipe, complement, do, juxt, flip, excepts) + compose, compose_left, pipe, complement, do, juxt, + flip, excepts, apply) +from toolz.compatibility import PY3 from operator 
import add, mul, itemgetter from toolz.utils import raises from functools import partial @@ -23,6 +26,32 @@ def double(x): return 2 * x +class AlwaysEquals(object): + """useful to test correct __eq__ implementation of other objects""" + + def __eq__(self, other): + return True + + def __ne__(self, other): + return False + + +class NeverEquals(object): + """useful to test correct __eq__ implementation of other objects""" + + def __eq__(self, other): + return False + + def __ne__(self, other): + return True + + +def test_apply(): + assert apply(double, 5) == 10 + assert tuple(map(apply, [double, inc, double], [10, 500, 8000])) == (20, 501, 16000) + assert raises(TypeError, apply) + + def test_thread_first(): assert thread_first(2) == 2 assert thread_first(2, inc) == 3 @@ -328,7 +357,7 @@ def bar(a, b, c=1): b1 = curry(bar, 1, c=2) assert b1 != f1 - assert set([f1, f2, g1, h1, h2, h3, b1, b1()]) == set([f1, g1, h1, b1]) + assert {f1, f2, g1, h1, h2, h3, b1, b1()} == {f1, g1, h1, b1} # test unhashable input unhash1 = curry(foo, []) @@ -499,17 +528,54 @@ def _should_curry(self, args, kwargs, exc=None): """ -def test_compose(): - assert compose()(0) == 0 - assert compose(inc)(0) == 1 - assert compose(double, inc)(0) == 2 - assert compose(str, iseven, inc, double)(3) == "False" - assert compose(str, add)(1, 2) == '3' +def generate_compose_test_cases(): + """ + Generate test cases for parametrized tests of the compose function. 
+ """ - def f(a, b, c=10): + def add_then_multiply(a, b, c=10): return (a + b) * c - assert compose(str, inc, f)(1, 2, c=3) == '10' + return ( + ( + (), # arguments to compose() + (0,), {}, # positional and keyword args to the Composed object + 0 # expected result + ), + ( + (inc,), + (0,), {}, + 1 + ), + ( + (double, inc), + (0,), {}, + 2 + ), + ( + (str, iseven, inc, double), + (3,), {}, + "False" + ), + ( + (str, add), + (1, 2), {}, + '3' + ), + ( + (str, inc, add_then_multiply), + (1, 2), {"c": 3}, + '10' + ), + ) + + +def test_compose(): + for (compose_args, args, kw, expected) in generate_compose_test_cases(): + assert compose(*compose_args)(*args, **kw) == expected + + +def test_compose_metadata(): # Define two functions with different names def f(a): @@ -529,6 +595,93 @@ def g(a): assert composed.__name__ == 'Compose' assert composed.__doc__ == 'A composition of functions' + assert repr(composed) == 'Compose({!r}, {!r})'.format(f, h) + + assert composed == compose(f, h) + assert composed == AlwaysEquals() + assert not composed == compose(h, f) + assert not composed == object() + assert not composed == NeverEquals() + + assert composed != compose(h, f) + assert composed != NeverEquals() + assert composed != object() + assert not composed != compose(f, h) + assert not composed != AlwaysEquals() + + assert hash(composed) == hash(compose(f, h)) + assert hash(composed) != hash(compose(h, f)) + + bindable = compose(str, lambda x: x*2, lambda x, y=0: int(x) + y) + + class MyClass: + + def __int__(self): + return 8 + + my_method = bindable + my_static_method = staticmethod(bindable) + + assert MyClass.my_method(3) == '6' + assert MyClass.my_method(3, y=2) == '10' + assert MyClass.my_static_method(2) == '4' + assert MyClass().my_method() == '16' + assert MyClass().my_method(y=3) == '22' + assert MyClass().my_static_method(0) == '0' + assert MyClass().my_static_method(0, 1) == '2' + + assert compose(f, h).__wrapped__ is h + assert compose(f, h).__class__.__wrapped__ 
is None + + # __signature__ is python3 only + if PY3: + + def myfunc(a, b, c, *d, **e): + return 4 + + def otherfunc(f): + return 'result: {}'.format(f) + + # set annotations compatibly with python2 syntax + myfunc.__annotations__ = { + 'a': int, + 'b': str, + 'c': float, + 'd': int, + 'e': bool, + 'return': int, + } + otherfunc.__annotations__ = {'f': int, 'return': str} + + composed = compose(otherfunc, myfunc) + sig = inspect.signature(composed) + assert sig.parameters == inspect.signature(myfunc).parameters + assert sig.return_annotation == str + + class MyClass: + method = composed + + assert len(inspect.signature(MyClass().method).parameters) == 4 + + +def generate_compose_left_test_cases(): + """ + Generate test cases for parametrized tests of the compose_left function. + + These are based on, and equivalent to, those produced by + generate_compose_test_cases(). + """ + return tuple( + (tuple(reversed(compose_args)), args, kwargs, expected) + for (compose_args, args, kwargs, expected) + in generate_compose_test_cases() + ) + + +def test_compose_left(): + for (compose_left_args, args, kw, expected) in generate_compose_left_test_cases(): + assert compose_left(*compose_left_args)(*args, **kw) == expected + def test_pipe(): assert pipe(1, inc) == 2 @@ -645,4 +798,3 @@ def raise_(a): excepting = excepts(object(), object(), object()) assert excepting.__name__ == 'excepting' assert excepting.__doc__ == excepts.__doc__ - diff --git a/toolz/tests/test_inspect_args.py b/toolz/tests/test_inspect_args.py index ef03331e..77457ed6 100644 --- a/toolz/tests/test_inspect_args.py +++ b/toolz/tests/test_inspect_args.py @@ -7,7 +7,7 @@ num_required_args, has_varargs, has_keywords) from toolz._signatures import builtins import toolz._signatures as _sigs -from toolz.compatibility import PY3, PY33 +from toolz.compatibility import PY3 from toolz.utils import raises @@ -437,7 +437,7 @@ def is_missing(modname, name, func): if missing: messages = [] for modname, names in
sorted(missing.items()): - msg = '{0}:\n {1}'.format(modname, '\n '.join(sorted(names))) + msg = '{}:\n {}'.format(modname, '\n '.join(sorted(names))) messages.append(msg) message = 'Missing introspection for the following callables:\n\n' raise AssertionError(message + '\n\n'.join(messages)) @@ -495,6 +495,6 @@ def __wrapped__(self): if PY3: assert inspect.signature(func) == inspect.signature(wrapped) - assert num_required_args(Wrapped) == (False if PY33 else None) + assert num_required_args(Wrapped) is None _sigs.signatures[Wrapped] = (_sigs.expand_sig((0, lambda func: None)),) assert num_required_args(Wrapped) == 1 diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index 93aa856d..c9119eea 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -13,7 +13,7 @@ reduceby, iterate, accumulate, sliding_window, count, partition, partition_all, take_nth, pluck, join, - diff, topk, peek, random_sample) + diff, topk, peek, peekn, random_sample) from toolz.compatibility import range, filter from operator import add, mul @@ -271,7 +271,7 @@ def set_add(s, i): return s assert reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2], set) == \ - {True: set([2, 4]), False: set([1, 3])} + {True: {2, 4}, False: {1, 3}} def test_iterate(): @@ -289,6 +289,7 @@ def binop(a, b): start = object() assert list(accumulate(binop, [], start)) == [start] + assert list(accumulate(binop, [])) == [] assert list(accumulate(add, [1, 2, 3], no_default2)) == [1, 3, 6] @@ -318,6 +319,17 @@ def test_partition_all(): assert list(partition_all(3, range(5))) == [(0, 1, 2), (3, 4)] assert list(partition_all(2, [])) == [] + # Regression test: https://github.com/pytoolz/toolz/issues/387 + class NoCompare(object): + def __eq__(self, other): + if self.__class__ == other.__class__: + return True + raise ValueError() + obj = NoCompare() + result = [(obj, obj, obj, obj), (obj, obj, obj)] + assert list(partition_all(4, [obj]*7)) == result + assert list(partition_all(4, 
iter([obj]*7))) == result + def test_count(): assert count((1, 2, 3)) == 3 @@ -356,10 +368,10 @@ def addpair(pair): result = set(starmap(add, join(first, names, second, fruit))) - expected = set([((1, 'one', 'apple', 1)), - ((1, 'one', 'orange', 1)), - ((2, 'two', 'banana', 2)), - ((2, 'two', 'coconut', 2))]) + expected = {(1, 'one', 'apple', 1), + (1, 'one', 'orange', 1), + (2, 'two', 'banana', 2), + (2, 'two', 'coconut', 2)} assert result == expected @@ -397,14 +409,14 @@ def test_join_double_repeats(): result = set(starmap(add, join(first, names, second, fruit))) - expected = set([((1, 'one', 'apple', 1)), - ((1, 'one', 'orange', 1)), - ((2, 'two', 'banana', 2)), - ((2, 'two', 'coconut', 2)), - ((1, 'uno', 'apple', 1)), - ((1, 'uno', 'orange', 1)), - ((2, 'dos', 'banana', 2)), - ((2, 'dos', 'coconut', 2))]) + expected = {(1, 'one', 'apple', 1), + (1, 'one', 'orange', 1), + (2, 'two', 'banana', 2), + (2, 'two', 'coconut', 2), + (1, 'uno', 'apple', 1), + (1, 'uno', 'orange', 1), + (2, 'dos', 'banana', 2), + (2, 'dos', 'coconut', 2)} assert result == expected @@ -415,21 +427,21 @@ def test_join_missing_element(): result = set(starmap(add, join(first, names, second, fruit))) - expected = set([((1, 'one', 'orange', 1))]) + expected = {(1, 'one', 'orange', 1)} assert result == expected def test_left_outer_join(): result = set(join(identity, [1, 2], identity, [2, 3], left_default=None)) - expected = set([(2, 2), (None, 3)]) + expected = {(2, 2), (None, 3)} assert result == expected def test_right_outer_join(): result = set(join(identity, [1, 2], identity, [2, 3], right_default=None)) - expected = set([(2, 2), (1, None)]) + expected = {(2, 2), (1, None)} assert result == expected @@ -437,7 +449,7 @@ def test_right_outer_join(): def test_outer_join(): result = set(join(identity, [1, 2], identity, [2, 3], left_default=None, right_default=None)) - expected = set([(2, 2), (1, None), (None, 3)]) + expected = {(2, 2), (1, None), (None, 3)} assert result == expected @@ -496,12 
+508,23 @@ def test_topk_is_stable(): def test_peek(): alist = ["Alice", "Bob", "Carol"] element, blist = peek(alist) - element == alist[0] + assert element == alist[0] assert list(blist) == alist assert raises(StopIteration, lambda: peek([])) +def test_peekn(): + alist = ("Alice", "Bob", "Carol") + elements, blist = peekn(2, alist) + assert elements == alist[:2] + assert tuple(blist) == alist + + elements, blist = peekn(len(alist) * 4, alist) + assert elements == alist + assert tuple(blist) == alist + + def test_random_sample(): alist = list(range(100)) diff --git a/toolz/tests/test_serialization.py b/toolz/tests/test_serialization.py index 1780c508..92b07f05 100644 --- a/toolz/tests/test_serialization.py +++ b/toolz/tests/test_serialization.py @@ -2,7 +2,7 @@ import toolz import toolz.curried import pickle -from toolz.compatibility import PY3, PY33, PY34 +from toolz.compatibility import PY3 from toolz.utils import raises @@ -81,7 +81,7 @@ def g1(self): def __reduce__(self): """Allow us to serialize instances of GlobalCurried""" - return (GlobalCurried, (self.x, self.y)) + return GlobalCurried, (self.x, self.y) @toolz.curry class NestedCurried(object): @@ -98,7 +98,7 @@ def g2(self): def __reduce__(self): """Allow us to serialize instances of NestedCurried""" - return (GlobalCurried.NestedCurried, (self.x, self.y)) + return GlobalCurried.NestedCurried, (self.x, self.y) class Nested(object): def __init__(self, x, y): @@ -148,7 +148,7 @@ def preserves_identity(obj): # If we add `curry.__getattr__` forwarding, the following tests will pass - # if not PY33 and not PY34: + # if not PY34: # assert preserves_identity(GlobalCurried.func.g1) # assert preserves_identity(GlobalCurried.func.NestedCurried.func.g2) # assert preserves_identity(GlobalCurried.func.Nested) @@ -159,7 +159,7 @@ def preserves_identity(obj): # assert preserves_identity(GlobalCurried.NestedCurried) # assert preserves_identity(GlobalCurried.NestedCurried.f2) # assert 
preserves_identity(GlobalCurried.Nested.f3) - # if not PY33 and not PY34: + # if not PY34: # assert preserves_identity(GlobalCurried.g1) # assert preserves_identity(GlobalCurried.NestedCurried.g2) # assert preserves_identity(GlobalCurried.Nested) @@ -175,7 +175,7 @@ def preserves_identity(obj): # assert func1 is not func2 # assert func1(4) == func2(4) == 10 # - # if not PY33 and not PY34: + # if not PY34: # nested3 = GlobalCurried.func.Nested(1, 2) # nested4 = pickle.loads(pickle.dumps(nested3)) # assert nested3 is not nested4