From 01f94f8775cce93669e1d480612b43a9fe3fda40 Mon Sep 17 00:00:00 2001 From: herr kaste Date: Sat, 14 Apr 2012 02:27:10 +0200 Subject: [PATCH] Initial commit. --- README | 58 ++++++ setup.py | 30 ++++ tests/__init__.py | 0 tests/pipes_test.py | 368 ++++++++++++++++++++++++++++++++++++++ useless/__init__.py | 0 useless/pipes/__init__.py | 4 + useless/pipes/common.py | 103 +++++++++++ useless/pipes/fs.py | 51 ++++++ useless/pipes/pipes.py | 128 +++++++++++++ useless/pipes/tupleize.py | 38 ++++ 10 files changed, 780 insertions(+) create mode 100644 README create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/pipes_test.py create mode 100644 useless/__init__.py create mode 100644 useless/pipes/__init__.py create mode 100644 useless/pipes/common.py create mode 100644 useless/pipes/fs.py create mode 100644 useless/pipes/pipes.py create mode 100644 useless/pipes/tupleize.py diff --git a/README b/README new file mode 100644 index 0000000..792a0fa --- /dev/null +++ b/README @@ -0,0 +1,58 @@ +Pseudo-hacky sugar around generators. If you use this your code can get unreadable or very simple. Over the last years I never beautified the code, but used it quite sometimes. + + +Basically it looks like this: + + [1, 2, 3] | add(2) | list == [3, 4, 5] + +where + + @worker + def add(items, n): + for i in items: + yield i + n + +If you write nested for loops or nested functions a(b(c([1,2,3]))) this might help you. + + +Sometimes you can achieve something like this: + + + filter_audio_files = fs.filter_by_ext(['.mp3']) + + @producer + def folders_with_audio_files(path): + for root, folders, filenames in os.walk(path): + if any(filenames | filter_audio_files): + yield root + + + @worker + def that_need_fix(paths): + for path in paths: + files = listdir(path) | filter_audio_files | join_path(path) | list + dos_names = files | get_83DOS_name | list + + if files.sort() != dos_names.sort(): + yield path + that_need_fix = that_need_fix() + + + # and the outermost command looks like this + + folders_with_audio_files(root) | that_need_fix # ... + + + # note that convetional python doesn't have to be more complex + # it's just about code reuse + for root, folders, filenames in os.walk(root): + files = [file for file in listdir(path) if os.path.splitext(file)[1] in ['.mp3',]] + if any(files): + files = map(lambda file:os.path.join(root, file), files) + + dos_names = [get_dos_name(file) for file in files] + + if files.sort() != dos_names.sort(): + yield path + + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..5d192e1 --- /dev/null +++ b/setup.py @@ -0,0 +1,30 @@ +__version__ = "0.0.1" +import os +from setuptools import setup, find_packages + +def _read_contents(fn): + here = os.path.dirname( os.path.realpath(__file__) ) + filename = os.path.join(here, fn) + with open(filename) as file: + return file.read() + +setup( + name='useless.pipes', + version=__version__, + description='Generator sugar.', + long_description=_read_contents('README'), + author="herr kaste", + author_email="herr.kaste@gmail.com", + packages=find_packages(exclude=['tests']), + install_requires=[], + tests_require=['pytest'], + classifiers= [ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: BSD License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Topic :: Internet :: WWW/HTTP', + 'Topic :: Software Development :: Libraries', + ], +) \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/pipes_test.py b/tests/pipes_test.py new file mode 100644 index 0000000..f0bc6a9 --- /dev/null +++ b/tests/pipes_test.py @@ -0,0 +1,368 @@ +from useless.pipes import * + +import unittest + +@producer +def one_by_one(): + for item in ['a', 'b']: + yield item + +@worker +def echo2(ITEMS): + for item in ITEMS: + yield item + +@worker +def add(ITEMS, s): + for item in ITEMS: + yield item + s + +@worker +def unique(ITEMS): + set_ = set() + for item in ITEMS: + if item not in set_: + set_.add(item) + yield item + +@consumer +def as_list(ITEMS): + return [i for i in ITEMS] + +class ProducerTest(unittest.TestCase): + def testYouCanTurnAProducerIntoAList(self): + items = ['a', 'b'] + p = one_by_one() + self.assertEqual(items, p.as_list()) + + def testPrintingReturnsAList(self): + items = ['a', 'b'] + p = one_by_one() + self.assertEqual(str(items), str(p)) + + def testYouCanIterateOverTheElements(self): + items = ['a', 'b'] + p = one_by_one() + got = [i for i in p] + self.assertEqual(items, got) + + def testPassedInArgumentsOnConstructionAreStored(self): + @producer + def echo(list_): + for item in list_: + yield item + + items = ['b', 'c'] + p = echo(items) + self.assertEqual(items, p.as_list()) + + def testConcurrentVersionsOfAProducerDontDisturbEachOther(self): + @producer + def echo(list_): + for item in list_: + yield item + + A = ['A'] + B = ['B'] + p1 = echo(A) + p2 = echo(B) + self.assertEqual(p1.as_list(), A) + self.assertEqual(p2.as_list(), B) + +class WorkerTest(unittest.TestCase): + def testTheDescriptorRemovesTheFirstArgument(self): + w = echo2() + + def testCallingAWorkerInjectsTheItems(self): + items = ['a', 'b'] + w = echo2() + got = [i for i in w(items)] + self.assertEqual(items, got) + + def testYouCanPipeInItems(self): + items = ['a', 'b'] + w = echo2() + pipe = items | w + + got = [i for i in pipe] + self.assertEqual(items, got) + + def testPassedArgumentsOnConstructionAreAvailable(self): + items = ['a', 'b'] + w = add('C') + pipe = items | w + + got = [i for i in pipe] + expected = ['aC', 'bC'] + self.assertEqual(got, expected) + + def testYouCanPipeTwoWorkersAndSoOn(self): + items = ['a', 'b'] + w1 = add('C') + w2 = add('D') + pipe = items | w1 | w2 + + got = [i for i in pipe] + expected = ['aCD', 'bCD'] + self.assertEqual(got, expected) + + def testAWorkerCanHaveLocalVars(self): + items = ['a', 'b', 'a'] + w = unique() + pipe = items | add('C') | w + + got = [i for i in pipe] + expected = ['aC', 'bC'] + self.assertEqual(got, expected) + + def testEqualityWithOtherWorkers(self): + from useless.pipes.common import echo + + self.assertEqual(echo, echo) + + w1 = [1,2] | echo + w2 = [1,2] | echo + + self.assertEqual(w1,w2) + + + w3 = [1,3] | echo + + self.assertNotEqual(w1,w3) + + @worker + def echo(items): + for item in items: + yield item + + self.assertNotEqual([1,2]|echo(),[1,2]|echo()) + + + def testEqualityWithAList(self): + from useless.pipes.common import echo + + w1 = [1,2] | echo + w2 = [1,2] | echo + self.assertEqual(w1, [1,2]) + + def testAskWhatsInTheChain(self): + from useless.pipes.common import echo, unique + items = [1, 2] + w1 = unique + w2 = echo + chain = items | w1 | w2 + + self.assertEqual(chain.the_chain_to_the_left(), [w2, w1, items]) + + def testReusableWorkers(self): + from useless.pipes.common import unique, zip_with + + @worker + def selectFirst(tuples): + for first, _ in tuples: + yield first + + self.assertEqual( + [1,1] | unique | # [1] + zip_with([2,2]) | unique | # [(1,2)] + selectFirst() | unique | list, # [1] + [1] + ) + + def testCycleThenRepeat(self): + + def cycle_then_repeat(iter): + for item in iter: + yield item + + while True: + yield item + + iter = cycle_then_repeat([1,2]) + got = [] + for i in range(4): + got.append(iter.next()) + + self.assertEqual([1,2,2,2], got) + + def testWorkersCanBeCached(self): + from useless.pipes.common import echo + + @worker + def spying(items, list_): + for item in items: + list_.append(item) + yield item + + @worker + def echoes(items): + for item in items: + yield item + + cache = echoes() + cache.cache + + spied = [] + spy = spying(spied) + [1,2] | spy | cache | list + [1,2] | spy | cache | list + + self.assertEqual(spied, [1,2]) + + cache = echoes() + cache.cache + + spied = [] + spy = spying(spied) + [3,4] | spy | cache | (echo, echo) | list + + self.assertEqual(spied, [3,4]) + + def testYouCanCloneAWorker(self): + from useless.pipes.common import echo + + clone = echo.clone() + clone.cache + + self.assertFalse(echo._cache) + + def testTransformingWorkers(self): + from useless.pipes.common import echo + + @worker + def add_one(items): + for item in items: + yield item + 1 + + @worker + def dummy(items): + return items | add_one() | add_one() + + self.assertEqual([4,5], [2,3] | dummy() | list) + + @worker + def cat(items): + return items | dummy() | add_one() + + self.assertEqual([5,6], [2,3] | cat() | list) + + def testConcatWorkers(self): + from useless.pipes.common import concat, echo + + @worker + def add_one(items): + for item in items: + yield item + 1 + + it = add_one() | echo | add_one() + + self.assertEqual([4,5], [2,3] | concat(it) | list) + self.assertEqual([6,7], [2,3] | concat(it) | concat(it) | list) + +class WorkingWithListOfTuples(unittest.TestCase): + def testTheTupleizeWorker(self): + from useless.pipes.common import echo + from useless.pipes.tupleize import tupleize + + w = [1,2] | tupleize(echo) + + self.assertEqual([(1,),(2,)], w | list) + + def testAutomaticallyAddLastGeneratorToEachLeftEnd(self): + from useless.pipes.common import from_list,echo,unique + + self.assertEqual(from_list([1,3]) | (echo, ) | list, [(1,),(3,)]) + self.assertEqual(from_list([1,2]) | (echo, echo) | list, [(1,1),(2,2)]) + +# assert False + + def testWhenThereIsNoUnboundLeftEndJustExecuteThePipe(self): + from useless.pipes.common import echo,unique + self.assertEqual( + [1,2,4] | echo | + ([1,1] | unique, [3,3] | unique) # in the tuple we 'start' + # with a new pipe + | list, + [(1,3)] + ) + + def testALiteralValueJustRepeats(self): + from useless.pipes.common import from_list,echo + + self.assertEqual( + from_list([1,2]) | ('foo', echo) | list, + [('foo',1),('foo',2)] + ) + + def testTupleOnTheLeftMostSideWorkLikeProducers(self): + from useless.pipes.common import from_list, echo, unique + + # using 'literal' iterables + self.assertEqual( + ([1,2],[3,4]) | echo | list, + [(1,3),(2,4)] + ) + #using useless.pipess: a worker and a producer + self.assertEqual( + ([1,2] | unique, from_list([3,4])) | echo | list, + [(1,3),(2,4)] + ) + #'literal' values repeat + self.assertEqual( + ('foo', [1,2]) | echo | list, + [('foo',1),('foo',2)] + ) + + +class ConsumerTest(unittest.TestCase): + def testReturnsInsteadOfYields(self): + items = ['a', 'b'] + c = as_list() + + self.assertEqual(c(items), items) + + def testConsumersArePipeable(self): + items = ['a', 'b'] + c = as_list() + pipe = items | c + + self.assertEqual(pipe, items) + + def testShorthandConsumers(self): + items = ['a', 'b', 'a'] + pipe = items | unique() | list + self.assertEqual(pipe, ['a','b']) + + def testSorted(self): + items = ['a', 'b', 'a'] + @consumer + def s(ITEMS, **kw): + return sorted(ITEMS, **kw) + s = lambda items, **kw: sorted(items, **kw) + s = consumer(lambda items, **kw: sorted(items, **kw)) + pipe = items | unique() | s(reverse=True) + + self.assertEqual(pipe, ['b', 'a']) + + def testSideeffect(self): + l = [] + @worker + def effect(ITEMS, list_): + for i in ITEMS: + list_.append(i) + yield i + + items = [1,2,3] + pipe = items | effect(l) | unique() + pipe |= as_list() + + self.assertEqual(l, items) + + def testChainability(self): + u2 = consumer(lambda items:items|unique()) + + self.assertEqual([1,2,2,3] | u2() | as_list(), [1,2,3]) + +if __name__ == '__main__': + unittest.main() + diff --git a/useless/__init__.py b/useless/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/useless/pipes/__init__.py b/useless/pipes/__init__.py new file mode 100644 index 0000000..f7ced2d --- /dev/null +++ b/useless/pipes/__init__.py @@ -0,0 +1,4 @@ +from pipes import producer, worker, consumer + + +__all__ = ['producer','worker','consumer'] diff --git a/useless/pipes/common.py b/useless/pipes/common.py new file mode 100644 index 0000000..7a6e4a7 --- /dev/null +++ b/useless/pipes/common.py @@ -0,0 +1,103 @@ +from pipes import producer, worker, consumer +from itertools import izip, repeat +from collections import defaultdict + +@worker +def unique(items): + set_ = set() + for item in items: + if item in set_: continue + + set_.add(item) + yield item +unique = unique() + +@consumer +def length(items): + i = 0 + for _ in items: + i += 1 + return i +length = length() + +@worker +def join_each_by(items, char=''): + for item in items: + yield char.join(item) +join_each = join_each_by(char='') + +@consumer +def first(items): + for item in items: + return item +first = first() + +@worker +def group(items): + d = defaultdict(int) + for item in items: + d[item] += 1 + + for key, value in d.iteritems(): + yield (key, value) +group = group() + +@consumer +def sort(items, key=None, reverse=False): + return sorted(items, key=key, reverse=reverse) + +@consumer +def zip_with(*items): + return izip(*items) + +@worker +def echo(items): + for item in items: + yield item +echo = echo() + +@producer +def from_list(items): + for item in items: + yield item + +@worker +def concat(items, worker): + # We want something like items | w, but since we don't have an idea + # of a pipe or chain of workers, w represents always the rightmost + # worker. + # When we do something like items | leftend(w), this breaks the + # chain, because on bind we also copy or 'clone' the worker. Hence: + # we must walk to the left, and rebind every worker up to the right + # [ A -- B -- w + # [ items -- A' -- B' -- w' + + chain = worker.the_chain_to_the_left() + + left = items + while chain: + current_worker = chain.pop() + cloned_worker = left | current_worker + left = cloned_worker + return cloned_worker + + + + +##################################################### + +if __name__ == '__main__': + assert [('a','b'),('c','d')] | join_each | list == ['ab','cd'] + assert [('a','b'),('c','d')] | join_each_by(';') | list == ['a;b','c;d'] + + assert ['a','b'] | zip_with(['c','d']) | list == [('a','c'),('b','d')] + + assert [1,2] | first == 1 + + assert [1,2,3] | length == 3 + + assert [1,2,1] | group | list == [(1,2),(2,1)] + + assert [3,2,1] | sort() | [1,2,3] + + assert [1,2,1] | unique | list == [1,2] \ No newline at end of file diff --git a/useless/pipes/fs.py b/useless/pipes/fs.py new file mode 100644 index 0000000..82300a0 --- /dev/null +++ b/useless/pipes/fs.py @@ -0,0 +1,51 @@ +from pipes import producer, worker, consumer +import os as _os + +@producer +def listdir(path): + for entry in _os.listdir(path): + yield entry + +def listdir_abs(path): + return listdir(path) | join_path(path) + +@worker +def filter_only_dirs(entries): + for entry in entries: + if _os.path.isdir(entry): + yield entry +filter_only_dirs = filter_only_dirs() + +@worker +def filter_only_files(entries): + for entry in entries: + if _os.path.isfile(entry): + yield entry +filter_only_files = filter_only_files() + +@worker +def filter_by_ext(files, extensions): + for file in files: + _, ext = _os.path.splitext(file) + if ext.lower() in extensions: + yield file + +@worker +def join_path(names, root): + for name in names: + yield _os.path.join(root, name) + +@worker +def exists(paths): + for path in paths: + yield _os.path.exists(path) +exists = exists() + +@worker +def get_ext(filenames): + for filename in filenames: + _, ext = _os.path.splitext(filename) + yield ext +get_ext = get_ext() + + diff --git a/useless/pipes/pipes.py b/useless/pipes/pipes.py new file mode 100644 index 0000000..1e2f5e7 --- /dev/null +++ b/useless/pipes/pipes.py @@ -0,0 +1,128 @@ +from itertools import izip, repeat +import copy + +__all__ = ['producer','worker','consumer'] + +def producer(func): + def wrapper(*a, **kw): + def f(): + return func(*a, **kw) + f.__name__ = func.__name__ + return Producer(f) + + return wrapper + +def worker(func): + def wrapper(*a, **kw): + def f(iter): + return func(iter, *a, **kw) + f.__name__ = func.__name__ + return Worker(f) + + return wrapper + +def consumer(func): + def wrapper(*a, **kw): + def f(iter): + return func(iter, *a, **kw) + f.__name__ = func.__name__ + return Consumer(f) + + return wrapper + +class Pipeable(object): + def __init__(self, target): + super(Pipeable, self).__init__() + self.target = target + + def __eq__(self, other): + if isinstance(other, Worker): + if self.target != other.target: + return False + else: + return self.left == other.left \ + if hasattr(self,'left') and hasattr(other,'left') else True + elif isinstance(other, list): + return self | list == other + + def as_list(self): + return list(self) + + def __str__(self): + return str(self.as_list()) + +class HasLeftSide(): + def __call__(self, iter=None): + if not iter and not hasattr(self, 'left'): + raise Exception, "Unbound, hence not callable." + iter = iter or self.left + return self.target(iter) + + def the_chain_to_the_left(self): + chain = [self] + iter = self + while hasattr(iter,'left'): + iter = iter.left + chain.append(iter) + return chain + + def clone(self): + return copy.copy(self) + + def bind(self, left): + if isinstance(left, tuple): + from tupleize import tuple_generator + left = tuple_generator(*left) + + self.left = left + return self + +class HasRightSide(): + def __or__(self, right): + if isinstance(right, tuple): + from tupleize import tupleize + right = tupleize(*right) + + if isinstance(right, HasLeftSide): + return right.__ror__(self) + else: + return right(self) + +class Cacheable(object): + def __init__(self): + super(Cacheable, self).__init__() + self._cache = False + self.cached = False + + @property + def cache(self): + self._cache = True + return self + + def __iter__(self): + if self._cache: + if not self.cached: + self.cached = list(self()) + return iter(self.cached) + return iter(self()) + +class Producer(Pipeable, Cacheable, HasRightSide): + def __call__(self): + return self.target() + +class Worker(Pipeable, Cacheable, HasRightSide, HasLeftSide): + def __ror__(self, left): + if not self._cache: + self = self.clone() + + return self.bind(left) + + def __repr__(self): + return 'Worker for %r' % self.target + +class Consumer(Pipeable, HasLeftSide): + def __ror__(self, left): + self.bind(left) + return self() + + diff --git a/useless/pipes/tupleize.py b/useless/pipes/tupleize.py new file mode 100644 index 0000000..f78232b --- /dev/null +++ b/useless/pipes/tupleize.py @@ -0,0 +1,38 @@ +from itertools import izip, repeat +from useless.pipes import worker, producer + +def _expand_tuple(left, tuple_): + from pipes import Worker + from common import concat + + def find_left_end(iter): + while hasattr(iter, 'left'): + iter = iter.left + return iter + + iters = [] + for it in tuple_: + leftmost = find_left_end(it) + + if isinstance(leftmost, Worker): + # When we have a worker on the left side, then it is an unbound one + # so we automatically bind to the 'left' + iters.append(left | concat(it)) + elif not hasattr(it, '__iter__'): + # If it's not iterable it is a literal value, we repeat + iters.append(repeat(it)) + else: + # We have an iterator we can 'execute' + iters.append(it) + return iters + +@worker +def tupleize(items, *tuple_): + iters = _expand_tuple(items, tuple_) + return izip(*iters) + +@producer +def tuple_generator(*tuple_): + iters = _expand_tuple(None, tuple_) + return izip(*iters) +