From 0f60f511f7a0cd1919dd4c1a841bf01ef362c4bc Mon Sep 17 00:00:00 2001 From: scnerd Date: Wed, 4 Apr 2018 18:01:37 -0400 Subject: [PATCH 1/8] Beginning documentation of lazy dictionary, working on some debugging and making the behavior more reasonable --- README.rst | 1 - docs/source/caching.rst | 45 +++- docs/source/conf.py | 4 +- docs/source/index.rst | 1 - docs/source/pragma.rst | 380 --------------------------- miniutils/__init__.py | 2 +- miniutils/caching.py | 18 +- setup.py | 3 +- stress_tests/test_cached_property.py | 34 +++ 9 files changed, 94 insertions(+), 394 deletions(-) delete mode 100644 docs/source/pragma.rst create mode 100644 stress_tests/test_cached_property.py diff --git a/README.rst b/README.rst index 416941b..fcc01f4 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,6 @@ This module provides numerous helper utilities for Python3.X code to add functio - Progress bars on serial loops and parallel mappings (leveraging the excellent ``tqdm`` library) - Simple lazy-compute and caching of class properties, including dependency chaining - Executing Python2 code from within a Python3 program -- In-place modification of functions for common "optimization" tasks - More intuitive contract decorator (leveraging ``pycontracts``) Installation diff --git a/docs/source/caching.rst b/docs/source/caching.rst index db5a0d1..5700737 100644 --- a/docs/source/caching.rst +++ b/docs/source/caching.rst @@ -1,6 +1,9 @@ Property Cache ============== +Basic Property +++++++++++++++ + In some cases, an object has properties that don't need to be computed until necessary, and once computed are generally static and could just be cached. This could be accomplished using the following simple recipe:: class Obj: @@ -100,4 +103,44 @@ This isn't the complete feature set of the decorator, but it's a good initial ta .. autoclass:: miniutils.caching.CachedProperty :members: - .. automethod:: __init__ \ No newline at end of file + .. 
automethod:: __init__ + +Indexed Property +++++++++++++++++ + +Even using the above tools, it is non-concise to allow indexing into a property where values are lazily computed. + +The ``LazyDictionary`` decorator allows you to write a ``__getitem__`` style property that can be used like a dictionary and has its results cached:: + + class Primes: + @LazyDictionary('is_prime') + def primes_under(self, i): + if i == 0: + return [] + else: + return self.primes_under[i-1] + ([i] if self.is_prime[i] else []) + + @LazyDictionary('primes_under') + def is_prime(self, i): + if not isinstance(i, int) or i < 1: + raise ValueError("Can only check if a positive integer is prime") + elif i in [1, 2]: + return True + elif i % 2 == 0: + return False + else: + return all(i % p != 0 for p in self.primes_under[min(i-1, math.sqrt(i))]) + + p = Primes() + p.is_prime[5] # True, caches the fact that 1, 2, and 3 are prime + p.is_prime[500] # False, caches all primes up to sqrt(500) + p.is_prime[501] # False, virtually instant since it uses the cached primes used to compute is_prime[500] + +The indexing notation is used and preferred to make clear that this decorator only aims to support one hashable argument, and is meant to behave like a dictionary or list. It is not iterable, since the result of that would depend on whatever prior code happened to be executed. Instead, you should iterate through all desired keys, and simply index them; that way, any that need to be re-computed are, and those that can are loaded from cache. + +This plugs cleanly into ``CachedProperty``, accepting a list of properties whose values are invalidated when this dictionary is modified. 
It also supports allowing or disallowing explicit assignment to certain indices:: + + p.is_prime[3] = False + p.is_prime[9] # This is now True, since there is no lesser known prime + +This is meant to provide a slight additional feature to having a cached dictionary \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index ee5e82d..4db4f15 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '0.0.1' +version = '1.0' # The full version, including alpha/beta/rc tags. -release = '0.0.1' +release = '1.0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/index.rst b/docs/source/index.rst index 983d3c8..dde940d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -23,7 +23,6 @@ Welcome to miniutils's documentation! progress_bar caching python2 - pragma misc api diff --git a/docs/source/pragma.rst b/docs/source/pragma.rst deleted file mode 100644 index ae7bfc6..0000000 --- a/docs/source/pragma.rst +++ /dev/null @@ -1,380 +0,0 @@ -Pragma -++++++ - -When Python code is being executed abnormally, or being replaced entirely (e.g., by ``numba.jit``), it's sometimes highly relevant how your code is written. However, writing it that way isn't always practical, or you might want the code itself to be dependant on runtime data. In these cases, basic code templating or modification can be useful. This sub-module provides some simple utilities to perform Python code modification at runtime, similar to compiler directives in C. - -These functions are designed as decorators that can be stacked together. Each one modifies the provided function's AST, and then re-compiles the function with identical context to the original. 
A side effect of accomplishing this means that source code is (optionally) made available for each function, either as a return value (replace the function with a string of its modified source code) or, more usefully, by saving it to a temporary file so that ``inspect.getsource`` works correctly on it. - -Because Python is an interpreted language and functions are first-order objects, it's possible to use these functions to perform runtime-based code "optimization" or "templating". As a simple example of this, let's consider ``numba.cuda.jit``, which imposes numerous ``nopython`` limitations on what your function can do. One such limitation is that a ``numba.cuda`` kernel can't treat functions as first order objects. It must know, at function definition time, which function it's calling. Take the following example:: - - funcs = [lambda x: x, lambda x: x ** 2, lambda x: x ** 3] - - def run_func(i, x): - return funcs[i](x) - -How could we re-define this function such that it both:: - -1) Is dynamic to a list that's constant at function definition-time -2) Doesn't actually index that list in its definition - -We'll start by defining the function as an ``if`` check for the index, and call the appropriate function:: - - funcs = [lambda x: x, lambda x: x ** 2, lambda x: x ** 3] - - def run_func(i, x): - for j in range(len(funcs)): - if i == j: - return funcs[j](x) - -The ``miniutils.pragma`` module enables us to go from here to accomplish our goal above by re-writing a function's AST and re-compiling it as a closure, while making certain modifications to its syntax and environment. 
While each function will be fully described lower, the example above can be succinctly solved by unrolling the loop (whose length is known at function definition time) and by assigning the elements of the list to individual variables and swapping out their indexed references with de-indexed references:: - - funcs = [lambda x: x, lambda x: x ** 2, lambda x: x ** 3] - - @pragma.deindex(funcs, 'funcs') - @pragma.unroll(lf=len(funcs)) - def run_func(i, x): - for j in range(lf): - if i == j: - return funcs[j](x) - - # ... gets transformed at definition time into the below code ... - - funcs = [lambda x: x, lambda x: x ** 2, lambda x: x ** 3] - funcs_0 = funcs[0] - funcs_1 = funcs[1] - funcs_2 = funcs[2] - - def run_func(i, x): - if i == 0: - return funcs_0(x) - if i == 1: - return funcs_1(x) - if i == 2: - return funcs_2(x) - -Unroll ------- - -Unroll constant loops. If the `for`-loop iterator is a known value at function definition time, then replace it with its body duplicated for each value. For example:: - - def f(): - for i in [1, 2, 4]: - yield i - -could be identically replaced by:: - - def f(): - yield 1 - yield 2 - yield 4 - -The ``unroll`` decorator accomplishes this by parsing the input function, performing the unrolling transformation on the function's AST, then compiling and returning the defined function. - -If using a transformational decorator of some sort, such as ``numba.jit`` or ``tangent.grad``, if that function isn't yet able to unwrap loops like this, then using this function might yield cleaner results on constant-length loops. - -``unroll`` is currently smart enough to notice singly-defined variables and literals, as well as able to unroll the ``range`` function and unroll nested loops:: - - @pragma.unroll - def summation(x=0): - a = [x, x, x] - v = 0 - for _a in a: - v += _a - return v - - # ... Becomes ... - - def summation(x=0): - a = [x, x, x] - v = 0 - v += x - v += x - v += x - return v - - # ... But ... 
- - @pragma.unroll - def f(): - x = 3 - for i in [x, x, x]: - yield i - x = 4 - a = [x, x, x] - for i in a: - yield i - - # ... Becomes ... - - def f(): - x = 3 - yield 3 - yield 3 - yield 3 - x = 4 - a = [x, x, x] - yield 4 - yield 4 - yield 4 - - # Even nested loops and ranges work! - - @pragma.unroll - def f(): - for i in range(3): - for j in range(3): - yield i + j - - # ... Becomes ... - - def f(): - yield 0 + 0 - yield 0 + 1 - yield 0 + 2 - yield 1 + 0 - yield 1 + 1 - yield 1 + 2 - yield 2 + 0 - yield 2 + 1 - yield 2 + 2 - -You can also request to get the function source code instead of the compiled callable by using ``return_source=True``:: - - In [1]: @pragma.unroll(return_source=True) - ...: def f(): - ...: for i in range(3): - ...: print(i) - ...: - - In [2]: print(f) - def f(): - print(0) - print(1) - print(2) - -It also supports limited recognition of externally and internally defined values:: - - @pragma.unroll(a=range) - def f(): - for b in a(3): - print(b) - - # Is equivalent to: - - a = range - @pragma.unroll - def f(): - for b in a(3): - print(b) - - # Both of which become: - - def f(): - print(0) - print(1) - print(2) - -Also supported are recognizing top-level breaks. Breaks inside conditionals aren't yet supported, though they could eventually be by combining unrolling with literal condition collapsing:: - - @pragma.unroll - def f(y): - for i in range(100000): - for x in range(2): - if i == y: - break - break - - # ... Becomes ... 
- - def f(y): - for x in range(2): - if 0 == y: - break - - -Currently not-yet-supported features include: - -- Handling constant sets and dictionaries (since the values contained in the AST's, not the AST nodes themselves, must be uniquely identified) -- Tuple assignments (``a, b = 3, 4``) -- Assignment to known lists and dictionaries -- ``zip``, ``reversed``, and other known operators, when performed on definition-time constant iterables -- Resolving compile-time known conditionals before detecting top-level breaks - -.. autofunction:: miniutils.pragma.unroll - -Collapse Literals ------------------ - -Collapse literal operations in code to their results, e.g. ``x = 1 + 2`` gets converted to ``x = 3``. - -For example:: - - @pragma.collapse_literals - def f(y): - x = 3 - return x + 2 + y - - # ... Becomes ... - - def f(y): - x = 3 - return 5 + y - -This is capable of resolving expressions of numerous sorts: - -- A variable with a known value is replaced by that value -- An iterable with known values (such as one that could be unrolled by :func:`miniutils.pragma.unroll`), if indexed, is replaced with the value at that location -- A unary, binary, or logical operation on known values is replaced by the result of that operation on those values -- A `if/elif/else` block is trimmed of options that are known at decoration-time to be impossible. If it can be known which branch runs at decoration time, then the conditional is removed altogether and replaced with the body of that branch - -Currently, this decorator is not robust to runtime branches which may or may not affect certain values. 
For example:: - - @pragma.collapse_literals - def f(y): - x = 0 - if y: - x = 1 - return x - -Ought to become:: - - def f(y): - x = 0 - if y: - x = 1 - return x # This isn't resolved because it isn't known which branch will be taken - -But currently this will fail and become:: - - def f(y): - x = 0 - if y: - x = 1 - return 1 # Since this was the last value we saw assigned to x - -If the branch is constant, and thus known at decoration time, then this flaw won't affect anything:: - - @pragma.collapse_literals - def f(): - x = 1 - if x > 0: - x = 2 - return x - - # ... Becomes ... - - def f(): - x = 1 - x = 2 - return 2 -.. autofunction:: miniutils.pragma.collapse_literals - -De-index Arrays ---------------- - -Convert literal indexing operations for a given array into named value references. The new value names are de-indexed and stashed in the function's closure so that the resulting code both uses no literal indices and still behaves as if it did. Variable indices are unaffected. - -For example:: - - v = [object(), object(), object()] - - @pragma.deindex(v, 'v') - def f(x): - yield v[0] - yield v[x] - - # ... f becomes ... - - def f(x): - yield v_0 # This is defined as v_0 = v[0] by the function's closure - yield v[x] - - # We can check that this works correctly - assert list(f(2)) == [v[0], v[2]] - -This can be easily stacked with :func:`miniutils.pragma.unroll` to unroll iterables in a function when their values are known at function definition time:: - - funcs = [lambda x: x, lambda x: x ** 2, lambda x: x ** 3] - - @pragma.deindex(funcs, 'funcs') - @pragma.unroll(lf=len(funcs)) - def run_func(i, x): - for j in range(lf): - if i == j: - return funcs[j](x) - - # ... Becomes ... - - def run_func(i, x): - if i == 0: - return funcs_0(x) - if i == 1: - return funcs_1(x) - if i == 2: - return funcs_2(x) - -This could be used, for example, in a case where dynamically calling functions isn't supported, such as in ``numba.jit`` or ``numba.cuda.jit``. 
- -Note that because the array being de-indexed is passed to the decorator, the value of the constant-defined variables (e.g. ``v_0`` in the code above) is "compiled" into the code of the function, and won't update if the array is updated. Again, variable-indexed calls remain unaffected. - -Since names are (and must) be used as references to the array being de-indexed, it's worth noting that any other local variable of the format ``"{iterable_name}_{i}"`` will get shadowed by this function. The string passed to ``iterable_name`` must be the name used for the iterable within the wrapped function. - -.. autofunction:: miniutils.pragma.deindex - -Inlining Functions ------------------- - -Inline specified functions into the decorated function. Unlike in C, this directive is placed not on the function getting inlined, but rather the function into which it's getting inlined (since that's the one whose code needs to be modified and hence decorated). Currently, this is implemented in the following way: - -- When a function is called, its call code is placed within the current code block immediately before the line where its value is needed -- The code is wrapped in a one-iteration ``for`` loop (effectively a ``do {} while(0)``), and the ``return`` statement is replaced by a ``break`` -- Arguments are stored into a dictionary, and variadic keyword arguments are passed as ``dict_name.update(kwargs)``; this dictionary has the name ``_[funcname]`` where ``funcname`` is the name of the function being inlined, so other variables of this name should not be used or relied upon -- The return value is assigned to the function name as well, deleting the argument dictionary, freeing its memory, and making the return value usable when the function's code is exited by the ``break`` -- The call to the function is replaced by the variable holding the return value - -As a result, ``miniutils.pragma.inline`` cannot currently handle functions which contain a ``return`` statement within a 
loop. Since Python doesn't support anything like ``goto`` besides wrapping the code in a function (which this function implicitly shouldn't do), I don't know how to surmount this problem. Without much effort, it can be overcome by tailoring the function to be inlined. - -To inline a function ``f`` into the code of another function ``g``, use ``pragma.inline(g)(f)``, or, as a decorator:: - - def f(x): - return x**2 - - @pragma.inline(f) - def g(y): - z = y + 3 - return f(z * 4) - - # ... g Becomes ... - - def g(y): - z = y + 3 - _f = {} - _f['x'] = z * 4 - for ____ in [None]: - _f = _f['x'] ** 2 - break - return _f - -This loop can be removed, if it's not necessary, using :func:``miniutils.pragma.unroll``. This can be accomplished if there are no returns within a conditional or loop block. In this case:: - - def f(x): - return x**2 - - @pragma.unroll - @pragma.inline(f) - def g(y): - z = y + 3 - return f(z * 4) - - # ... g Becomes ... - - def g(y): - z = y + 3 - _f = {} - _f['x'] = z * 4 - _f = _f['x'] ** 2 - return _f - -Eventually, this could be collapsed using :func:``miniutils.pragma.collapse_literals``, to produce simply ``return ((y + 3) * 4) ** 2``, but dictionaries aren't yet supported for collapsing. - -.. 
autofunction:: miniutils.pragma.inline \ No newline at end of file diff --git a/miniutils/__init__.py b/miniutils/__init__.py index 91dd3d6..9661986 100644 --- a/miniutils/__init__.py +++ b/miniutils/__init__.py @@ -1,4 +1,4 @@ -from .caching import CachedProperty +from .caching import CachedProperty, LazyDictionary from .magic_contract import magic_contract from .opt_decorator import optional_argument_decorator from .progress_bar import progbar, parallel_progbar, iparallel_progbar diff --git a/miniutils/caching.py b/miniutils/caching.py index 503667c..2adaca9 100644 --- a/miniutils/caching.py +++ b/miniutils/caching.py @@ -170,25 +170,31 @@ def inner_setter(inner_self, value): class _LazyIndexable: def __init__(self, getter_closure, on_modified, settable=False, values=None): - self._cache = dict(values or {}) + self._known = dict(values or {}) + self._cache = {} self._closure = getter_closure self._on_modified = on_modified self.settable = settable def __getitem__(self, item): + if item in self._known: + return self._known[item] + if item not in self._cache: self._cache[item] = self._closure(item) - self._on_modified() return self._cache[item] def __setitem__(self, key, value): if not self.settable: raise AttributeError("{} is not settable".format(self)) - self._cache[key] = value + self._known[key] = value self._on_modified() def __delitem__(self, key): - del self._cache[key] + if key in self._known: + del self._known[key] + if key in self._cache: # Not elif, we want to purge all knowledge about this key + del self._cache[key] self._on_modified() @property @@ -198,14 +204,14 @@ def __doc__(self): def update(self, new_values): if not self.settable: raise AttributeError("{} is not settable".format(self)) - self._cache.update(new_values) + self._known.update(new_values) self._on_modified() class LazyDictionary: caches = [] - def __init__(self, *affects, allow_collection_mutation=True): + def __init__(self, *affects, allow_collection_mutation=False): """Marks this 
indexable property to be a cached dictionary. Delete this property to remove the cached value and force it to be rerun. :param affects: Strings that list the names of the other properties in this class that are directly invalidated diff --git a/setup.py b/setup.py index 630a697..4978c49 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='miniutils', - version='0.0.2', + version='1.0.1', packages=['miniutils'], url='http://miniutils.readthedocs.io/en/latest/', license='MIT', @@ -14,7 +14,6 @@ 'tqdm', 'pycontracts', 'coloredlogs', - 'astor', ], download_url='https://github.com/scnerd/miniutils', keywords=['miniutils', 'utilities', 'decorators', 'minimal'], diff --git a/stress_tests/test_cached_property.py b/stress_tests/test_cached_property.py new file mode 100644 index 0000000..66ea759 --- /dev/null +++ b/stress_tests/test_cached_property.py @@ -0,0 +1,34 @@ +from unittest import TestCase + +import math +import itertools + +from miniutils.caching import CachedProperty, LazyDictionary +from miniutils.progress_bar import progbar + + +class Primes: + @LazyDictionary() + def primes_under(self, i): + if i == 0: + return [] + else: + return self.primes_under[i-1] + ([i] if self.is_prime[i] else []) + + @LazyDictionary() + def is_prime(self, i): + if not isinstance(i, int) or i < 1: + raise ValueError("Can only check if a positive integer is prime") + elif i in [1, 2]: + return True + elif i % 2 == 0: + return False + else: + return all(i % p != 0 for p in self.primes_under[math.sqrt(i)]) + + +class TestCachedProperty(TestCase): + def test_prime_cache(self): + p = Primes() + primes = [j for j in progbar(range(1, 1000000 + 1)) if p.is_prime[j]] + print(len(primes)) From 94bad9fc687b1499f5e35e0e6bad351acdb3a8c5 Mon Sep 17 00:00:00 2001 From: scnerd Date: Thu, 5 Apr 2018 11:15:40 -0400 Subject: [PATCH 2/8] Improved code coverage and resilience slightly --- miniutils/progress_bar.py | 4 ++-- miniutils/py2_wrap.py | 14 ++++++++++---- tests/test_make_python2.py | 3 
+++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/miniutils/progress_bar.py b/miniutils/progress_bar.py index c8bbd8a..b197cee 100644 --- a/miniutils/progress_bar.py +++ b/miniutils/progress_bar.py @@ -11,7 +11,7 @@ from IPython import get_ipython if type(get_ipython()).__module__.startswith('ipykernel.'): from tqdm import tqdm_notebook as _tqdm - except (ImportError, NameError): + except (ImportError, NameError): # pragma: nocover # IPython isn't even installed, or we're not in it pass except ImportError: # pragma: nocover @@ -115,7 +115,7 @@ def _parallel_progbar_launch(mapper, iterable, nprocs=None, starmap=False, flatm for p in procs: try: p.join(1) - except (TimeoutError, mp.TimeoutError, TimedOutException): + except (TimeoutError, mp.TimeoutError, TimedOutException): # pragma: nocover warnings.warn("parallel_progbar mapping process failed to close properly (check error output)") diff --git a/miniutils/py2_wrap.py b/miniutils/py2_wrap.py index defe3ce..9eed074 100644 --- a/miniutils/py2_wrap.py +++ b/miniutils/py2_wrap.py @@ -7,6 +7,10 @@ import textwrap +_re_var_name = re.compile(r'^[a-zA-Z_]\w*$', re.UNICODE) +_re_module_name = re.compile(r'^[a-zA-Z_.][\w.]*$', re.UNICODE) + + # TODO: Use fd's besides stdin and stdout, so that you don't mess with code that reads or writes to those streams class MakePython2: pickle_protocol = 2 @@ -31,7 +35,6 @@ def __init__(self, func=None, *, imports=None, global_values=None, copy_function self.python2_path = python2_path self.proc = None - valid_name = re.compile(r'^[\w.]+$', re.UNICODE) if isinstance(self.imports, dict): self.imports = list(self.imports.items()) for i, imp in enumerate(self.imports): @@ -40,12 +43,15 @@ def __init__(self, func=None, *, imports=None, global_values=None, copy_function elif isinstance(imp, (tuple, list)): if len(imp) not in [1, 2]: raise ValueError("Imports must be given as 'name', ('name',), or ('pkg', 'name')") - if not all(isinstance(n, str) and valid_name.match(n) for n 
in imp): + if not all(isinstance(n, str) and _re_module_name.match(n) for n in imp): raise ValueError("Invalid import name: 'import {}{}'" .format(imp[0], 'as {}'.format(imp[1]) if len(imp) == 2 else '')) - if not all(isinstance(k, str) for k in self.globals.keys()): - raise ValueError("Global variables must be given as {'name': value}") + for k in self.globals.keys(): + if not isinstance(k, str): + raise ValueError("Global variables must be given as {'name': value}") + elif not _re_var_name.match(k): + raise ValueError("Invalid variable name given: '{}'".format(k)) if func: self(func) diff --git a/tests/test_make_python2.py b/tests/test_make_python2.py index 638dd03..9ee38eb 100644 --- a/tests/test_make_python2.py +++ b/tests/test_make_python2.py @@ -52,3 +52,6 @@ def fail(): return True self.assertRaises(ValueError, MakePython2, fail, imports=[('os', 'path', 'exists')]) + self.assertRaises(ValueError, MakePython2, fail, global_values={1: 2}) + self.assertRaises(ValueError, MakePython2, fail, global_values={'123invalid_name': 5}) + self.assertRaises(ValueError, MakePython2, fail, global_values={'invalid name': 5}) From c2b117c871cfb3bcba5182be3cfbcf40115d85d2 Mon Sep 17 00:00:00 2001 From: scnerd Date: Thu, 5 Apr 2018 13:28:02 -0400 Subject: [PATCH 3/8] Increased code coverage to 100% --- miniutils/logs.py | 6 +++--- tests/test_logging.py | 32 +++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/miniutils/logs.py b/miniutils/logs.py index b70d400..6a666e6 100644 --- a/miniutils/logs.py +++ b/miniutils/logs.py @@ -12,8 +12,8 @@ def enable_logging(log_level='NOTSET', *, logdir=None, use_colors=True, capture_ import sys import logging.handlers - if logdir is not None and not os.path.exists(logdir): - os.makedirs(logdir) + if logdir is not None: + os.makedirs(logdir, exist_ok=True) logs_base.logger = logging.getLogger() for handler in logs_base.logger.handlers: @@ -64,7 +64,7 @@ def format(self, record): return 
super().format(record) color_formatter = SmarterColorer(fmt=format_str) - except ImportError: + except ImportError: # pragma: nocover color_formatter = plain_formatter else: color_formatter = plain_formatter diff --git a/tests/test_logging.py b/tests/test_logging.py index ffd0482..b4c1313 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -1,4 +1,7 @@ import sys +import os +import shutil +import tempfile from unittest import TestCase from miniutils.capture_output import captured_output @@ -29,9 +32,6 @@ def test_logging_imports(self): #self.assertEqual(third.lower(), '__3__') def test_log_dir(self): - import tempfile - import os - with tempfile.TemporaryDirectory() as d: from miniutils.logs import enable_logging log = enable_logging(logdir=d) @@ -44,3 +44,29 @@ def test_log_dir(self): print(">>> {} <<<".format(log_files), file=sys.__stderr__) self.assertIn('TEST', log_files) + def test_log_dir_not_exists(self): + from miniutils.logs import enable_logging + + dir_path = '__test_logs' + assert not os.path.exists(dir_path) + + try: + log = enable_logging(logdir=dir_path) + + assert os.path.exists(dir_path) + assert os.path.isdir(dir_path) + + log.critical('TEST') + + del log + + log_files = [os.path.join(dir_path, f) for f in os.listdir(dir_path)] + log_files = [f for f in log_files if os.path.isfile(f)] + log_files = "\n".join(open(f).read() for f in log_files) + print(">>> {} <<<".format(log_files), file=sys.__stderr__) + self.assertIn('TEST', log_files) + except Exception: + raise + finally: + if os.path.exists(dir_path): + shutil.rmtree(dir_path, ignore_errors=True) From 687c3f3723655b0c6d420e80e08844282370eb1b Mon Sep 17 00:00:00 2001 From: scnerd Date: Thu, 5 Apr 2018 15:39:31 -0400 Subject: [PATCH 4/8] Refined and filled out tests a little more --- miniutils/caching.py | 3 ++- miniutils/py2_wrap.py | 2 +- tests/test_cached_property.py | 6 ++++-- tests/test_logging.py | 14 ++++++++++---- tests/test_make_python2.py | 6 ++++++ 5 files changed, 23 
insertions(+), 8 deletions(-) diff --git a/miniutils/caching.py b/miniutils/caching.py index 2adaca9..fcc2123 100644 --- a/miniutils/caching.py +++ b/miniutils/caching.py @@ -188,7 +188,8 @@ def __setitem__(self, key, value): if not self.settable: raise AttributeError("{} is not settable".format(self)) self._known[key] = value - self._on_modified() + if key in self._cache and self._cache[key] is not value: + self._on_modified() def __delitem__(self, key): if key in self._known: diff --git a/miniutils/py2_wrap.py b/miniutils/py2_wrap.py index 9eed074..98622a9 100644 --- a/miniutils/py2_wrap.py +++ b/miniutils/py2_wrap.py @@ -87,7 +87,7 @@ def __call__(self, func): function_code = '' function_name = func else: - raise AttributeError("MakePython2 must be given either a function or an expression string to execute") + raise TypeError("MakePython2 must be given either a function or an expression string to execute") self.proc = sp.Popen([self.python2_path, MakePython2.template], executable=self.python2_path, stdin=sp.PIPE, stdout=sp.PIPE) diff --git a/tests/test_cached_property.py b/tests/test_cached_property.py index a474af3..c8620be 100644 --- a/tests/test_cached_property.py +++ b/tests/test_cached_property.py @@ -292,6 +292,9 @@ def test_cached_dict(self): w.f[2] = 7 self.assertEqual(w.b, 8) self.assertEqual(w.a, 10) + del w.f[2] + self.assertEqual(w.b, 5) + self.assertEqual(w.a, 7) w.f.update({1: 0, 2: 0}) self.assertEqual(w.b, 0) self.assertEqual(w.a, 2) @@ -304,7 +307,6 @@ def test_cached_dict(self): except AttributeError: pass - self.assertListEqual(w.calls, ['a', 'b', 'f(1)', 'f(2)', 'b', 'f(5)', 'f(4)', 'f(5)', 'f(2)', 'a', 'b', 'f(1)', - 'b', 'a', 'b', 'a', 'g(3)']) + self.assertListEqual(w.calls, 'a b f(1) f(2) b f(5) f(4) f(5) f(2) a b f(1) b a b f(2) a b a g(3)'.split()) self.assertIn('G docstring', w.g.__doc__) diff --git a/tests/test_logging.py b/tests/test_logging.py index b4c1313..ef89161 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ 
-45,9 +45,9 @@ def test_log_dir(self): self.assertIn('TEST', log_files) def test_log_dir_not_exists(self): - from miniutils.logs import enable_logging + from miniutils.logs import enable_logging, disable_logging - dir_path = '__test_logs' + dir_path = '.test_logs' assert not os.path.exists(dir_path) try: @@ -58,7 +58,13 @@ def test_log_dir_not_exists(self): log.critical('TEST') + for handler in log.handlers: + import logging + if isinstance(handler, logging.FileHandler): + handler.close() + log.removeHandler(handler) del log + disable_logging() log_files = [os.path.join(dir_path, f) for f in os.listdir(dir_path)] log_files = [f for f in log_files if os.path.isfile(f)] @@ -68,5 +74,5 @@ def test_log_dir_not_exists(self): except Exception: raise finally: - if os.path.exists(dir_path): - shutil.rmtree(dir_path, ignore_errors=True) + print("DELETING TEMP LOG DIR '{}'".format(dir_path), file=sys.__stderr__) + shutil.rmtree(dir_path) diff --git a/tests/test_make_python2.py b/tests/test_make_python2.py index 9ee38eb..9033c84 100644 --- a/tests/test_make_python2.py +++ b/tests/test_make_python2.py @@ -52,6 +52,12 @@ def fail(): return True self.assertRaises(ValueError, MakePython2, fail, imports=[('os', 'path', 'exists')]) + self.assertRaises(ValueError, MakePython2, fail, imports={1: 2}) + self.assertRaises(ValueError, MakePython2, fail, imports={'123invalid_name': 5}) + self.assertRaises(ValueError, MakePython2, fail, imports={'invalid name': 5}) self.assertRaises(ValueError, MakePython2, fail, global_values={1: 2}) self.assertRaises(ValueError, MakePython2, fail, global_values={'123invalid_name': 5}) self.assertRaises(ValueError, MakePython2, fail, global_values={'invalid name': 5}) + + def test_bad_func(self): + self.assertRaises(TypeError, MakePython2, object()) From c776cafbb4303d76ef0d889e0a88f00dcda1463d Mon Sep 17 00:00:00 2001 From: scnerd Date: Thu, 5 Apr 2018 17:13:29 -0400 Subject: [PATCH 5/8] Improved lazy dictionary behavior to include caching KeyErrors and 
adding a .get function --- docs/source/caching.rst | 12 ++------ miniutils/caching.py | 19 ++++++++++++- stress_tests/test_cached_property.py | 42 ++++++++++++++++++++-------- tests/test_cached_property.py | 15 ++++++++++ 4 files changed, 66 insertions(+), 22 deletions(-) diff --git a/docs/source/caching.rst b/docs/source/caching.rst index 5700737..c6dfc9b 100644 --- a/docs/source/caching.rst +++ b/docs/source/caching.rst @@ -113,14 +113,7 @@ Even using the above tools, it is non-concise to allow indexing into a property The ``LazyDictionary`` decorator allows you to write a ``__getitem__`` style property that can be used like a dictionary and has its results cached:: class Primes: - @LazyDictionary('is_prime') - def primes_under(self, i): - if i == 0: - return [] - else: - return self.primes_under[i-1] + ([i] if self.is_prime[i] else []) - - @LazyDictionary('primes_under') + @LazyDictionary() def is_prime(self, i): if not isinstance(i, int) or i < 1: raise ValueError("Can only check if a positive integer is prime") @@ -129,7 +122,7 @@ The ``LazyDictionary`` decorator allows you to write a ``__getitem__`` style pro elif i % 2 == 0: return False else: - return all(i % p != 0 for p in self.primes_under[min(i-1, math.sqrt(i))]) + return all(i % p != 0 for p in range(3, int(math.sqrt(i)) + 1, 2) if self.is_prime[p]) p = Primes() p.is_prime[5] # True, caches the fact that 1, 2, and 3 are prime @@ -140,6 +133,7 @@ The indexing notation is used and preferred to make clear that this decorator on This plugs cleanly into ``CachedProperty``, accepting a list of properties whose values are invalidated when this dictionary is modified. 
It also supports allowing or disallowing explicit assignment to certain indices:: + p = Primes() p.is_prime[3] = False p.is_prime[9] # This is now True, since there is no lesser known prime diff --git a/miniutils/caching.py b/miniutils/caching.py index fcc2123..e9f9b0d 100644 --- a/miniutils/caching.py +++ b/miniutils/caching.py @@ -172,6 +172,7 @@ class _LazyIndexable: def __init__(self, getter_closure, on_modified, settable=False, values=None): self._known = dict(values or {}) self._cache = {} + self._key_errors = {} self._closure = getter_closure self._on_modified = on_modified self.settable = settable @@ -180,8 +181,16 @@ def __getitem__(self, item): if item in self._known: return self._known[item] + if item in self._key_errors: + raise KeyError(*self._key_errors[item]) + if item not in self._cache: - self._cache[item] = self._closure(item) + try: + self._cache[item] = self._closure(item) + except KeyError as e: + self._key_errors[item] = e.args + raise e + return self._cache[item] def __setitem__(self, key, value): @@ -196,12 +205,20 @@ def __delitem__(self, key): del self._known[key] if key in self._cache: # Not elif, we want to purge all knowledge about this key del self._cache[key] + if key in self._key_errors: + del self._key_errors[key] self._on_modified() @property def __doc__(self): return self._closure.__doc__ + def get(self, key, default): + try: + return self[key] + except KeyError: + return default + def update(self, new_values): if not self.settable: raise AttributeError("{} is not settable".format(self)) diff --git a/stress_tests/test_cached_property.py b/stress_tests/test_cached_property.py index 66ea759..ab9d1fc 100644 --- a/stress_tests/test_cached_property.py +++ b/stress_tests/test_cached_property.py @@ -1,20 +1,12 @@ from unittest import TestCase import math -import itertools from miniutils.caching import CachedProperty, LazyDictionary from miniutils.progress_bar import progbar class Primes: - @LazyDictionary() - def primes_under(self, i): - 
if i == 0: - return [] - else: - return self.primes_under[i-1] + ([i] if self.is_prime[i] else []) - @LazyDictionary() def is_prime(self, i): if not isinstance(i, int) or i < 1: @@ -24,11 +16,37 @@ def is_prime(self, i): elif i % 2 == 0: return False else: - return all(i % p != 0 for p in self.primes_under[math.sqrt(i)]) + return all(i % p != 0 for p in range(3, int(math.sqrt(i)) + 1, 2) if self.is_prime[p]) + + +class PropertyComparison: + @property + def x(self): + return 5 + + @CachedProperty() + def y(self): + return 5 class TestCachedProperty(TestCase): - def test_prime_cache(self): + def test_prime_correctness(self): p = Primes() - primes = [j for j in progbar(range(1, 1000000 + 1)) if p.is_prime[j]] - print(len(primes)) + n = 1000000 + print("Computing number of primes under {}".format(n)) + self.assertEqual(sum(1 for i in progbar(range(2, n)) if p.is_prime[i]), 78498) + + def test_cache_speed(self): + p = PropertyComparison() + self.assertEqual(p.x, 5) + self.assertEqual(p.y, 5) + + from miniutils.timing import tic + n = 1000000 + toc = tic() + for _ in range(n): + self.assertEqual(p.x, 5) + toc("Time required to access a simple property {} times".format(n)) + for _ in range(n): + self.assertEqual(p.y, 5) + toc("Time required to access a cached property {} times".format(n)) diff --git a/tests/test_cached_property.py b/tests/test_cached_property.py index c8620be..05b31ca 100644 --- a/tests/test_cached_property.py +++ b/tests/test_cached_property.py @@ -130,6 +130,13 @@ def g(self, x): self.calls.append('g({})'.format(x)) return x ** 2 + @LazyDictionary() + def ex(self, x): + if x % 2: + raise KeyError("Odd numbers not allowed") + else: + return x // 2 + class TestCachedProperty(TestCase): def test_matrix(self): @@ -310,3 +317,11 @@ def test_cached_dict(self): self.assertListEqual(w.calls, 'a b f(1) f(2) b f(5) f(4) f(5) f(2) a b f(1) b a b f(2) a b a g(3)'.split()) self.assertIn('G docstring', w.g.__doc__) + + def test_cached_dict_errors(self): + w = 
WithCachedDict() + self.assertEqual(w.ex[2], 1) + self.assertRaisesRegex(KeyError, 'Odd', lambda: w.ex[3]) + self.assertRaisesRegex(KeyError, 'Odd', lambda: w.ex[3]) + self.assertEqual(w.ex.get(3, 1), 1) + From 5c8f4b6886608faa82bd19be23ab1c5295f21129 Mon Sep 17 00:00:00 2001 From: scnerd Date: Thu, 5 Apr 2018 17:34:08 -0400 Subject: [PATCH 6/8] Minor addition to code coverage --- tests/test_cached_property.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_cached_property.py b/tests/test_cached_property.py index 05b31ca..9c3acd4 100644 --- a/tests/test_cached_property.py +++ b/tests/test_cached_property.py @@ -324,4 +324,9 @@ def test_cached_dict_errors(self): self.assertRaisesRegex(KeyError, 'Odd', lambda: w.ex[3]) self.assertRaisesRegex(KeyError, 'Odd', lambda: w.ex[3]) self.assertEqual(w.ex.get(3, 1), 1) + w.ex[3] = 2 + self.assertEqual(w.ex.get(3, 1), 2) + del w.ex[3] + self.assertRaisesRegex(KeyError, 'Odd', lambda: w.ex[3]) + From 33e539e639393bd2f54907053aba9e6165df0730 Mon Sep 17 00:00:00 2001 From: scnerd Date: Thu, 5 Apr 2018 17:45:12 -0400 Subject: [PATCH 7/8] Minor fix to tests --- tests/test_cached_property.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cached_property.py b/tests/test_cached_property.py index 9c3acd4..9f3601c 100644 --- a/tests/test_cached_property.py +++ b/tests/test_cached_property.py @@ -130,7 +130,7 @@ def g(self, x): self.calls.append('g({})'.format(x)) return x ** 2 - @LazyDictionary() + @LazyDictionary(allow_collection_mutation=True) def ex(self, x): if x % 2: raise KeyError("Odd numbers not allowed") From 05ed15d74c23227704eb5fbe43cdcc63a6628155 Mon Sep 17 00:00:00 2001 From: scnerd Date: Fri, 6 Apr 2018 12:11:07 -0400 Subject: [PATCH 8/8] Added a little more documentation --- docs/source/caching.rst | 12 +++++++++--- miniutils/caching.py | 8 ++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/source/caching.rst b/docs/source/caching.rst index 
c6dfc9b..98743d1 100644 --- a/docs/source/caching.rst +++ b/docs/source/caching.rst @@ -103,8 +103,6 @@ This isn't the complete feature set of the decorator, but it's a good initial ta .. autoclass:: miniutils.caching.CachedProperty :members: - .. automethod:: __init__ - Indexed Property ++++++++++++++++ @@ -137,4 +135,12 @@ This plugs cleanly into ``CachedProperty``, accepting a list of properties whose p.is_prime[3] = False p.is_prime[9] # This is now True, since there is no lesser known prime -This is meant to provide a slight additional feature to having a cached dictionary \ No newline at end of file +This is meant to provide a slight additional feature to having a cached dictionary, though honestly it's probably a very small improvement over ``self.is_prime = defaultdict(self._is_prime)``, since it has the additions of invalidating cached properties and making values dependent on their indices. + +Values can be explicitly assigned to indices (if ``allow_collection_mutation=True``); assigned values override cached values. Raised ``KeyError``s are cached to prevent re-running indices where failure is known. If an error is not due solely to the index, raise some other error to allow that index to be retried later if some variation to the program's state might allow it to succeed. ``.get(key, default)`` and ``.update(dict)`` are also provided to offer a more dictionary-like interface. A particular object instance will have a :class:`miniutils.caching._LazyDictionary` instance which provides its caching, though the decorated function is once again replaced with a simple ``@property``. + +.. autoclass:: miniutils.caching.LazyDictionary + :members: + +.. 
autoclass:: miniutils.caching._LazyDictionary + :members: \ No newline at end of file diff --git a/miniutils/caching.py b/miniutils/caching.py index e9f9b0d..e4c3e91 100644 --- a/miniutils/caching.py +++ b/miniutils/caching.py @@ -168,7 +168,7 @@ def inner_setter(inner_self, value): return property(fget=inner_getter, fset=inner_setter, fdel=inner_deleter, doc=self.f.__doc__) -class _LazyIndexable: +class _LazyDictionary: def __init__(self, getter_closure, on_modified, settable=False, values=None): self._known = dict(values or {}) self._cache = {} @@ -251,9 +251,9 @@ def reset_dependents(inner_self): @functools.wraps(f) def inner_getter(inner_self): if not hasattr(inner_self, cache_name): - new_indexable = _LazyIndexable(functools.wraps(f)(partial(f, inner_self)), - partial(reset_dependents, inner_self), - self.allow_mutation) + new_indexable = _LazyDictionary(functools.wraps(f)(partial(f, inner_self)), + partial(reset_dependents, inner_self), + self.allow_mutation) setattr(inner_self, cache_name, new_indexable) return getattr(inner_self, cache_name)