From 994ac1c98fc0663004530029db8e56066ffecd9f Mon Sep 17 00:00:00 2001
From: Jirka
Date: Fri, 23 Feb 2024 10:45:45 +0100
Subject: [PATCH 1/8] debug wrong results for runtime func

---
 tests/test_general.py | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/tests/test_general.py b/tests/test_general.py
index 6b0a071b..f15292f4 100644
--- a/tests/test_general.py
+++ b/tests/test_general.py
@@ -368,28 +368,33 @@ def dummy_func(a, b):
 
 
 def test_partial_handling(tmpdir):
-    count = 0
+    count_p = count_m = 0
 
-    def dummy_func(a, b=2):
-        nonlocal count
-        count += 1
+    def fn_plus(a, b=2):
+        nonlocal count_p
+        count_p += 1
         return a + b
 
-    cachier_ = cachier.cachier(cache_dir=tmpdir)
-    assert count == 0
+    def fn_minus(a, b=2):
+        nonlocal count_m
+        count_m += 1
+        return a - b
 
-    dummy_ = functools.partial(dummy_func, 1)
-    cachier_(dummy_)()
+    cachier_ = cachier.cachier(cache_dir=tmpdir)
+    assert count_p == count_m == 0
 
-    dummy_ = functools.partial(dummy_func, a=1)
-    cachier_(dummy_)()
+    for fn, expected in [(fn_plus, 3), (fn_minus, -1)]:
+        dummy_ = functools.partial(fn, 1)
+        assert cachier_(dummy_)() == expected
 
-    dummy_ = functools.partial(dummy_func, b=2)
-    cachier_(dummy_)(1)
+        dummy_ = functools.partial(fn, a=1)
+        assert cachier_(dummy_)() == expected
 
-    assert count == 1
+        dummy_ = functools.partial(fn, b=2)
+        assert cachier_(dummy_)(1) == expected
 
-    cachier_(dummy_func)(1, 2)
-    cachier_(dummy_func)(a=1, b=2)
+        assert cachier_(fn)(1, 2) == expected
+        assert cachier_(fn)(a=1, b=2) == expected
 
-    assert count == 1
+    assert count_p == 1
+    assert count_m == 1

From fb7417e5884987b2cefbde012316c2bda9406ad5 Mon Sep 17 00:00:00 2001
From: Jirka
Date: Fri, 23 Feb 2024 10:55:45 +0100
Subject: [PATCH 2/8] more tests

---
 tests/test_general.py | 48 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/tests/test_general.py b/tests/test_general.py
index f15292f4..3db7f045 100644
--- a/tests/test_general.py
+++ b/tests/test_general.py
@@ -352,19 +352,51 @@ def dummy_func(a, b=2):
     assert count == 1
 
 
+def test_diff_functions_same_args(tmpdir):
+    count_p = count_m = 0
+
+    @cachier.cachier(cache_dir=tmpdir)
+    def fn_plus(a, b=2):
+        nonlocal count_p
+        count_p += 1
+        return a + b
+
+    @cachier.cachier(cache_dir=tmpdir)
+    def fn_minus(a, b=2):
+        nonlocal count_m
+        count_m += 1
+        return a - b
+
+    assert count_p == count_m == 0
+
+    for fn, expected in [(fn_plus, 3), (fn_minus, -1)]:
+        assert fn(1) == expected
+        assert fn(a=1, b=2) == expected
+        assert count_p == 1
+        assert count_m == 1
+
+
 def test_runtime_handling(tmpdir):
-    count = 0
+    count_p = count_m = 0
 
-    def dummy_func(a, b):
-        nonlocal count
-        count += 1
+    def fn_plus(a, b=2):
+        nonlocal count_p
+        count_p += 1
         return a + b
 
+    def fn_minus(a, b=2):
+        nonlocal count_m
+        count_m += 1
+        return a - b
+
     cachier_ = cachier.cachier(cache_dir=tmpdir)
-    assert count == 0
-    cachier_(dummy_func)(a=1, b=2)
-    cachier_(dummy_func)(a=1, b=2)
-    assert count == 1
+    assert count_p == count_m == 0
+
+    for fn, expected in [(fn_plus, 3), (fn_minus, -1)]:
+        assert cachier_(fn)(1, 2) == expected
+        assert cachier_(fn)(a=1, b=2) == expected
+        assert count_p == 1
+        assert count_m == 1
 
 
 def test_partial_handling(tmpdir):

From 8fd48b296a82d8db36b89220e80952381a7ab399 Mon Sep 17 00:00:00 2001
From: Jirka
Date: Fri, 23 Feb 2024 12:19:28 +0100
Subject: [PATCH 3/8] tests

---
 tests/test_general.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_general.py b/tests/test_general.py
index 3db7f045..2594464a 100644
--- a/tests/test_general.py
+++ b/tests/test_general.py
@@ -398,6 +398,12 @@ def fn_minus(a, b=2):
         assert count_p == 1
         assert count_m == 1
 
+    for fn, expected in [(fn_plus, 5), (fn_minus, 1)]:
+        assert cachier_(fn)(3, 2) == expected
+        assert cachier_(fn)(a=3, b=2) == expected
+        assert count_p == 2
+        assert count_m == 2
+
 
 def test_partial_handling(tmpdir):

From 49df59ff81e51502e1f282164bafd5271ce22a36 Mon Sep 17 00:00:00 2001
From: Jirka
Date: Fri, 23 Feb 2024 13:02:00 +0100
Subject: [PATCH 4/8] _

---
 cachier/core.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/cachier/core.py b/cachier/core.py
index 6bd0d871..987e35c5 100644
--- a/cachier/core.py
+++ b/cachier/core.py
@@ -268,19 +268,19 @@ def func_wrapper(*args, **kwds):
             _print("No entry found. No current calc. Calling like a boss.")
             return _calc_entry(core, key, func, args, kwds)
 
-        def clear_cache():
+        def _clear_cache():
             """Clear the cache."""
             core.clear_cache()
 
-        def clear_being_calculated():
+        def _clear_being_calculated():
             """Marks all entries in this cache as not being calculated."""
             core.clear_being_calculated()
 
-        def cache_dpath():
+        def _cache_dpath():
             """Returns the path to the cache dir, if exists; None if not."""
             return getattr(core, "cache_dir", None)
 
-        def precache_value(*args, value_to_cache, **kwds):
+        def _precache_value(*args, value_to_cache, **kwds):
             """Add an initial value to the cache.
 
             Arguments
             ---------
             ...
             )
             return core.precache_value(tuple(), kwargs, value_to_cache)
 
-        func_wrapper.clear_cache = clear_cache
-        func_wrapper.clear_being_calculated = clear_being_calculated
-        func_wrapper.cache_dpath = cache_dpath
-        func_wrapper.precache_value = precache_value
+        func_wrapper.clear_cache = _clear_cache
+        func_wrapper.clear_being_calculated = _clear_being_calculated
+        func_wrapper.cache_dpath = _cache_dpath
+        func_wrapper.precache_value = _precache_value
         return func_wrapper
 
     return _cachier_decorator

From b26ba991e58c71626da0662dd54cc12230b6fa4c Mon Sep 17 00:00:00 2001
From: Jirka
Date: Fri, 23 Feb 2024 20:17:49 +0100
Subject: [PATCH 5/8] fixing & tests

---
 cachier/cores/base.py   |   5 ++
 cachier/cores/mongo.py  |  20 +++----
 cachier/cores/pickle.py | 115 +++++++++++++++++++---------------------
 tests/test_general.py   |  12 +++--
 4 files changed, 74 insertions(+), 78 deletions(-)

diff --git a/cachier/cores/base.py b/cachier/cores/base.py
index 0bcbd95d..644ba630 100644
--- a/cachier/cores/base.py
+++ b/cachier/cores/base.py
@@ -9,6 +9,7 @@
 import abc  # for the _BaseCore abstract base class
 import inspect
 import threading
+from typing import Callable
 
 from .._types import HashFunc
 from ..config import _update_with_defaults
@@ -18,6 +19,10 @@ class RecalculationNeeded(Exception):
     pass
 
 
+def _get_func_str(func: Callable) -> str:
+    return f".{func.__module__}.{func.__name__}"
+
+
 class _BaseCore:
     __metaclass__ = abc.ABCMeta
 

diff --git a/cachier/cores/mongo.py b/cachier/cores/mongo.py
index 09cc36b1..d9833bc9 100644
--- a/cachier/cores/mongo.py
+++ b/cachier/cores/mongo.py
@@ -21,7 +21,7 @@
 from pymongo import ASCENDING, IndexModel
 from pymongo.errors import OperationFailure
 
-from .base import RecalculationNeeded, _BaseCore
+from .base import RecalculationNeeded, _BaseCore, _get_func_str
 
 MONGO_SLEEP_DURATION_IN_SEC = 1
 
@@ -60,13 +60,9 @@ def __init__(
         )
         self.mongo_collection.create_indexes([func1key1])
 
-    @staticmethod
-    def _get_func_str(func):
-        return f".{func.__module__}.{func.__name__}"
-
     def get_entry_by_key(self, key):
         res = self.mongo_collection.find_one(
-            {"func": _MongoCore._get_func_str(self.func), "key": key}
+            {"func": _get_func_str(self.func), "key": key}
         )
         if not res:
             return key, None
@@ -89,10 +85,10 @@ def get_entry_by_key(self, key):
     def set_entry(self, key, func_res):
         thebytes = pickle.dumps(func_res)
         self.mongo_collection.update_one(
-            filter={"func": _MongoCore._get_func_str(self.func), "key": key},
+            filter={"func": _get_func_str(self.func), "key": key},
             update={
                 "$set": {
-                    "func": _MongoCore._get_func_str(self.func),
+                    "func": _get_func_str(self.func),
                     "key": key,
                     "value": Binary(thebytes),
                     "time": datetime.now(),
@@ -105,7 +101,7 @@ def set_entry(self, key, func_res):
 
     def mark_entry_being_calculated(self, key):
         self.mongo_collection.update_one(
-            filter={"func": _MongoCore._get_func_str(self.func), "key": key},
+            filter={"func": _get_func_str(self.func), "key": key},
             update={"$set": {"being_calculated": True}},
             upsert=True,
         )
@@ -114,7 +110,7 @@ def mark_entry_not_calculated(self, key):
         with suppress(OperationFailure):  # don't care in this case
             self.mongo_collection.update_one(
                 filter={
-                    "func": _MongoCore._get_func_str(self.func),
+                    "func": _get_func_str(self.func),
                     "key": key,
                 },
                 update={"$set": {"being_calculated": False}},
@@ -135,13 +131,13 @@ def wait_on_entry_calc(self, key):
 
     def clear_cache(self):
         self.mongo_collection.delete_many(
-            filter={"func": _MongoCore._get_func_str(self.func)}
+            filter={"func": _get_func_str(self.func)}
         )
 
     def clear_being_calculated(self):
         self.mongo_collection.update_many(
             filter={
-                "func": _MongoCore._get_func_str(self.func),
+                "func": _get_func_str(self.func),
                 "being_calculated": True,
             },
             update={"$set": {"being_calculated": False}},
diff --git a/cachier/cores/pickle.py b/cachier/cores/pickle.py
index 4180f934..90158417 100644
--- a/cachier/cores/pickle.py
+++ b/cachier/cores/pickle.py
@@ -86,35 +86,27 @@ def __init__(
         self.separate_files = _update_with_defaults(
             separate_files, "separate_files"
         )
-        self.cache_fname = None
-        self.cache_fpath = None
-
-    def _cache_fname(self):
-        if self.cache_fname is None:
-            fname = f".{self.func.__module__}.{self.func.__qualname__}"
-            self.cache_fname = fname.replace("<", "_").replace(">", "_")
-        return self.cache_fname
-
-    def _cache_fpath(self):
-        if self.cache_fpath is None:
-            os.makedirs(self.cache_dir, exist_ok=True)
-            self.cache_fpath = os.path.abspath(
-                os.path.join(
-                    os.path.realpath(self.cache_dir), self._cache_fname()
-                )
-            )
-        return self.cache_fpath
+
+    @property
+    def cache_fname(self):
+        fname = f".{self.func.__module__}.{self.func.__qualname__}"
+        return fname.replace("<", "_").replace(">", "_")
+
+    @property
+    def cache_fpath(self):
+        os.makedirs(self.cache_dir, exist_ok=True)
+        return os.path.abspath(
+            os.path.join(os.path.realpath(self.cache_dir), self.cache_fname)
+        )
 
     def _reload_cache(self):
         with self.lock:
-            fpath = self._cache_fpath()
             try:
-                with portalocker.Lock(fpath, mode="rb") as cache_file:
-                    try:
-                        self.cache = pickle.load(cache_file)  # noqa: S301
-                    except EOFError:
-                        self.cache = {}
-            except FileNotFoundError:
+                with portalocker.Lock(
+                    self.cache_fpath, mode="rb"
+                ) as cache_file:
+                    self.cache = pickle.load(cache_file)  # noqa: S301
+            except (FileNotFoundError, EOFError):
                 self.cache = {}
 
     def _get_cache(self):
@@ -124,7 +116,7 @@ def _get_cache(self):
             return self.cache
 
     def _get_cache_by_key(self, key=None, hash=None):
-        fpath = self._cache_fpath()
+        fpath = self.cache_fpath
         fpath += f"_{key}" if hash is None else f"_{hash}"
         try:
             with portalocker.Lock(fpath, mode="rb") as cache_file:
@@ -133,15 +125,13 @@ def _get_cache_by_key(self, key=None, hash=None):
             return None
 
     def _clear_all_cache_files(self):
-        fpath = self._cache_fpath()
-        path, name = os.path.split(fpath)
+        path, name = os.path.split(self.cache_fpath)
         for subpath in os.listdir(path):
             if subpath.startswith(f"{name}_"):
                 os.remove(os.path.join(path, subpath))
 
     def _clear_being_calculated_all_cache_files(self):
-        fpath = self._cache_fpath()
-        path, name = os.path.split(fpath)
+        path, name = os.path.split(self.cache_fpath)
         for subpath in os.listdir(path):
             if subpath.startswith(name):
                 entry = self._get_cache_by_key(hash=subpath.split("_")[-1])
@@ -150,13 +140,13 @@ def _clear_being_calculated_all_cache_files(self):
                     self._save_cache(entry, hash=subpath.split("_")[-1])
 
     def _save_cache(self, cache, key=None, hash=None):
+        fpath = self.cache_fpath
+        if key is not None:
+            fpath += f"_{key}"
+        elif hash is not None:
+            fpath += f"_{hash}"
         with self.lock:
             self.cache = cache
-            fpath = self._cache_fpath()
-            if key is not None:
-                fpath += f"_{key}"
-            elif hash is not None:
-                fpath += f"_{hash}"
             with portalocker.Lock(fpath, mode="wb") as cache_file:
                 pickle.dump(cache, cache_file, protocol=4)
             if key is None:
@@ -179,11 +169,12 @@ def set_entry(self, key, func_res):
         }
         if self.separate_files:
             self._save_cache(key_data, key)
-        else:
-            with self.lock:
-                cache = self._get_cache()
-                cache[key] = key_data
-                self._save_cache(cache)
+            return  # pragma: no cover
+
+        with self.lock:
+            cache = self._get_cache()
+            cache[key] = key_data
+            self._save_cache(cache)
 
     def mark_entry_being_calculated_separate_files(self, key):
         self._save_cache(
@@ -204,19 +195,20 @@ def mark_entry_not_calculated_separate_files(self, key):
     def mark_entry_being_calculated(self, key):
         if self.separate_files:
             self.mark_entry_being_calculated_separate_files(key)
-        else:
-            with self.lock:
-                cache = self._get_cache()
-                try:
-                    cache[key]["being_calculated"] = True
-                except KeyError:
-                    cache[key] = {
-                        "value": None,
-                        "time": datetime.now(),
-                        "stale": False,
-                        "being_calculated": True,
-                    }
-                self._save_cache(cache)
+            return  # pragma: no cover
+
+        with self.lock:
+            cache = self._get_cache()
+            try:
+                cache[key]["being_calculated"] = True
+            except KeyError:
+                cache[key] = {
+                    "value": None,
+                    "time": datetime.now(),
+                    "stale": False,
+                    "being_calculated": True,
+                }
+            self._save_cache(cache)
 
     def mark_entry_not_calculated(self, key):
         if self.separate_files:
@@ -231,12 +223,12 @@ def wait_on_entry_calc(self, key):
         if self.separate_files:
             entry = self._get_cache_by_key(key)
-            filename = f"{self._cache_fname()}_{key}"
+            filename = f"{self.cache_fname}_{key}"
         else:
             with self.lock:
                 self._reload_cache()
                 entry = self._get_cache()[key]
-            filename = self._cache_fname()
+            filename = self.cache_fname
         if not entry["being_calculated"]:
             return entry["value"]
         event_handler = _PickleCore.CacheChangeHandler(
@@ -262,9 +254,10 @@ def clear_cache(self):
     def clear_being_calculated(self):
         if self.separate_files:
             self._clear_being_calculated_all_cache_files()
-        else:
-            with self.lock:
-                cache = self._get_cache()
-                for key in cache:
-                    cache[key]["being_calculated"] = False
-                self._save_cache(cache)
+            return  # pragma: no cover
+
+        with self.lock:
+            cache = self._get_cache()
+            for key in cache:
+                cache[key]["being_calculated"] = False
+            self._save_cache(cache)
diff --git a/tests/test_general.py b/tests/test_general.py
index 2594464a..6339ef0a 100644
--- a/tests/test_general.py
+++ b/tests/test_general.py
@@ -352,16 +352,17 @@ def dummy_func(a, b=2):
     assert count == 1
 
 
-def test_diff_functions_same_args(tmpdir):
+@pytest.mark.parametrize("backend", ["memory", "pickle"])
+def test_diff_functions_same_args(tmpdir, backend: str):
     count_p = count_m = 0
 
-    @cachier.cachier(cache_dir=tmpdir)
+    @cachier.cachier(cache_dir=tmpdir, backend=backend)
     def fn_plus(a, b=2):
         nonlocal count_p
         count_p += 1
         return a + b
 
-    @cachier.cachier(cache_dir=tmpdir)
+    @cachier.cachier(cache_dir=tmpdir, backend=backend)
     def fn_minus(a, b=2):
         nonlocal count_m
         count_m += 1
@@ -377,7 +378,8 @@ def fn_minus(a, b=2):
         assert count_p == 1
         assert count_m == 1
 
-def test_runtime_handling(tmpdir):
+@pytest.mark.parametrize("backend", ["memory", "pickle"])
+def test_runtime_handling(tmpdir, backend):
     count_p = count_m = 0
 
     def fn_plus(a, b=2):
@@ -389,7 +391,7 @@ def fn_minus(a, b=2):
         count_m += 1
         return a - b
 
-    cachier_ = cachier.cachier(cache_dir=tmpdir)
+    cachier_ = cachier.cachier(cache_dir=tmpdir, backend=backend)
     assert count_p == count_m == 0
 
     for fn, expected in [(fn_plus, 3), (fn_minus, -1)]:

From 126c70c33a4e0bc157862f2a517c35ebf5cfa5f0 Mon Sep 17 00:00:00 2001
From: Jirka
Date: Fri, 23 Feb 2024 20:45:07 +0100
Subject: [PATCH 6/8] memory

---
 cachier/cores/memory.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/cachier/cores/memory.py b/cachier/cores/memory.py
index de87da00..00e94818 100644
--- a/cachier/cores/memory.py
+++ b/cachier/cores/memory.py
@@ -4,7 +4,7 @@
 from datetime import datetime
 
 from .._types import HashFunc
-from .base import _BaseCore
+from .base import _BaseCore, _get_func_str
 
 
 class _MemoryCore(_BaseCore):
@@ -14,9 +14,13 @@ def __init__(self, hash_func: HashFunc, wait_for_calc_timeout: int):
         super().__init__(hash_func, wait_for_calc_timeout)
         self.cache = {}
 
+    def _hash_func_key(self, key):
+        return f"{_get_func_str(self.func)}:{key}"
+
     def get_entry_by_key(self, key, reload=False):
+
         with self.lock:
-            return key, self.cache.get(key, None)
+            return key, self.cache.get(self._hash_func_key(key), None)
 
     def set_entry(self, key, func_res):
         with self.lock:
@@ -24,10 +28,10 @@ def set_entry(self, key, func_res):
             # we need to retain the existing condition so that
             # mark_entry_not_calculated can notify all possibly-waiting
             # threads about it
-            cond = self.cache[key]["condition"]
+            cond = self.cache[self._hash_func_key(key)]["condition"]
         except KeyError:  # pragma: no cover
             cond = None
-        self.cache[key] = {
+        self.cache[self._hash_func_key(key)] = {
             "value": func_res,
             "time": datetime.now(),
             "stale": False,
@@ -40,10 +44,10 @@ def mark_entry_being_calculated(self, key):
             condition = threading.Condition()
             # condition.acquire()
             try:
-                self.cache[key]["being_calculated"] = True
-                self.cache[key]["condition"] = condition
+                self.cache[self._hash_func_key(key)]["being_calculated"] = True
+                self.cache[self._hash_func_key(key)]["condition"] = condition
             except KeyError:
-                self.cache[key] = {
+                self.cache[self._hash_func_key(key)] = {
                     "value": None,
                     "time": datetime.now(),
                     "stale": False,
@@ -54,7 +58,7 @@ def mark_entry_not_calculated(self, key):
         with self.lock:
             try:
-                entry = self.cache[key]
+                entry = self.cache[self._hash_func_key(key)]
             except KeyError:  # pragma: no cover
                 return  # that's ok, we don't need an entry in that case
             entry["being_calculated"] = False
@@ -67,13 +71,13 @@ def mark_entry_not_calculated(self, key):
 
     def wait_on_entry_calc(self, key):
         with self.lock:  # pragma: no cover
-            entry = self.cache[key]
+            entry = self.cache[self._hash_func_key(key)]
             if not entry["being_calculated"]:
                 return entry["value"]
         entry["condition"].acquire()
         entry["condition"].wait()
         entry["condition"].release()
-        return self.cache[key]["value"]
+        return self.cache[self._hash_func_key(key)]["value"]
 
     def clear_cache(self):
         with self.lock:

From afd047f88a3ae6d4f4b3894e784c1b24988afe5a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 23 Feb 2024 19:45:25 +0000
Subject: [PATCH 7/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 cachier/cores/memory.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cachier/cores/memory.py b/cachier/cores/memory.py
index 00e94818..f7fe83dc 100644
--- a/cachier/cores/memory.py
+++ b/cachier/cores/memory.py
@@ -18,7 +18,6 @@ def _hash_func_key(self, key):
         return f"{_get_func_str(self.func)}:{key}"
 
     def get_entry_by_key(self, key, reload=False):
-
         with self.lock:
             return key, self.cache.get(self._hash_func_key(key), None)
 

From 448d57ff4a8b36a6874d9ee9e5ea80eb5b08e1b0 Mon Sep 17 00:00:00 2001
From: Jirka
Date: Sat, 24 Feb 2024 15:08:30 +0100
Subject: [PATCH 8/8] rev

---
 cachier/core.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/cachier/core.py b/cachier/core.py
index 987e35c5..6bd0d871 100644
--- a/cachier/core.py
+++ b/cachier/core.py
@@ -268,19 +268,19 @@ def func_wrapper(*args, **kwds):
             _print("No entry found. No current calc. Calling like a boss.")
             return _calc_entry(core, key, func, args, kwds)
 
-        def _clear_cache():
+        def clear_cache():
             """Clear the cache."""
             core.clear_cache()
 
-        def _clear_being_calculated():
+        def clear_being_calculated():
             """Marks all entries in this cache as not being calculated."""
             core.clear_being_calculated()
 
-        def _cache_dpath():
+        def cache_dpath():
             """Returns the path to the cache dir, if exists; None if not."""
             return getattr(core, "cache_dir", None)
 
-        def _precache_value(*args, value_to_cache, **kwds):
+        def precache_value(*args, value_to_cache, **kwds):
             """Add an initial value to the cache.
 
             Arguments
             ---------
             ...
             )
             return core.precache_value(tuple(), kwargs, value_to_cache)
 
-        func_wrapper.clear_cache = _clear_cache
-        func_wrapper.clear_being_calculated = _clear_being_calculated
-        func_wrapper.cache_dpath = _cache_dpath
-        func_wrapper.precache_value = _precache_value
+        func_wrapper.clear_cache = clear_cache
+        func_wrapper.clear_being_calculated = clear_being_calculated
+        func_wrapper.cache_dpath = cache_dpath
+        func_wrapper.precache_value = precache_value
         return func_wrapper
 
     return _cachier_decorator
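
A standalone usage sketch (not part of the patch series itself) of the behaviour the new
test_diff_functions_same_args test pins down: two different functions decorated with the
same backend, called with identical arguments, must each return their own result instead
of sharing a cache entry. It assumes a cachier build that already includes the
per-function key handling added above (_get_func_str in cores/base.py, _hash_func_key in
cores/memory.py); the function names simply mirror the tests.

import cachier


@cachier.cachier(backend="memory")
def fn_plus(a, b=2):
    return a + b


@cachier.cachier(backend="memory")
def fn_minus(a, b=2):
    return a - b


if __name__ == "__main__":
    # The memory core now prefixes every key with the function's module and
    # name (via _hash_func_key), so identical arguments hash to distinct
    # entries for fn_plus and fn_minus instead of overwriting each other.
    assert fn_plus(1) == 3
    assert fn_minus(1) == -1
    print("per-function cache keys: OK")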