diff --git a/.all-contributorsrc b/.all-contributorsrc index d8833825..cab0a59a 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -753,6 +753,15 @@ "contributions": [ "bug" ] + }, + { + "login": "rasmuse", + "name": "Rasmus Einarsson", + "avatar_url": "https://avatars.githubusercontent.com/u/1210973?v=4", + "profile": "https://rasmuse.github.io/", + "contributions": [ + "bug" + ] } ], "contributorsPerLine": 7, diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index a53abae4..749c0eb8 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -74,31 +74,32 @@ contributions that have helped to improve requests-cache:
Parker Hancock

💻 ✨ 🐛 ⚠️ 📖 🛡️ 🤔
Philipp A.

πŸ› +
Rasmus Einarsson

πŸ›
Roderic Day

πŸ›
Roman Haritonov

💻 🚧 ✨ 🐛 ⚠️ 📖 🚇
Samuel T.

πŸ› πŸ€”
Sebastian Höffner

💻 ✨ ⚠️ 🤔 -
Serhii Chvaliuk

πŸ› πŸ’» +
Serhii Chvaliuk

πŸ› πŸ’»
Simon Biewald

🛡️ 🤔
Skipper Seabold

πŸ›
Slin Lee

📖
Stavros Korokithakis

🚇 🔧 📖
Taher Chegini

πŸ›
Vladimir Panteleev

🤔 -
Willem de Groot

πŸ’» πŸ› +
Willem de Groot

πŸ’» πŸ›
Wouter Vanden Hove

πŸ›
YetAnotherNerd

💻 ✨ 🐛
aaron-mf1

🤔
coryairbhb

πŸ›
craig

πŸ’» πŸ›
denis-bz

πŸ› -
girst

πŸ› +
girst

πŸ›
gorogoroumaru

💻
harvey251

πŸ›
mbarkhau

πŸ’» ⚠️ πŸš‡ πŸ› diff --git a/HISTORY.md b/HISTORY.md index 1d80b308..7a4027a4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,9 +1,10 @@ # History ## 0.9.2 (2022-02-15) -Fix some regression bugs introduced in 0.9.0: -* Add support `params` as a positional argument to `CachedSession.request()` -* Add support for disabling expiration for a single request with `CachedSession.request(..., expire_after=-1)` +* Fix serialization in filesystem backend with binary content that is also valid UTF-8 +* Fix some regression bugs introduced in 0.9.0: + * Add support for `params` as a positional argument to `CachedSession.request()` + * Add support for disabling expiration for a single request with `CachedSession.request(..., expire_after=-1)` ## 0.9.1 (2022-01-15) * Add support for python 3.10.2 and 3.9.10 (regarding resolving `ForwardRef` types during deserialization) diff --git a/docs/user_guide/serializers.md b/docs/user_guide/serializers.md index 2e3621e7..b2bae926 100644 --- a/docs/user_guide/serializers.md +++ b/docs/user_guide/serializers.md @@ -113,10 +113,13 @@ For example, a compressed pickle serializer can be built as: ```python >>> import gzip >>> from requests_cache import CachedSession, SerializerPipeline, Stage, pickle_serializer ->>> compressed_serializer = SerializerPipeline([ -... pickle_serializer, -... Stage(dumps=gzip.compress, loads=gzip.decompress), -... ]) +>>> compressed_serializer = SerializerPipeline( +... [ +... pickle_serializer, +... Stage(dumps=gzip.compress, loads=gzip.decompress), +... ], +... is_binary=True, +... 
) >>> session = CachedSession(serializer=compressed_serializer) ``` ::: diff --git a/requests_cache/backends/filesystem.py b/requests_cache/backends/filesystem.py index b5da0dfc..c94b6622 100644 --- a/requests_cache/backends/filesystem.py +++ b/requests_cache/backends/filesystem.py @@ -96,7 +96,7 @@ def __init__( super().__init__(**kwargs) self.cache_dir = get_cache_path(cache_name, use_cache_dir=use_cache_dir, use_temp=use_temp) self.extension = _get_extension(extension, self.serializer) - self.is_binary = False + self.is_binary = getattr(self.serializer, 'is_binary', False) makedirs(self.cache_dir, exist_ok=True) @contextmanager @@ -114,24 +114,16 @@ def _path(self, key) -> Path: def __getitem__(self, key): mode = 'rb' if self.is_binary else 'r' with self._try_io(): - try: - with self._path(key).open(mode) as f: - return self.serializer.loads(f.read()) - except UnicodeDecodeError: - self.is_binary = True - return self.__getitem__(key) + with self._path(key).open(mode) as f: + return self.serializer.loads(f.read()) def __delitem__(self, key): with self._try_io(): self._path(key).unlink() def __setitem__(self, key, value): - serialized_value = self.serializer.dumps(value) - if isinstance(serialized_value, bytes): - self.is_binary = True - mode = 'wb' if self.is_binary else 'w' with self._try_io(): - with self._path(key).open(mode) as f: + with self._path(key).open(mode='wb' if self.is_binary else 'w') as f: f.write(self.serializer.dumps(value)) def __iter__(self): diff --git a/requests_cache/serializers/pipeline.py b/requests_cache/serializers/pipeline.py index 3bbfbda1..2a22761d 100644 --- a/requests_cache/serializers/pipeline.py +++ b/requests_cache/serializers/pipeline.py @@ -3,7 +3,7 @@ :classes-only: :nosignatures: """ -from typing import Any, Callable, List, Union +from typing import Any, Callable, Sequence, Union from ..models import CachedResponse @@ -29,22 +29,26 @@ def __init__( class SerializerPipeline: - """A sequence of steps used to serialize and 
deserialize response objects. - This can be initialized with :py:class:`Stage` objects, or any objects with ``dumps()`` and - ``loads()`` methods + """A pipeline of stages chained together to serialize and deserialize response objects. + + Args: + stages: A sequence of :py:class:`Stage` objects, or any objects with ``dumps()`` and + ``loads()`` methods + is_binary: Indicates whether the serialized content is binary """ - def __init__(self, stages: List): - self.steps = stages - self.dump_steps = [step.dumps for step in stages] - self.load_steps = [step.loads for step in reversed(stages)] + def __init__(self, stages: Sequence, is_binary: bool = False): + self.is_binary = is_binary + self.stages = stages + self.dump_stages = [stage.dumps for stage in stages] + self.load_stages = [stage.loads for stage in reversed(stages)] def dumps(self, value) -> Union[str, bytes]: - for step in self.dump_steps: + for step in self.dump_stages: value = step(value) return value def loads(self, value) -> CachedResponse: - for step in self.load_steps: + for step in self.load_stages: value = step(value) return value diff --git a/requests_cache/serializers/preconf.py b/requests_cache/serializers/preconf.py index 6cf83bc1..35453b5e 100644 --- a/requests_cache/serializers/preconf.py +++ b/requests_cache/serializers/preconf.py @@ -34,7 +34,9 @@ class that raises an ``ImportError`` at initialization time instead of at import yaml_preconf_stage = CattrStage(pyyaml.make_converter) #: Pre-serialization steps for YAML toml_preconf_stage = CattrStage(tomlkit.make_converter) #: Pre-serialization steps for TOML ujson_preconf_stage = CattrStage(ujson.make_converter) #: Pre-serialization steps for ultrajson -pickle_serializer = SerializerPipeline([base_stage, pickle]) #: Complete pickle serializer +pickle_serializer = SerializerPipeline( + [base_stage, pickle], is_binary=True +) #: Complete pickle serializer utf8_encoder = Stage(dumps=str.encode, loads=lambda x: x.decode()) #: Encode to bytes @@ -55,7 
+57,9 @@ def safe_pickle_serializer( """Create a serializer that uses ``pickle`` + ``itsdangerous`` to add a signature to responses on write, and validate that signature with a secret key on read. """ - return SerializerPipeline([base_stage, pickle, signer_stage(secret_key, salt)]) + return SerializerPipeline( + [base_stage, pickle, signer_stage(secret_key, salt)], is_binary=True + ) except ImportError as e: signer_stage = get_placeholder_class(e) @@ -70,8 +74,8 @@ def safe_pickle_serializer( import bson bson_serializer = SerializerPipeline( - [bson_preconf_stage, bson] - ) #: Complete BSON serializer; using pymongo's ``bson.json_util`` if installed, otherwise standalone ``bson`` codec + [bson_preconf_stage, bson], is_binary=False + ) #: Complete BSON serializer; uses pymongo's ``bson.json_util`` if installed, otherwise standalone ``bson`` codec except ImportError as e: bson_serializer = get_placeholder_class(e) @@ -88,7 +92,7 @@ def safe_pickle_serializer( _json_stage = Stage(dumps=partial(json.dumps, indent=2), loads=json.loads) json_serializer = SerializerPipeline( - [_json_preconf_stage, _json_stage] + [_json_preconf_stage, _json_stage], is_binary=False ) #: Complete JSON serializer; uses ultrajson if available @@ -100,7 +104,8 @@ def safe_pickle_serializer( [ yaml_preconf_stage, Stage(yaml, loads='safe_load', dumps='safe_dump'), - ] + ], + is_binary=False, ) #: Complete YAML serializer except ImportError as e: yaml_serializer = get_placeholder_class(e) diff --git a/tests/integration/test_filesystem.py b/tests/integration/test_filesystem.py index 3707f205..4a4884e3 100644 --- a/tests/integration/test_filesystem.py +++ b/tests/integration/test_filesystem.py @@ -1,4 +1,3 @@ -import pickle from shutil import rmtree from tempfile import gettempdir @@ -20,7 +19,7 @@ def teardown_class(cls): rmtree(CACHE_NAME, ignore_errors=True) def init_cache(self, index=0, clear=True, **kwargs): - cache = FileDict(f'{CACHE_NAME}_{index}', serializer=pickle, use_temp=True, 
**kwargs) + cache = FileDict(f'{CACHE_NAME}_{index}', serializer='pickle', use_temp=True, **kwargs) if clear: cache.clear() return cache diff --git a/tests/unit/test_serializers.py b/tests/unit/test_serializers.py index bf776fd4..80d0f3a1 100644 --- a/tests/unit/test_serializers.py +++ b/tests/unit/test_serializers.py @@ -68,7 +68,7 @@ def test_optional_dependencies(): def test_cache_signing(tempfile_path): serializer = safe_pickle_serializer(secret_key=str(uuid4())) session = CachedSession(tempfile_path, serializer=serializer) - assert isinstance(session.cache.responses.serializer.steps[-1].obj, Signer) + assert isinstance(session.cache.responses.serializer.stages[-1].obj, Signer) # Simple serialize/deserialize round trip response = CachedResponse()