Skip to content

Commit

Permalink
Fix serialization in filesystem backend with binary content that is a…
Browse files Browse the repository at this point in the history
…lso valid UTF-8
  • Loading branch information
JWCook committed Feb 15, 2022
1 parent 43c9287 commit b5bac5a
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 41 deletions.
9 changes: 9 additions & 0 deletions .all-contributorsrc
Expand Up @@ -753,6 +753,15 @@
"contributions": [
"bug"
]
},
{
"login": "rasmuse",
"name": "Rasmus Einarsson",
"avatar_url": "https://avatars.githubusercontent.com/u/1210973?v=4",
"profile": "https://rasmuse.github.io/",
"contributions": [
"bug"
]
}
],
"contributorsPerLine": 7,
Expand Down
7 changes: 4 additions & 3 deletions CONTRIBUTORS.md
Expand Up @@ -74,31 +74,32 @@ contributions that have helped to improve requests-cache:
<tr>
<td align="center"><a href="https://github.com/parkerhancock"><img src="https://avatars.githubusercontent.com/u/633163?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Parker Hancock</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=parkerhancock" title="Code">💻</a> <a href="#feature-parkerhancock" title="New features">✨</a> <a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Aparkerhancock" title="Bug reports">🐛</a> <a href="https://github.com/reclosedev/requests-cache/commits?author=parkerhancock" title="Tests">⚠️</a> <a href="https://github.com/reclosedev/requests-cache/commits?author=parkerhancock" title="Documentation">📖</a> <a href="#security-parkerhancock" title="Security">🛡️</a> <a href="#ideas-parkerhancock" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://phil.red/"><img src="https://avatars.githubusercontent.com/u/291575?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Philipp A.</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Aflying-sheep" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://rasmuse.github.io/"><img src="https://avatars.githubusercontent.com/u/1210973?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Rasmus Einarsson</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Arasmuse" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://roderic.ca/"><img src="https://avatars.githubusercontent.com/u/6867226?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Roderic Day</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3ARodericDay" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/reclosedev"><img src="https://avatars.githubusercontent.com/u/660112?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Roman Haritonov</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=reclosedev" title="Code">💻</a> <a href="#maintenance-reclosedev" title="Maintenance">🚧</a> <a href="#feature-reclosedev" title="New features">✨</a> <a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Areclosedev" title="Bug reports">🐛</a> <a href="https://github.com/reclosedev/requests-cache/commits?author=reclosedev" title="Tests">⚠️</a> <a href="https://github.com/reclosedev/requests-cache/commits?author=reclosedev" title="Documentation">📖</a> <a href="#infra-reclosedev" title="Infrastructure (Hosting, Build-Tools, etc)">🚇</a></td>
<td align="center"><a href="https://www.facebook.com/avasamdev"><img src="https://avatars.githubusercontent.com/u/1350584?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Samuel T.</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3AAvasam" title="Bug reports">🐛</a> <a href="#ideas-Avasam" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://sebastian-hoeffner.de/"><img src="https://avatars.githubusercontent.com/u/1836815?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Sebastian Höffner</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=shoeffner" title="Code">💻</a> <a href="#feature-shoeffner" title="New features">✨</a> <a href="https://github.com/reclosedev/requests-cache/commits?author=shoeffner" title="Tests">⚠️</a> <a href="#ideas-shoeffner" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/grubberr"><img src="https://avatars.githubusercontent.com/u/195743?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Serhii Chvaliuk</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Agrubberr" title="Bug reports">🐛</a> <a href="https://github.com/reclosedev/requests-cache/commits?author=grubberr" title="Code">💻</a></td>
</tr>
<tr>
<td align="center"><a href="https://github.com/grubberr"><img src="https://avatars.githubusercontent.com/u/195743?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Serhii Chvaliuk</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Agrubberr" title="Bug reports">🐛</a> <a href="https://github.com/reclosedev/requests-cache/commits?author=grubberr" title="Code">💻</a></td>
<td align="center"><a href="https://sbiewald.de/"><img src="https://avatars.githubusercontent.com/u/5983372?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Simon Biewald</b></sub></a><br /><a href="#security-Varbin" title="Security">🛡️</a> <a href="#ideas-Varbin" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/jseabold"><img src="https://avatars.githubusercontent.com/u/296164?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Skipper Seabold</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Ajseabold" title="Bug reports">🐛</a></td>
<td align="center"><a href="http://pathmind.com/"><img src="https://avatars.githubusercontent.com/u/1197406?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Slin Lee</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=slinlee" title="Documentation">📖</a></td>
<td align="center"><a href="https://www.stavros.io/"><img src="https://avatars.githubusercontent.com/u/23648?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Stavros Korokithakis</b></sub></a><br /><a href="#infra-skorokithakis" title="Infrastructure (Hosting, Build-Tools, etc)">🚇</a> <a href="#tool-skorokithakis" title="Tools">🔧</a> <a href="https://github.com/reclosedev/requests-cache/commits?author=skorokithakis" title="Documentation">📖</a></td>
<td align="center"><a href="https://cheginit.github.io/"><img src="https://avatars.githubusercontent.com/u/13016644?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Taher Chegini</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Acheginit" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://vladimir.panteleev.md/"><img src="https://avatars.githubusercontent.com/u/160894?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Vladimir Panteleev</b></sub></a><br /><a href="#ideas-CyberShadow" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://sansec.io/"><img src="https://avatars.githubusercontent.com/u/1145479?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Willem de Groot</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=gwillem" title="Code">💻</a> <a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Agwillem" title="Bug reports">🐛</a></td>
</tr>
<tr>
<td align="center"><a href="https://sansec.io/"><img src="https://avatars.githubusercontent.com/u/1145479?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Willem de Groot</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=gwillem" title="Code">💻</a> <a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Agwillem" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/WouterVH"><img src="https://avatars.githubusercontent.com/u/469509?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Wouter Vanden Hove</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3AWouterVH" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/YetAnotherNerd"><img src="https://avatars.githubusercontent.com/u/320738?v=4?s=100" width="100px;" alt=""/><br /><sub><b>YetAnotherNerd</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=YetAnotherNerd" title="Code">💻</a> <a href="#feature-YetAnotherNerd" title="New features">✨</a> <a href="https://github.com/reclosedev/requests-cache/issues?q=author%3AYetAnotherNerd" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/aaron-mf1"><img src="https://avatars.githubusercontent.com/u/65560918?v=4?s=100" width="100px;" alt=""/><br /><sub><b>aaron-mf1</b></sub></a><br /><a href="#ideas-aaron-mf1" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center"><a href="https://github.com/coryairbhb"><img src="https://avatars.githubusercontent.com/u/50755629?v=4?s=100" width="100px;" alt=""/><br /><sub><b>coryairbhb</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Acoryairbhb" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/craigls"><img src="https://avatars.githubusercontent.com/u/972350?v=4?s=100" width="100px;" alt=""/><br /><sub><b>craig</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=craigls" title="Code">💻</a> <a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Acraigls" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://stackoverflow.com/users/86643/denis"><img src="https://avatars.githubusercontent.com/u/1280390?v=4?s=100" width="100px;" alt=""/><br /><sub><b>denis-bz</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Adenis-bz" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://gir.st/"><img src="https://avatars.githubusercontent.com/u/11820748?v=4?s=100" width="100px;" alt=""/><br /><sub><b>girst</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Agirst" title="Bug reports">🐛</a></td>
</tr>
<tr>
<td align="center"><a href="https://gir.st/"><img src="https://avatars.githubusercontent.com/u/11820748?v=4?s=100" width="100px;" alt=""/><br /><sub><b>girst</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Agirst" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/gorogoroumaru"><img src="https://avatars.githubusercontent.com/u/30716350?v=4?s=100" width="100px;" alt=""/><br /><sub><b>gorogoroumaru</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=gorogoroumaru" title="Code">💻</a></td>
<td align="center"><a href="https://github.com/harvey251"><img src="https://avatars.githubusercontent.com/u/33844174?v=4?s=100" width="100px;" alt=""/><br /><sub><b>harvey251</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Aharvey251" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/mbarkhau"><img src="https://avatars.githubusercontent.com/u/446561?v=4?s=100" width="100px;" alt=""/><br /><sub><b>mbarkhau</b></sub></a><br /><a href="https://github.com/reclosedev/requests-cache/commits?author=mbarkhau" title="Code">💻</a> <a href="https://github.com/reclosedev/requests-cache/commits?author=mbarkhau" title="Tests">⚠️</a> <a href="#infra-mbarkhau" title="Infrastructure (Hosting, Build-Tools, etc)">🚇</a> <a href="https://github.com/reclosedev/requests-cache/issues?q=author%3Ambarkhau" title="Bug reports">🐛</a></td>
Expand Down
7 changes: 4 additions & 3 deletions HISTORY.md
@@ -1,9 +1,10 @@
# History

## 0.9.2 (2022-02-15)
Fix some regression bugs introduced in 0.9.0:
* Add support `params` as a positional argument to `CachedSession.request()`
* Add support for disabling expiration for a single request with `CachedSession.request(..., expire_after=-1)`
* Fix serialization in filesystem backend with binary content that is also valid UTF-8
* Fix some regression bugs introduced in 0.9.0:
* Add support for `params` as a positional argument to `CachedSession.request()`
* Add support for disabling expiration for a single request with `CachedSession.request(..., expire_after=-1)`

## 0.9.1 (2022-01-15)
* Add support for python 3.10.2 and 3.9.10 (regarding resolving `ForwardRef` types during deserialization)
Expand Down
11 changes: 7 additions & 4 deletions docs/user_guide/serializers.md
Expand Up @@ -113,10 +113,13 @@ For example, a compressed pickle serializer can be built as:
```python
>>> import gzip
>>> from requests_cache import CachedSession, SerializerPipeline, Stage, pickle_serializer
>>> compressed_serializer = SerializerPipeline([
... pickle_serializer,
... Stage(dumps=gzip.compress, loads=gzip.decompress),
... ])
>>> compressed_serializer = SerializerPipeline(
... [
... pickle_serializer,
... Stage(dumps=gzip.compress, loads=gzip.decompress),
... ],
... is_binary=True,
... )
>>> session = CachedSession(serializer=compressed_serializer)
```
:::
Expand Down
16 changes: 4 additions & 12 deletions requests_cache/backends/filesystem.py
Expand Up @@ -96,7 +96,7 @@ def __init__(
super().__init__(**kwargs)
self.cache_dir = get_cache_path(cache_name, use_cache_dir=use_cache_dir, use_temp=use_temp)
self.extension = _get_extension(extension, self.serializer)
self.is_binary = False
self.is_binary = getattr(self.serializer, 'is_binary', False)
makedirs(self.cache_dir, exist_ok=True)

@contextmanager
Expand All @@ -114,24 +114,16 @@ def _path(self, key) -> Path:
def __getitem__(self, key):
mode = 'rb' if self.is_binary else 'r'
with self._try_io():
try:
with self._path(key).open(mode) as f:
return self.serializer.loads(f.read())
except UnicodeDecodeError:
self.is_binary = True
return self.__getitem__(key)
with self._path(key).open(mode) as f:
return self.serializer.loads(f.read())

def __delitem__(self, key):
with self._try_io():
self._path(key).unlink()

def __setitem__(self, key, value):
serialized_value = self.serializer.dumps(value)
if isinstance(serialized_value, bytes):
self.is_binary = True
mode = 'wb' if self.is_binary else 'w'
with self._try_io():
with self._path(key).open(mode) as f:
with self._path(key).open(mode='wb' if self.is_binary else 'w') as f:
f.write(self.serializer.dumps(value))

def __iter__(self):
Expand Down
24 changes: 14 additions & 10 deletions requests_cache/serializers/pipeline.py
Expand Up @@ -3,7 +3,7 @@
:classes-only:
:nosignatures:
"""
from typing import Any, Callable, List, Union
from typing import Any, Callable, Sequence, Union

from ..models import CachedResponse

Expand All @@ -29,22 +29,26 @@ def __init__(


class SerializerPipeline:
"""A sequence of steps used to serialize and deserialize response objects.
This can be initialized with :py:class:`Stage` objects, or any objects with ``dumps()`` and
``loads()`` methods
"""A pipeline of stages chained together to serialize and deserialize response objects.
Args:
stages: A sequence of :py:class:`Stage` objects, or any objects with ``dumps()`` and
``loads()`` methods
is_binary: Indicates whether the serialized content is binary
"""

def __init__(self, stages: List):
self.steps = stages
self.dump_steps = [step.dumps for step in stages]
self.load_steps = [step.loads for step in reversed(stages)]
def __init__(self, stages: Sequence, is_binary: bool = False):
self.is_binary = is_binary
self.stages = stages
self.dump_stages = [stage.dumps for stage in stages]
self.load_stages = [stage.loads for stage in reversed(stages)]

def dumps(self, value) -> Union[str, bytes]:
for step in self.dump_steps:
for step in self.dump_stages:
value = step(value)
return value

def loads(self, value) -> CachedResponse:
for step in self.load_steps:
for step in self.load_stages:
value = step(value)
return value
17 changes: 11 additions & 6 deletions requests_cache/serializers/preconf.py
Expand Up @@ -34,7 +34,9 @@ class that raises an ``ImportError`` at initialization time instead of at import
yaml_preconf_stage = CattrStage(pyyaml.make_converter) #: Pre-serialization steps for YAML
toml_preconf_stage = CattrStage(tomlkit.make_converter) #: Pre-serialization steps for TOML
ujson_preconf_stage = CattrStage(ujson.make_converter) #: Pre-serialization steps for ultrajson
pickle_serializer = SerializerPipeline([base_stage, pickle]) #: Complete pickle serializer
pickle_serializer = SerializerPipeline(
[base_stage, pickle], is_binary=True
) #: Complete pickle serializer
utf8_encoder = Stage(dumps=str.encode, loads=lambda x: x.decode()) #: Encode to bytes


Expand All @@ -55,7 +57,9 @@ def safe_pickle_serializer(
"""Create a serializer that uses ``pickle`` + ``itsdangerous`` to add a signature to
responses on write, and validate that signature with a secret key on read.
"""
return SerializerPipeline([base_stage, pickle, signer_stage(secret_key, salt)])
return SerializerPipeline(
[base_stage, pickle, signer_stage(secret_key, salt)], is_binary=True
)

except ImportError as e:
signer_stage = get_placeholder_class(e)
Expand All @@ -70,8 +74,8 @@ def safe_pickle_serializer(
import bson

bson_serializer = SerializerPipeline(
[bson_preconf_stage, bson]
) #: Complete BSON serializer; using pymongo's ``bson.json_util`` if installed, otherwise standalone ``bson`` codec
[bson_preconf_stage, bson], is_binary=False
) #: Complete BSON serializer; uses pymongo's ``bson.json_util`` if installed, otherwise standalone ``bson`` codec
except ImportError as e:
bson_serializer = get_placeholder_class(e)

Expand All @@ -88,7 +92,7 @@ def safe_pickle_serializer(

_json_stage = Stage(dumps=partial(json.dumps, indent=2), loads=json.loads)
json_serializer = SerializerPipeline(
[_json_preconf_stage, _json_stage]
[_json_preconf_stage, _json_stage], is_binary=False
) #: Complete JSON serializer; uses ultrajson if available


Expand All @@ -100,7 +104,8 @@ def safe_pickle_serializer(
[
yaml_preconf_stage,
Stage(yaml, loads='safe_load', dumps='safe_dump'),
]
],
is_binary=False,
) #: Complete YAML serializer
except ImportError as e:
yaml_serializer = get_placeholder_class(e)
3 changes: 1 addition & 2 deletions tests/integration/test_filesystem.py
@@ -1,4 +1,3 @@
import pickle
from shutil import rmtree
from tempfile import gettempdir

Expand All @@ -20,7 +19,7 @@ def teardown_class(cls):
rmtree(CACHE_NAME, ignore_errors=True)

def init_cache(self, index=0, clear=True, **kwargs):
cache = FileDict(f'{CACHE_NAME}_{index}', serializer=pickle, use_temp=True, **kwargs)
cache = FileDict(f'{CACHE_NAME}_{index}', serializer='pickle', use_temp=True, **kwargs)
if clear:
cache.clear()
return cache
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_serializers.py
Expand Up @@ -68,7 +68,7 @@ def test_optional_dependencies():
def test_cache_signing(tempfile_path):
serializer = safe_pickle_serializer(secret_key=str(uuid4()))
session = CachedSession(tempfile_path, serializer=serializer)
assert isinstance(session.cache.responses.serializer.steps[-1].obj, Signer)
assert isinstance(session.cache.responses.serializer.stages[-1].obj, Signer)

# Simple serialize/deserialize round trip
response = CachedResponse()
Expand Down

0 comments on commit b5bac5a

Please sign in to comment.