From 2081f31d9a91d3df15d8cc81272c6f54c58841e2 Mon Sep 17 00:00:00 2001
From: Jordan Cook
Date: Tue, 11 May 2021 21:25:03 -0500
Subject: [PATCH] WIP: Add a filesystem backend

---
Review note: filesystem.py below was revised after this patch was generated, so
the blob id on its "index" line predates the revised content.

 aiohttp_client_cache/backends/filesystem.py | 127 ++++++++++++++++++++
 poetry.lock                                 |  14 ++-
 pyproject.toml                              |   3 +-
 3 files changed, 142 insertions(+), 2 deletions(-)
 create mode 100644 aiohttp_client_cache/backends/filesystem.py

diff --git a/aiohttp_client_cache/backends/filesystem.py b/aiohttp_client_cache/backends/filesystem.py
new file mode 100644
index 0000000..bb29380
--- /dev/null
+++ b/aiohttp_client_cache/backends/filesystem.py
@@ -0,0 +1,127 @@
+from contextlib import contextmanager
+from os import listdir, makedirs
+from os.path import abspath, dirname, expanduser, isabs, isfile, join
+from pathlib import Path
+from pickle import PickleError
+from shutil import rmtree
+from tempfile import gettempdir
+from typing import AsyncIterable, Union
+
+import aiofiles
+import aiofiles.os
+
+from aiohttp_client_cache.backends import BaseCache, CacheBackend, ResponseOrKey
+from aiohttp_client_cache.backends.sqlite import SQLiteCache
+
+
+class FileBackend(CacheBackend):
+    """Backend that stores cached responses as files on the local filesystem.
+
+    Response paths will be in the format ``<cache_name>/responses/<cache_key>``.
+    Redirects are stored in a SQLite database, located at ``<cache_name>/redirects.sqlite``.
+
+    Args:
+        cache_name: Base directory for cache files
+        use_temp: Store cache files in a temp directory (e.g., ``/tmp/http_cache/``).
+            Note: if ``cache_name`` is an absolute path, this option will be ignored.
+    """
+
+    def __init__(
+        self, cache_name: Union[Path, str] = 'http_cache', use_temp: bool = False, **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.responses = FileCache(cache_name, use_temp=use_temp, **kwargs)
+        # Responses live in ``<cache_name>/responses``; keep the redirects DB one level up
+        db_path = join(dirname(self.responses.cache_dir), 'redirects.sqlite')
+        self.redirects = SQLiteCache(db_path, 'redirects', **kwargs)
+
+
+class FileCache(BaseCache):
+    """A dictionary-like interface to files on the local filesystem.
+
+    Each key is stored as one file named after the key, inside ``cache_dir``.
+    """
+
+    def __init__(self, cache_name: Union[Path, str], use_temp: bool = False, **kwargs):
+        super().__init__(**kwargs)
+        self.cache_dir = _get_cache_dir(cache_name, use_temp)
+
+    @contextmanager
+    def _try_io(self, ignore_errors: bool = False):
+        """Attempt an I/O operation, and either ignore errors or re-raise them as KeyErrors"""
+        try:
+            yield
+        except (OSError, PickleError) as e:
+            if not ignore_errors:
+                # Chain the cause so the underlying I/O error stays in the traceback
+                raise KeyError(e) from e
+
+    def _join(self, key) -> str:
+        """Get the file path for a given key"""
+        return join(self.cache_dir, str(key))
+
+    async def clear(self):
+        """Delete all cached files. Note: Currently this is a blocking operation"""
+        with self._try_io(ignore_errors=True):
+            rmtree(self.cache_dir, ignore_errors=True)
+            # rmtree may silently leave the directory behind, so tolerate an existing one
+            makedirs(self.cache_dir, exist_ok=True)
+
+    async def contains(self, key: str) -> bool:
+        """Check whether a file exists for the given key"""
+        return isfile(self._join(key))
+
+    async def read(self, key: str) -> ResponseOrKey:
+        """Read and deserialize the file for a key; raises ``KeyError`` if that fails"""
+        with self._try_io():
+            async with aiofiles.open(self._join(key), 'rb') as f:
+                return self.deserialize(await f.read())
+
+    async def delete(self, key: str):
+        """Remove the file for a key; raises ``KeyError`` if it cannot be removed"""
+        with self._try_io():
+            await aiofiles.os.remove(self._join(key))
+
+    async def write(self, key: str, value: ResponseOrKey):
+        """Serialize a value and write it to the file for a key"""
+        with self._try_io():
+            async with aiofiles.open(self._join(key), 'wb') as f:
+                await f.write(self.serialize(value) or b'')
+
+    async def keys(self) -> AsyncIterable[str]:
+        """Get all keys (file names) in the cache directory"""
+        for filename in listdir(self.cache_dir):
+            yield filename
+
+    async def size(self) -> int:
+        """Get the number of files in the cache directory"""
+        return len(listdir(self.cache_dir))
+
+    async def values(self) -> AsyncIterable[ResponseOrKey]:
+        """Get all deserialized values in the cache"""
+        async for key in self.keys():
+            yield await self.read(key)
+
+    async def paths(self) -> AsyncIterable[str]:
+        """Get file paths to all cached responses"""
+        async for key in self.keys():
+            yield self._join(key)
+
+
+def _get_cache_dir(cache_dir: Union[Path, str], use_temp: bool) -> str:
+    """Resolve, create, and return the directory used to store response files.
+
+    The result always ends in ``responses``, so response files and ``redirects.sqlite`` both
+    end up inside the base cache directory, with or without ``use_temp``.
+    """
+    # Expand user paths (~/*) first, so they count as absolute instead of being nested in temp
+    base_dir = expanduser(str(cache_dir))
+
+    # Save to a temp directory, if specified
+    if use_temp and not isabs(base_dir):
+        base_dir = join(gettempdir(), base_dir)
+
+    # Resolve relative paths, and make sure the directory and its parents exist
+    responses_dir = abspath(join(base_dir, 'responses'))
+    makedirs(responses_dir, exist_ok=True)
+    return responses_dir
diff --git a/poetry.lock b/poetry.lock
index 363dc4e..a97de30 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -42,6 +42,14 @@ python-versions = "*"
 [package.dependencies]
 pycares = ">=3.0.0"
 
+[[package]]
+name = "aiofiles"
+version = "0.6.0"
+description = "File support for asyncio."
+category = "main"
+optional = false
+python-versions = "*"
+
 [[package]]
 name = "aiohttp"
 version = "3.7.4.post0"
@@ -1170,7 +1178,7 @@ docs = ["docutils", "m2r2", "Sphinx", "sphinx-autodoc-typehints", "sphinx-automo
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7"
-content-hash = "cf3110dacaf1bf2064615cadeca11807d7a931d274cf0edb1c62855896f62a87"
+content-hash = "2ac93d4f2836f0b957b04f7235d01175ba5296ac5375fb3f4b909a4f698e0f78"
 
 [metadata.files]
 aioboto3 = [
@@ -1183,6 +1191,10 @@ aiodns = [
     {file = "aiodns-2.0.0-py2.py3-none-any.whl", hash = "sha256:aaa5ac584f40fe778013df0aa6544bf157799bd3f608364b451840ed2c8688de"},
     {file = "aiodns-2.0.0.tar.gz", hash = "sha256:815fdef4607474295d68da46978a54481dd1e7be153c7d60f9e72773cd38d77d"},
 ]
+aiofiles = [
+    {file = "aiofiles-0.6.0-py3-none-any.whl", hash = "sha256:bd3019af67f83b739f8e4053c6c0512a7f545b9a8d91aaeab55e6e0f9d123c27"},
+    {file = "aiofiles-0.6.0.tar.gz", hash = "sha256:e0281b157d3d5d59d803e3f4557dcc9a3dff28a4dd4829a9ff478adae50ca092"},
+]
 aiohttp = [
     {file = "aiohttp-3.7.4.post0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:3cf75f7cdc2397ed4442594b935a11ed5569961333d49b7539ea741be2cc79d5"},
     {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:4b302b45040890cea949ad092479e01ba25911a15e648429c7c5aae9650c67a8"},
diff --git a/pyproject.toml b/pyproject.toml
index 6c6198c..5934cc7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ url-normalize = "^1.4"
 
 # Optional backend dependencies
 aioboto3 = { version = ">=7.0", optional = true }
+aiofiles = { version = ">=0.6.0", optional = true }
 aioredis = { version = "^1.3", optional = true }
 aiosqlite = { version = "^0.16", optional = true }
 motor = { version = ">=2.0", optional = true }
@@ -51,7 +52,7 @@ sphinx-material = { version = "*", optional = true }
 sphinxcontrib-apidoc = { version = "^0.3", optional = true }
 
 [tool.poetry.extras]
-backends = ["aioboto3", "aioredis", "aiosqlite", "motor"]
+backends = ["aioboto3", "aiofiles", "aioredis", "aiosqlite", "motor"]
 docs = ["docutils", "m2r2", "Sphinx", "sphinx-autodoc-typehints", "sphinx-automodapi", "sphinx-copybutton", "sphinx-material", "sphinxcontrib-apidoc"]