diff --git a/aiohttp_client_cache/backends/base.py b/aiohttp_client_cache/backends/base.py index f4a93d5..c7e454f 100644 --- a/aiohttp_client_cache/backends/base.py +++ b/aiohttp_client_cache/backends/base.py @@ -27,51 +27,6 @@ class CacheBackend: To extend this with your own custom backend, implement one or more subclasses of :py:class:`.BaseCache` to use as :py:attr:`CacheBackend.responses` and :py:attr:`CacheBackend.response_aliases`. - - **Cache Name:** - - The ``cache_name`` parameter will be used as follows depending on the backend: - - * ``sqlite``: Cache filename prefix, e.g ``my_cache.sqlite`` - * ``mongodb``: Database name - * ``redis``: Namespace, meaning all keys will be prefixed with ``'cache_name:'`` - - **Cache Keys:** - - The cache key is a hash created from request information, and is used as an index for cached - responses. There are a couple ways you can customize how the cache key is created: - - * Use ``include_get_headers`` if you want headers to be included in the cache key. In other - words, this will create separate cache items for responses with different headers. - * Use ``ignored_parameters`` to exclude specific request params from the cache key. This is - useful, for example, if you request the same resource with different credentials or access - tokens. - - **URL Patterns:** - - The ``urls_expire_after`` parameter can be used to set different expiration times for different - requests, based on glob patterns. This allows you to customize caching based on what you - know about what you're requesting. For example, you might request one resource that gets updated - frequently, another that changes infrequently, and another that never changes. 
- - Example:: - - urls_expire_after = { - '*.site_1.com': timedelta(days=1), - 'site_2.com/resource_1': timedelta(hours=12), - 'site_2.com/resource_2': 60, - 'site_2.com/static': -1, - } - - Notes: - - * ``urls_expire_after`` should be a dict in the format ``{'pattern': expiration_time}`` - * ``expiration_time`` may be either a number (in seconds) or a ``timedelta`` - (same as ``expire_after``) - * Patterns will match request **base URLs**, so the pattern ``site.com/base`` is equivalent to - ``https://site.com/base/**`` - * If there is more than one match, the first match (in the order they are defined) will be used - * If no patterns match a request, ``expire_after`` will be used as a default. """ def __init__( @@ -317,7 +272,7 @@ async def clear(self): @abstractmethod async def delete(self, key: str): - """Delete a single item from the cache. Does not raise an error if the item is missing.""" + """Delete an item from the cache. Does not raise an error if the item is missing.""" @abstractmethod def keys(self) -> AsyncIterable[str]: @@ -325,7 +280,7 @@ def keys(self) -> AsyncIterable[str]: @abstractmethod async def read(self, key: str) -> ResponseOrKey: - """Read a single item from the cache. Returns ``None`` if the item is missing.""" + """Read an item from the cache. 
Returns ``None`` if the item is missing.""" @abstractmethod async def size(self) -> int: diff --git a/aiohttp_client_cache/session.py b/aiohttp_client_cache/session.py index 551d9ee..0d9bc3a 100644 --- a/aiohttp_client_cache/session.py +++ b/aiohttp_client_cache/session.py @@ -46,16 +46,16 @@ async def _request( return new_response @asynccontextmanager - async def disable_cache(self): + async def disabled(self): """Temporarily disable the cache Example: - >>> session = CachedSession() - >>> await session.get('http://httpbin.org/ip') - >>> async with session.disable_cache(): - >>> # Will return a new response, not a cached one + >>> async with CachedSession() as session: >>> await session.get('http://httpbin.org/ip') + >>> async with session.disabled(): + >>> # Will return a new response, not a cached one + >>> await session.get('http://httpbin.org/ip') """ self.cache.disabled = True yield diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst new file mode 100644 index 0000000..66831bf --- /dev/null +++ b/docs/advanced_usage.rst @@ -0,0 +1,123 @@ +.. _advanced_usage: + +Advanced Usage +============== +This section covers some more advanced and use-case-specific features. + +.. contents:: + :local: + +Custom Response Filtering +------------------------- +If you need more advanced behavior for determining what to cache, you can provide a custom filtering +function via the ``filter_fn`` param. This can be any function that takes a :py:class:`requests.Response` +object and returns a boolean indicating whether or not that response should be cached. It will be applied +to both new responses (on write) and previously cached responses (on read). 
Example: + + >>> from sys import getsizeof + >>> from aiohttp_client_cache import CachedSession, SQLiteCache + >>> + >>> def filter_by_size(response): + >>> """Don't cache responses with a body over 1 MB""" + >>> return getsizeof(response.content) <= 1024 * 1024 + >>> + >>> cache = SQLiteCache(filter_fn=filter_by_size) + +Custom Backends +--------------- +If the built-in :py:mod:`Cache Backends ` don't suit your needs, you can +create your own by making subclasses of :py:class:`.CacheBackend` and :py:class:`.BaseCache`: + + >>> from aiohttp_client_cache import CachedSession + >>> from aiohttp_client_cache.backends import BaseCache, BaseStorage + >>> + >>> class CustomCache(BaseCache): + ... """Wrapper for higher-level cache operations. In most cases, the only thing you need + ... to specify here is which storage class(es) to use. + ... """ + ... def __init__(self, **kwargs): + ... super().__init__(**kwargs) + ... self.redirects = CustomStorage(**kwargs) + ... self.responses = CustomStorage(**kwargs) + >>> + >>> class CustomStorage(BaseStorage): + ... """Interface for lower-level backend storage operations""" + ... def __init__(self, **kwargs): + ... super().__init__(**kwargs) + ... + ... async def contains(self, key: str) -> bool: + ... """Check if a key is stored in the cache""" + ... + ... async def clear(self): + ... """Delete all items from the cache""" + ... + ... async def delete(self, key: str): + ... """Delete an item from the cache""" + ... + ... async def keys(self) -> AsyncIterable[str]: + ... """Get all keys stored in the cache""" + ... + ... async def read(self, key: str) -> ResponseOrKey: + ... """Read an item from the cache""" + ... + ... async def size(self) -> int: + ... """Get the number of items in the cache""" + ... + ... def values(self) -> AsyncIterable[ResponseOrKey]: + ... """Get all values stored in the cache""" + ... + ... async def write(self, key: str, item: ResponseOrKey): + ... 
"""Write an item to the cache""" + +You can then use your custom backend in a :py:class:`.CachedSession` with the ``cache`` parameter: + + >>> session = CachedSession(cache=CustomCache()) + +Cache Inspection +---------------- +Here are some ways to get additional information out of the cache session, backend, and responses: + +Response Attributes +~~~~~~~~~~~~~~~~~~~ +The following attributes are available on responses: +* ``from_cache``: indicates if the response came from the cache +* ``created_at``: :py:class:`~datetime.datetime` of when the cached response was created or last updated +* ``expires``: :py:class:`~datetime.datetime` after which the cached response will expire +* ``is_expired``: indicates if the cached response is expired (if an old response was returned due to a request error) + +Examples: + + >>> from aiohttp_client_cache import CachedSession + >>> session = CachedSession(expire_after=timedelta(days=1)) + + >>> # Placeholders are added for non-cached responses + >>> r = session.get('http://httpbin.org/get') + >>> print(r.from_cache, r.created_at, r.expires, r.is_expired) + False None None None + + >>> # Values will be populated for cached responses + >>> r = session.get('http://httpbin.org/get') + >>> print(r.from_cache, r.created_at, r.expires, r.is_expired) + True 2021-01-01 18:00:00 2021-01-02 18:00:00 False + +Cache Contents +~~~~~~~~~~~~~~ +You can use :py:meth:`.CachedSession.cache.urls` to see all URLs currently in the cache: + + >>> session = CachedSession() + >>> print(session.urls) + ['https://httpbin.org/get', 'https://httpbin.org/stream/100'] + +If needed, you can get more details on cached responses via ``CachedSession.cache.responses``, which +is a dict-like interface to the cache backend. See :py:class:`.CachedResponse` for a full list of +attributes available. 
+ +For example, if you wanted to see all URLs requested with a specific method: + + >>> post_urls = [ + >>> response.url for response in session.cache.responses.values() + >>> if response.request.method == 'POST' + >>> ] + +You can also inspect ``CachedSession.cache.redirects``, which maps redirect URLs to keys of the +responses they redirect to. diff --git a/docs/conf.py b/docs/conf.py index 91a7453..f45d214 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,6 +30,7 @@ # Sphinx extension modules extensions = [ 'sphinx.ext.autodoc', + 'sphinx.ext.autosectionlabel', 'sphinx.ext.intersphinx', 'sphinx.ext.napoleon', # 'sphinx.ext.viewcode', @@ -81,7 +82,7 @@ html_theme_options = { 'color_primary': 'blue', 'color_accent': 'light-blue', - 'globaltoc_depth': 1, + 'globaltoc_depth': 3, 'globaltoc_includehidden': False, 'logo_icon': '', 'repo_url': 'https://github.com/JWCook/aiohttp-client-cache', diff --git a/docs/index.rst b/docs/index.rst index d3a9df7..693a31d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,6 +14,9 @@ Contents .. toctree:: :maxdepth: 2 + user_guide + advanced_usage + security reference history contributing diff --git a/docs/security.rst b/docs/security.rst new file mode 100644 index 0000000..25b1dc3 --- /dev/null +++ b/docs/security.rst @@ -0,0 +1,57 @@ +.. _security: + +Security +======== + +Pickle Vulnerabilities +---------------------- +.. warning:: The python ``pickle`` module has `known security vulnerabilities `_, + potentially leading to code execution when deserializing data. + +This means it should only be used to deserialize data that you trust hasn't been tampered with. +Since this isn't always possible, aiohttp-client-cache can optionally use +`itsdangerous `_ to add a layer of security around these operations. +It works by signing serialized data with a secret key that you control. Then, if the data is tampered +with, the signature check fails and raises an error. 
+ +Creating and Storing a Secret Key +--------------------------------- +To enable this behavior, first create a secret key, which can be any ``str`` or ``bytes`` object. + +One common pattern for handling this is to store it wherever you store the rest of your credentials +(`Linux keyring `_, +`macOS keychain `_, +`password database `_, etc.), +set it in an environment variable, and then read it in your application: + + >>> import os + >>> secret_key = os.environ['SECRET_KEY'] + +Alternatively, you can use the `keyring `_ package to read the key +directly: + + >>> import keyring + >>> secret_key = keyring.get_password('aiohttp-client-cache-example', 'secret_key') + +Signing Cached Responses +------------------------ +Once you have your key, just pass it to :py:class:`.CachedSession` or :py:func:`.install_cache` to start using it: + + >>> from aiohttp_client_cache import CachedSession, RedisBackend + >>> + >>> cache = RedisBackend(secret_key=secret_key) + >>> async with CachedSession(cache=cache) as session: + >>> await session.get('https://httpbin.org/get') + +You can verify that it's working by modifying the cached item (*without* your key): + + >>> cache_2 = RedisBackend(secret_key='a different key') + >>> async with CachedSession(cache=cache_2) as session_2: + >>> cache_key = list(await session_2.cache.responses.keys())[0] + >>> await session_2.cache.responses.write(cache_key, 'exploit!') + +Then, if you try to get that cached response again (*with* your key), you will get an error: + + >>> async with CachedSession(cache=cache) as session: + >>> await session.get('https://httpbin.org/get') + BadSignature: Signature b'iFNmzdUOSw5vqrR9Cb_wfI1EoZ8' does not match diff --git a/docs/user_guide.rst b/docs/user_guide.rst new file mode 100644 index 0000000..ecc12d8 --- /dev/null +++ b/docs/user_guide.rst @@ -0,0 +1,202 @@ +User Guide +========== +This section covers the main features of aiohttp-client-cache. + +.. 
contents:: + :local: + :depth: 2 + +Installation +------------ +Install with pip: + + $ pip install aiohttp-client-cache + +Requirements +~~~~~~~~~~~~ +* Requires python 3.7+. +* You may need additional dependencies depending on which backend you want to use. To install with + extra dependencies for all supported :ref:`cache backends`: + + $ pip install aiohttp-client-cache[backends] + +Optional Setup Steps +~~~~~~~~~~~~~~~~~~~~ +* See :ref:`security` for recommended setup steps for more secure cache serialization. +* See :ref:`Contributing Guide ` for setup steps for local development. + +General Usage +------------- + +:py:class:`.CachedSession` can be used as a drop-in replacement for :py:class:`aiohttp.ClientSession`. +Basic usage looks like this: + + >>> from aiohttp_client_cache import CachedSession + >>> + >>> async with CachedSession() as session: + >>> await session.get('http://httpbin.org/delay/1') + +Any :py:class:`~aiohttp.ClientSession` method can be used (but see :ref:`http methods` section +below for config details): + + >>> await session.request('GET', 'http://httpbin.org/get') + >>> await session.head('http://httpbin.org/get') + +Caching can be temporarily disabled with :py:meth:`.CachedSession.disabled`: + + >>> async with session.disabled(): + ... 
await session.get('http://httpbin.org/get') + +The best way to clean up your cache is through :ref:`cache expiration`, but you can also +clear out everything at once with :py:meth:`.CacheBackend.clear`: + + >>> await session.cache.clear() + +Cache Backends +-------------- +Several cache backends are included, which can be selected using the ``cache`` parameter for +:py:class:`.CachedSession`: + +* Default: A non-persistent cache that just stores responses in memory +* `DynamoDB `_: :py:class:`.DynamoDBBackend` (requires ``aioboto3``) +* `MongoDB `_: :py:class:`.MongoDBBackend` (requires ``motor``) +* `Redis `_: :py:class:`.RedisBackend` (requires ``aioredis``) +* `SQLite `_: :py:class:`.SQLiteBackend` (requires ``aiosqlite``) + +Usage example: + + >>> from aiohttp_client_cache import CachedSession, RedisBackend + >>> + >>> async with CachedSession(cache=RedisBackend()) as session: + ... await session.get('http://httpbin.org/get') + +See :py:mod:`aiohttp_client_cache.backends` for more backend-specific usage details, and see +:ref:`custom backends` for details on creating your own implementation. + +Cache Name +~~~~~~~~~~ +The ``cache_name`` parameter will be used as follows depending on the backend: + +* DynamoDB: Table name +* MongoDB: Database name +* Redis: Namespace, meaning all keys will be prefixed with ``'cache_name:'`` +* SQLite: Database path, e.g. ``~/.cache/my_cache.sqlite`` + +Cache Options +------------- +A number of options are available to modify which responses are cached and how they are cached. + +HTTP Methods +~~~~~~~~~~~~ +By default, only GET and HEAD requests are cached. To cache additional HTTP methods, specify them +with ``allowed_methods``. 
For example, caching POST requests can be used to ensure you don't send +the same data multiple times: + + >>> cache = SQLiteBackend(allowed_methods=('GET', 'POST')) + >>> async with CachedSession(cache=cache) as session: + >>> await session.post('http://httpbin.org/post', json={'param': 'value'}) + +Status Codes +~~~~~~~~~~~~ +By default, only responses with a 200 status code are cached. To cache additional status codes, +specify them with ``allowed_codes``: + + >>> cache = SQLiteBackend(allowed_codes=(200, 418)) + >>> async with CachedSession(cache=cache) as session: + >>> await session.get('http://httpbin.org/teapot') + +Request Parameters +~~~~~~~~~~~~~~~~~~ +By default, all request parameters are taken into account when caching responses. In some cases, +there may be request parameters that don't affect the response data, for example authentication tokens +or credentials. If you want to ignore specific parameters, specify them with ``ignored_parameters``: + + >>> cache = SQLiteBackend(ignored_parameters=['auth-token']) + >>> async with CachedSession(cache=cache) as session: + >>> # Only the first request will be sent + >>> await session.get('http://httpbin.org/get', params={'auth-token': '2F63E5DF4F44'}) + >>> await session.get('http://httpbin.org/get', params={'auth-token': 'D9FAEB3449D3'}) + +Request Headers +~~~~~~~~~~~~~~~ +By default, request headers are not taken into account when caching responses. In some cases, +different headers may result in different response data, so you may want to cache them separately. 
+To enable this, use ``include_headers``: + + >>> cache = SQLiteBackend(include_headers=True) + >>> async with CachedSession(cache=cache) as session: + >>> # Both of these requests will be sent and cached separately + >>> await session.get('http://httpbin.org/headers', headers={'Accept': 'text/plain'}) + >>> await session.get('http://httpbin.org/headers', headers={'Accept': 'application/json'}) + +Cache Expiration +---------------- +By default, cached responses will be stored indefinitely. You can initialize the cache with an +``expire_after`` value to specify how long responses will be cached. + +Expiration Types +~~~~~~~~~~~~~~~~ +``expire_after`` can be any of the following: + +* ``-1`` (to never expire) +* A positive number (in seconds) +* A :py:class:`~datetime.timedelta` +* A :py:class:`~datetime.datetime` + +Examples: + + >>> # Set expiration for the session using a value in seconds + >>> cache = SQLiteBackend(expire_after=360) + + >>> # To specify a different unit of time, use a timedelta + >>> from datetime import timedelta + >>> cache = SQLiteBackend(expire_after=timedelta(days=30)) + + >>> # Update an existing session to disable expiration (i.e., store indefinitely) + >>> session.expire_after = -1 + +Expiration Scopes +~~~~~~~~~~~~~~~~~ +Passing ``expire_after`` to a session's :py:class:`.CacheBackend` will set the expiration for the +duration of that session. Expiration can also be set on a per-URL or per-request basis. +The following order of precedence is used: + +1. Per-request expiration (``expire_after`` argument for :py:meth:`.CachedSession.request`) +2. Per-URL expiration (``urls_expire_after`` argument for :py:class:`.CachedSession`) +3. Per-session expiration (``expire_after`` argument for :py:class:`.CacheBackend`) + +URL Patterns +~~~~~~~~~~~~ +You can use ``urls_expire_after`` to set different expiration values for different requests, based on +URL glob patterns. 
This allows you to customize caching based on what you know about the resources +you're requesting. For example, you might request one resource that gets updated frequently, another +that changes infrequently, and another that never changes. Example: + + >>> cache = SQLiteBackend(urls_expire_after={ + ... '*.site_1.com': 30, + ... 'site_2.com/resource_1': 60 * 2, + ... 'site_2.com/resource_2': 60 * 60 * 24, + ... 'site_2.com/static': -1, + ... }) + +**Notes:** + +* ``urls_expire_after`` should be a dict in the format ``{'pattern': expire_after}`` +* ``expire_after`` accepts the same types as ``CacheBackend.expire_after`` +* Patterns will match request **base URLs**, so the pattern ``site.com/resource/`` is equivalent to + ``http*://site.com/resource/**`` +* If there is more than one match, the first match will be used in the order they are defined +* If no patterns match a request, ``CacheBackend.expire_after`` will be used as a default. + +Removing Expired Responses +~~~~~~~~~~~~~~~~~~~~~~~~~~ +For better performance, expired responses won't be removed immediately, but will be removed +(or replaced) the next time they are requested. To manually clear all expired responses, use +:py:meth:`.CachedSession.remove_expired_responses`: + + >>> session.remove_expired_responses() + +You can also apply a different ``expire_after`` to previously cached responses, which will +revalidate the cache with the new expiration time: + + >>> session.remove_expired_responses(expire_after=timedelta(days=30))