Add a more thorough usage example
JWCook committed Nov 24, 2020
1 parent 9dc97f5 commit ac18bdb
Showing 5 changed files with 112 additions and 17 deletions.
19 changes: 9 additions & 10 deletions README.md
@@ -47,19 +47,18 @@ $ pre-commit install --config .github/pre-commit.yml
 ```
 
 ## Usage example
+See the [examples](https://github.com/JWCook/aiohttp-client-cache/blob/master/examples)
+folder for more detailed usage examples.
+
+Here is a simple example using an endpoint that takes 1 second to fetch.
+After the first request, subsequent requests to the same URL will return near-instantly; so,
+fetching it 10 times will only take ~1 second instead of 10.
 ```python
-from aiohttp_client_cache import CachedSession
-session = CachedSession('demo_cache', backend='sqlite')
-response = await session.get('http://httpbin.org/get')
-```
-
-Afterward, all responses with headers and cookies will be transparently cached to
-a database named `demo_cache.sqlite`. For example, following code will take only
-1-2 seconds instead of 10, and will run instantly on next launch:
-
-```python
-for i in range(10):
-    await session.get('http://httpbin.org/delay/1')
+async with CachedSession(backend='sqlite') as session:
+    for i in range(10):
+        await session.get('http://httpbin.org/delay/1')
 ```
 
 ## Cache Backends
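The ~1 second claim in the new README example is easy to verify by timing the same loop. A minimal sketch, assuming only `aiohttp-client-cache` is installed; the `main` wrapper and the printed message are illustrative, not part of the commit:

```python
import asyncio
import time

from aiohttp_client_cache import CachedSession

async def main():
    async with CachedSession(backend='sqlite') as session:
        start = time.perf_counter()
        for _ in range(10):
            await session.get('http://httpbin.org/delay/1')
        # Only the first request hits the network; the other nine are served
        # from the local cache, so the total is ~1 second instead of ~10
        print(f'10 requests in {time.perf_counter() - start:0.2f}s')

asyncio.run(main())
```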
2 changes: 1 addition & 1 deletion aiohttp_client_cache/backends/base.py
@@ -60,9 +60,9 @@ def is_cacheable(self, response: Union[ClientResponse, CachedResponse, None]) ->
         return all(
             [
                 not self.disabled,
-                not self.is_expired(response),
                 response.status in self.allowed_codes,
                 response.method in self.allowed_methods,
+                not self.is_expired(response),
                 self.filter_fn(response),
             ]
         )
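The `filter_fn` entry above is a user-supplied predicate, evaluated after the cheaper built-in checks. A sketch of what such a filter might look like, assuming `filter_fn` is accepted as a keyword argument and forwarded to the cache backend (that wiring is not shown in this diff):

```python
from aiohttp_client_cache import CachedSession

def cache_json_only(response) -> bool:
    # Hypothetical predicate: only cache responses that declare a JSON body
    return 'json' in response.headers.get('Content-Type', '')

# Assumes filter_fn is forwarded to the backend, as is_cacheable() implies
session = CachedSession(backend='sqlite', filter_fn=cache_json_only)
```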
4 changes: 2 additions & 2 deletions aiohttp_client_cache/backends/sqlite.py
@@ -2,7 +2,7 @@
 import pickle
 import sqlite3
 from contextlib import asynccontextmanager
-from typing import AsyncIterator, Iterable, Optional
+from typing import AsyncIterator, Iterable, Optional, Union
 
 import aiosqlite

@@ -160,7 +160,7 @@ async def values(self) -> Iterable[ResponseOrKey]:
             cur = await db.execute(f'SELECT value FROM `{self.table_name}`')
             return [row[0] for row in await cur.fetchall()]
 
-    async def write(self, key: str, item: ResponseOrKey):
+    async def write(self, key: str, item: Union[ResponseOrKey, sqlite3.Binary]):
         async with self.get_connection(autocommit=True) as db:
             await db.execute(
                 f'INSERT OR REPLACE INTO `{self.table_name}` (key,value) VALUES (?,?)',
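The widened `item` type documents that callers may pass raw BLOB data as well as responses: pickled bytes wrapped in `sqlite3.Binary` are stored as-is by the driver. A rough sketch of that calling convention, with `cache`, `key`, and `response` standing in as illustrative names:

```python
import pickle
import sqlite3

async def save_pickled(cache, key: str, response) -> None:
    # Wrapping the pickled bytes in sqlite3.Binary ensures they are bound
    # as a BLOB parameter rather than decoded as text
    await cache.write(key, sqlite3.Binary(pickle.dumps(response)))
```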
8 changes: 4 additions & 4 deletions aiohttp_client_cache/session.py
@@ -77,10 +77,10 @@ async def _request(self, method: str, str_or_url: StrOrURL, **kwargs) -> AnyResp
         if cached_response and not getattr(cached_response, 'is_expired', False):
             return cached_response
         else:
-            client_response = await super()._request(method, str_or_url, **kwargs)
-            await client_response.read()
-            await self.cache.save_response(cache_key, client_response)
-            return client_response
+            new_response = await super()._request(method, str_or_url, **kwargs)
+            await new_response.read()
+            await self.cache.save_response(cache_key, new_response)
+            return new_response
 
     @contextmanager
     def disable_cache(self):
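The rename above makes the read-through flow easier to follow: return a fresh cached response if one exists; otherwise fetch, read the body so it can be serialized, save it, and return it. The same pattern in isolation, with a plain dict standing in for the real cache backend (illustrative only, not the library's API):

```python
import aiohttp

_cache: dict = {}

async def cached_get(session: aiohttp.ClientSession, url: str):
    if url in _cache:
        return _cache[url]      # cache hit: skip the network entirely
    response = await session.get(url)
    await response.read()       # consume the body so it can be stored and replayed
    _cache[url] = response      # analogous to cache.save_response()
    return response
```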
96 changes: 96 additions & 0 deletions examples/precache.py
@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""A contrived example of using aiohttp + caching.

Fetches and caches the content of a given web page and all links found on that page.

Usage::

    ./precache.py <url>

Example::

    $ # Run twice and note stats before and after
    $ ./precache.py https://www.nytimes.com
    Found 102 links
    Completed run in 6.195 seconds and cached 53.570 MB

    $ ./precache.py https://www.nytimes.com
    Found 102 links
    Completed run in 0.436 seconds and cached 0.000 MB
"""
import asyncio
import re
import sys
import time
import urllib.parse
from contextlib import contextmanager
from os.path import getsize

from aiohttp_client_cache import CachedSession

CACHE_NAME = 'precache'
DEFAULT_URL = 'https://www.nytimes.com'
HREF_PATTERN = re.compile(r'href="(.*?)"')


async def precache_page_links(parent_url):
    """Fetch and cache the content of a given web page and all links found on that page"""
    async with CachedSession(backend='sqlite', cache_name=CACHE_NAME) as session:
        urls = await get_page_links(session, parent_url)

        tasks = [asyncio.create_task(cache_url(session, url)) for url in urls]
        responses = await asyncio.gather(*tasks)

    return responses


async def get_page_links(session, url):
    """Get all links found in the HTML of the given web page"""
    print(f'Finding all links on page: {url}')
    links = set()
    response = await session.get(url)
    response.raise_for_status()
    html = await response.text()

    for link in HREF_PATTERN.findall(html):
        try:
            links.add(urllib.parse.urljoin(url, link))
        except Exception as e:
            print(f'Failed to add link: {link}')
            print(e)

    print(f'Found {len(links)} links')
    return links


async def cache_url(session, url):
    """Fetch a single URL, ignoring any errors"""
    try:
        return await session.get(url)
    except Exception as e:
        print(e)
        return None


def get_cache_bytes():
    """Get the current size of the cache, in bytes"""
    try:
        return getsize(f'{CACHE_NAME}.sqlite')
    except Exception:
        return 0


@contextmanager
def measure_cache():
    """Measure time elapsed and size of added cache content"""
    start_time = time.perf_counter()
    start_bytes = get_cache_bytes()
    yield

    elapsed_time = time.perf_counter() - start_time
    cached_mb = (get_cache_bytes() - start_bytes) / 1024 / 1024
    print(f'Completed run in {elapsed_time:0.3f} seconds and cached {cached_mb:0.3f} MB')


if __name__ == "__main__":
    parent_url = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_URL
    with measure_cache():
        asyncio.run(precache_page_links(parent_url))
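To reproduce the before/after stats shown in the docstring, the cache file needs to be removed between benchmark pairs; the filename follows from `CACHE_NAME` in the script. One way to reset it, as a small standalone helper (not part of the committed script):

```python
import os

CACHE_NAME = 'precache'  # matches the constant in the script above

def reset_cache():
    """Delete the SQLite cache file so the next run starts cold"""
    try:
        os.remove(f'{CACHE_NAME}.sqlite')
    except FileNotFoundError:
        pass
```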
