In [1]:
import asyncio
import functools as fntools
import itertools as it
import sys

from collections.abc import Awaitable, Callable, Iterable, Iterator, Sequence, Set
from copy import deepcopy
from typing import assert_never, TypeVar, Union

from loguru import logger
from pydantic import BaseModel, ValidationError
from pydantic_extra_types.isbn import ISBN

import httpx
import polars as pl

In [2]:
# Drop whatever handlers are currently registered, then install one stderr
# handler. Each record shows time, level, call site, message, and finally the
# `extra` mapping (which is where values bound via `logger.contextualize` and
# keyword arguments to log calls end up).
logger.remove()
logger.add(
    sys.stderr,
    colorize=True,  # render the <green>/<cyan>/<level> markup as colors
    format="<green>{time}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level> | {extra}",
)

1

In [3]:
# Generic element type used in the annotations of the batching helpers below.
T = TypeVar("T")

In [4]:
# Recursive alias for a decoded JSON value. `float` is part of the union
# because JSON numbers are not restricted to integers.
JSON = Union[None, int, float, str, bool, list["JSON"], dict[str, "JSON"]]

In [5]:
class Book(BaseModel):
    """One book as extracted from an API volume record.

    The model is configured as frozen, and input keys that are not declared
    below are ignored rather than rejected.
    """

    model_config = dict(frozen=True, extra="ignore")

    title: str
    # All credited authors of the volume.
    authors: Set[str]
    publisher: str
    # Stored as the raw string received from the API, not parsed into a date.
    publish_date: str
    # Validated by the ISBN type from pydantic_extra_types.
    isbn: ISBN


class BookError:
    """Error value returned instead of a book list when a lookup fails.

    Attributes:
        message: Human-readable description of the failure.
    """

    message: str

    def __init__(self, message: str) -> None:
        # An explicit initializer is required: a bare class-level annotation
        # does not make the class accept constructor arguments.
        self.message = message

    def __repr__(self) -> str:
        return f"{type(self).__name__}(message={self.message!r})"

In [6]:
# Endpoint that every author lookup in this notebook is sent to.
BOOKS_API = "https://www.googleapis.com/books/v1/volumes"
# Authors to query; each name is sent verbatim in an `inauthor:` search term,
# so spelling matters.
AUTHORS = ["William Shakespeare", "George Orwell", "Aldous Huxley", "Hermann Hesse"]

# Single async HTTP client passed to every request made in the cells below.
# NOTE(review): no `await aclient.aclose()` is visible in this section —
# confirm the client is closed once the notebook is done with it.
aclient = httpx.AsyncClient()

In [7]:
def try_parse_volume_info(volume: JSON) -> Book | None:
    """Build a `Book` from one API volume record, or return None if it cannot be parsed.

    Args:
        volume: A decoded volume object; the book fields are read from its
            "volumeInfo" mapping.

    Returns:
        The parsed `Book`, or None when:
        - `volume` or its "volumeInfo" entry is not a mapping,
        - no ISBN_13 / ISBN_10 identifier is present (logged as a warning), or
        - the collected fields fail `Book` validation (logged as an error).

    A volume without an "authors" entry is parsed with an empty author set.
    """
    if not isinstance(volume, dict):
        return None

    volume_info = volume.get("volumeInfo")
    if not isinstance(volume_info, dict):
        return None

    # Map identifier type -> identifier, keeping only the two ISBN flavours.
    # `or []` also covers an explicit null value for the key.
    isbn_by_type = {
        entry["type"]: entry["identifier"]
        for entry in volume_info.get("industryIdentifiers") or []
        if entry.get("type") in {"ISBN_10", "ISBN_13"} and "identifier" in entry
    }

    # Prefer ISBN-13 and fall back to ISBN-10.
    isbn = isbn_by_type.get("ISBN_13", isbn_by_type.get("ISBN_10"))
    if isbn is None:
        logger.warning(
            "no identifiers",
            id=volume.get("id"),  # .get: a missing id must not raise while logging
            authors=volume_info.get("authors"),
        )
        return None

    try:
        return Book(
            title=volume_info.get("title"),
            # `or ()` keeps set() from raising TypeError when authors is absent.
            authors=set(volume_info.get("authors") or ()),
            publisher=volume_info.get("publisher"),
            publish_date=volume_info.get("publishedDate"),
            isbn=isbn,
        )
    except ValidationError as e:
        logger.error(e)
        return None

In [8]:
async def get_books_for_author(
    client: httpx.AsyncClient,
    author: str,
) -> Sequence[Book] | BookError:
    """Query the books API for `author` and return the volumes that parse.

    Args:
        client: Async HTTP client used to send the request.
        author: Author name, sent as an `inauthor:` search term.

    Returns:
        A list of the `Book` objects that `try_parse_volume_info` produced
        (volumes it rejects are skipped), or a `BookError` built from the
        response body when the response reports an error status.
    """
    # Everything logged inside this block carries the queried author.
    with logger.contextualize(query_author=author):
        query = {"q": f"inauthor:{author}"}
        response = await client.get(BOOKS_API, params=query)

        if response.is_error:
            logger.error(response.text)
            return BookError(response.text)

        volumes = response.json().get("items", [])
        candidates = (try_parse_volume_info(volume) for volume in volumes)
        # Materialize inside the `with` so parse-time logs keep the context.
        return [book for book in candidates if book is not None]


# Smoke test: look up the first configured author and display the result.
await get_books_for_author(aclient, AUTHORS[0])



[Book(title='HAMLET', authors=frozenset({'William Shakespeare'}), publisher='e-artnow', publish_date='2017-12-06', isbn='9788027237142'),
 Book(title='The Complete Works of William Shakespeare', authors=frozenset({'William Shakespeare'}), publisher='Simon and Schuster', publish_date='2014-10-01', isbn='9781626862760'),
 Book(title='Othello', authors=frozenset({'William Shakespeare'}), publisher='e-artnow', publish_date='2017-10-16', isbn='9788027223824'),
 Book(title='Oluja', authors=frozenset({'William Shakespeare'}), publisher='Bulaja naklada', publish_date='2011-05-30', isbn='9789533280714'),
 Book(title='The Poems & Sonnets of William Shakespeare', authors=frozenset({'William Shakespeare'}), publisher='Wordsworth Editions', publish_date='1994', isbn='9781853264160'),
 Book(title='William Shakespeare', authors=frozenset({'William Shakespeare'}), publisher='Barnes & Noble Publishing', publish_date='1989-05-19', isbn='9781586635565')]

In [9]:
def slice(src: Iterator[T], length: int | None = None) -> Iterable[Sequence[T]]:
    if length is None:
        return (tuple(jt) for jt in (src,))

    jt = iter(src)
    return iter(lambda: tuple(it.islice(jt, length)), ())


# Demo: batch the numbers 0..9 with a length below, inside, equal to, and
# above the input size; one resulting list is printed per line.
print(
    *(list(slice(list(range(10)), chunk_length)) for chunk_length in (1, 3, 10, 11)),
    sep="\n",
)

[(0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,)]
[(0, 1, 2), (3, 4, 5), (6, 7, 8), (9,)]
[(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
[(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)]


In [10]:
async def batch_calls(
    client: httpx.AsyncClient,
    *,
    tasks: Iterable[Callable[[httpx.AsyncClient], Awaitable[T]]],
    batch_size: int | None = None,
) -> Sequence[T]:
    """Run `tasks` against `client` in batches and collect their results.

    Each batch is awaited with `asyncio.gather`; the next batch starts only
    after the previous one has finished.

    Args:
        client: Async HTTP client handed to every task.
        tasks: Callables that take the client and return an awaitable. Any
            iterable is accepted, including one-shot iterators such as `map`.
        batch_size: Maximum number of tasks awaited together. None puts all
            tasks in a single batch.

    Returns:
        One result per task, in the order the tasks were supplied. An empty
        `tasks` gives an empty list.

    Raises:
        ValueError: If `batch_size` is smaller than 1 and there is at least
            one task to run.
    """
    # Materialize first: an iterator is always truthy and has no len(), so the
    # emptiness check and the default batch size both need a real sequence.
    pending = list(tasks)
    if not pending:
        return []

    if batch_size is None:
        batch_size = len(pending)
    elif batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer or None, got {batch_size!r}")

    results: list[T] = []
    for batch in slice(pending, batch_size):
        results.extend(await asyncio.gather(*(task(client) for task in batch)))
    return results


In [11]:
def create_get_books_task(author: str):
    """Bind `author` to `get_books_for_author`, leaving only the client to supply.

    Args:
        author: Author name the returned task will look up.

    Returns:
        An async callable taking an `httpx.AsyncClient` and returning whatever
        `get_books_for_author(client, author)` returns.
    """
    # `wraps` copies the wrapped function's metadata onto the task. Note that
    # the task itself accepts only `client`.
    @fntools.wraps(get_books_for_author)
    async def _task(client: httpx.AsyncClient) -> Sequence[Book] | BookError:
        return await get_books_for_author(client, author)

    return _task


tasks = [
    {
        "invoker": create_get_books_task(author),
        "author": deepcopy(author),
    }
    for author in AUTHORS
]

for task in tasks:
    match task:
        case {"invoker": invoker, "author": author}:
            print(author)
            _ = await invoker(aclient)
        case _:
            assert_never(_)

William Shakespare




George Orwell




Aldous Huxley




Hermann Hesse




In [12]:
books = [
    book
    for batch in await batch_calls(
        aclient,
        tasks=map(create_get_books_task, AUTHORS),
        batch_size=2,
    )
    for book in batch
]



In [13]:
# One row per book. `authors` is a set on the model, so it is flattened into a
# comma-separated string; sorting first makes that string independent of set
# iteration order and therefore stable between runs.
df = pl.from_dicts(
    {**row, "authors": ", ".join(sorted(row["authors"]))}
    for row in (book.model_dump() for book in books)
)

df.unique()

title,authors,publisher,publish_date,isbn
str,str,str,str,str
"""The Seasons of the Soul""","""Hermann Hesse""","""North Atlantic Books""","""2011-10-11""","""9781583943410"""
"""The Fairy Tales of Hermann Hes…","""Hermann Hesse""","""Bantam""","""2009-09-30""","""9780307420510"""
"""Hermann Hesse""","""Hermann Hesse""","""Farrar, Straus and Giroux""","""2013-01-22""","""9781466835085"""
"""The Complete Works of William …","""William Shakespeare""","""Simon and Schuster""","""2014-10-01""","""9781626862760"""
"""HAMLET""","""William Shakespeare""","""e-artnow""","""2017-12-06""","""9788027237142"""
…,…,…,…,…
"""Oluja""","""William Shakespeare""","""Bulaja naklada""","""2011-05-30""","""9789533280714"""
"""Othello""","""William Shakespeare""","""e-artnow""","""2017-10-16""","""9788027223824"""
"""The George Orwell Collection""","""George Orwell""","""Birlinn Ltd""","""2023-04-01""","""9781788856348"""
"""The Poems & Sonnets of William…","""William Shakespeare""","""Wordsworth Editions""","""1994""","""9781853264160"""
