In [72]:
import requests
import time
from datetime import datetime, timedelta
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy import sparse
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache

API_KEY = "fh5hj47dynk4nvx4s9ewufj4"
BASE = "https://api.penguinrandomhouse.com/resources/v2/title/domains/PRH.US/works"


session = requests.Session()

In [69]:
def fetch_page(start, rows=50):
    params = {
        "api_key": API_KEY,
        "suppressLinks": "true",
        "suppressRecordCount": "true",
        "preferLanguage": "E",
        "showNewReleases": "true",
        "showComingSoon": "false",
        "showPublishedBooks": "true",
        "formatFamily": "Paperback",
        "showFlapCopy": "true",
        "ageRangeMax": "18",
        "onSaleFrom": "10/01/2025",
        "start": str(start),
        "rows": str(rows)
    }

    r = session.get(BASE, params=params, timeout=20)

    print("STATUS:", r.status_code)
    print("RAW:", r.text[:300])  # inspect what PRH sends
    print("REQUEST:", r.url)

    r.raise_for_status()
    return r.json()

In [67]:
def collect_titles(max_titles=500, page_size=50):
    texts = []
    isbns = []
    start = 0

    while len(texts) < max_titles:
        data = fetch_page(start, rows=page_size)
        works = data.get("data", {}).get("works", [])

        if not works:
            break

        for w in works:
            flap = w.get("flapCopy")
            isbn = w.get("isbnStr") or str(w.get("isbn"))

            if flap:
                texts.append(flap.strip())
                isbns.append(isbn)

                if len(texts) >= max_titles:
                    break

        start += page_size
        time.sleep(0.25)

    return isbns, texts

In [73]:
isbns, texts = collect_titles(max_titles=10)

print("Collected:", len(texts))
print("Example snippet:", texts[0][:300])


STATUS: 200
RAW: {"status":"ok","recordCount":null,"startTimestamp":"2025-11-18T15:15:53Z","endTimestamp":"2025-11-18T15:15:53Z","timeTaken":120,"data":{"works":[{"workId":258548,"title":"Double Puppy Trouble","author":"Danica McKellar; illustrated by Josée Masse","onsale":"2025-11-04","language":"E","seoFriendlyUrl
REQUEST: https://api.penguinrandomhouse.com/resources/v2/title/domains/PRH.US/works?api_key=fh5hj47dynk4nvx4s9ewufj4&suppressLinks=true&suppressRecordCount=true&preferLanguage=E&showNewReleases=true&showComingSoon=false&showPublishedBooks=true&formatFamily=Paperback&showFlapCopy=true&ageRangeMax=18&onSaleFrom=10%2F01%2F2025&start=0&rows=50
STATUS: 200
RAW: {"status":"ok","recordCount":null,"startTimestamp":"2025-11-18T15:15:54Z","endTimestamp":"2025-11-18T15:15:54Z","timeTaken":93,"data":{"works":[{"workId":768022,"title":"History Smashers: Ancient Egypt","author":"Kate Messner","onsale":"2025-10-07","language":"E","seoFriendlyUrl":"/books/768022/hist
REQUEST: https://api.p

HTTPError: 404 Client Error: Not Found for url: https://api.penguinrandomhouse.com/resources/v2/title/domains/PRH.US/works?api_key=fh5hj47dynk4nvx4s9ewufj4&suppressLinks=true&suppressRecordCount=true&preferLanguage=E&showNewReleases=true&showComingSoon=false&showPublishedBooks=true&formatFamily=Paperback&showFlapCopy=true&ageRangeMax=18&onSaleFrom=10%2F01%2F2025&start=200&rows=50

In [74]:
fetch_page(0, 10)

STATUS: 200
RAW: {"status":"ok","recordCount":null,"startTimestamp":"2025-11-18T15:16:08Z","endTimestamp":"2025-11-18T15:16:09Z","timeTaken":116,"data":{"works":[{"workId":258548,"title":"Double Puppy Trouble","author":"Danica McKellar; illustrated by Josée Masse","onsale":"2025-11-04","language":"E","seoFriendlyUrl
REQUEST: https://api.penguinrandomhouse.com/resources/v2/title/domains/PRH.US/works?api_key=fh5hj47dynk4nvx4s9ewufj4&suppressLinks=true&suppressRecordCount=true&preferLanguage=E&showNewReleases=true&showComingSoon=false&showPublishedBooks=true&formatFamily=Paperback&showFlapCopy=true&ageRangeMax=18&onSaleFrom=10%2F01%2F2025&start=0&rows=10


{'status': 'ok',
 'recordCount': None,
 'startTimestamp': '2025-11-18T15:16:08Z',
 'endTimestamp': '2025-11-18T15:16:09Z',
 'timeTaken': 116,
 'data': {'works': [{'workId': 258548,
    'title': 'Double Puppy Trouble',
    'author': 'Danica McKellar; illustrated by Josée Masse',
    'onsale': '2025-11-04',
    'language': 'E',
    'seoFriendlyUrl': '/books/258548/double-puppy-trouble-by-danica-mckellar-illustrated-by-josee-masse',
    'contribRoleCode': None,
    'contribRoleDesc': None,
    'seriesNumber': None,
    'firstOnsale': '2022-07-26',
    'isbnCounts': {'variant': 0, 'format': 4},
    '_embeds': None,
    '_links': []},
   {'workId': 262934,
    'title': 'Happy Times in Noisy Village',
    'author': 'Astrid Lindgren; illustrated by Mini Grey',
    'onsale': '2025-11-04',
    'language': 'E',
    'seoFriendlyUrl': '/books/262934/happy-times-in-noisy-village-by-astrid-lindgren-illustrated-by-mini-grey',
    'contribRoleCode': None,
    'contribRoleDesc': None,
    'seriesNumber