In [None]:
from dotenv import load_dotenv
import os
import requests
import time

# Pull the variables defined in the .env file into the process environment.
load_dotenv()

# Read the key from the environment so it is never written into the notebook.
API_KEY = os.environ.get("GOOGLE_BOOKS_API_KEY")

# Fail fast: stop here if the key is unset or empty.
if API_KEY is None or API_KEY == "":
    raise ValueError("Google Books API key not found. Check your .env file.")

def _clean_search_term(value):
    """Return ``value`` as a stripped string, or None if it is missing or blank."""
    if value is None:
        return None
    # A missing DataFrame cell arrives as float NaN, the only value that
    # compares unequal to itself; treat it as "no value" rather than "nan".
    if isinstance(value, float) and value != value:
        return None
    text = str(value).strip()
    return text or None


def get_page_count(title, author=None):
    """Look up a book's page count via the Google Books volumes endpoint.

    Args:
        title: Book title, searched with the ``intitle:`` keyword.
        author: Optional author name, added as an ``inauthor:`` keyword.
            None, NaN and blank values are ignored.

    Returns:
        The first non-zero ``pageCount`` found among the returned volumes,
        or None when the title is missing/blank, nothing matches, or the
        request fails.

    Errors are reported with ``print`` and never propagate, so a batch loop
    calling this function keeps running after a failed lookup.
    """
    title_term = _clean_search_term(title)
    if title_term is None:
        # Nothing to search for; don't spend a request from the daily quota.
        return None

    query = f"intitle:{title_term}"
    author_term = _clean_search_term(author)
    if author_term is not None:
        # A space separates search terms; it is sent encoded as "+", which is
        # what the hand-built URL used to contain literally.
        query += f" inauthor:{author_term}"

    try:
        # Passing params lets requests URL-encode the values, so characters
        # such as "&", "#" or "+" in a title cannot corrupt the query string.
        response = requests.get(
            "https://www.googleapis.com/books/v1/volumes",
            params={"q": query, "key": API_KEY},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()

        for item in data.get('items', []):
            page_count = item.get('volumeInfo', {}).get('pageCount')
            # Skip volumes reporting 0 pages so a later edition with a real
            # count can still be used.
            if page_count:
                return page_count
    except Exception as e:
        # HTTP error messages embed the request URL, which contains the API
        # key; mask it before it lands in the notebook output.
        message = str(e)
        if API_KEY:
            message = message.replace(API_KEY, "***")
        print(f"Error for {title}: {message}")
    return None


In [None]:
# Flag the rows whose cleaned page count is still missing.
mask_missing_pages = bbe_clean['pages_clean'].isna()
# Trial run: only the first five flagged rows are sent to the API.
subset = bbe_clean.loc[mask_missing_pages].head(5)

for idx, row in subset.iterrows():
    # The author is optional; .get yields None when the column is absent.
    pages = get_page_count(row['title'], row.get('author_clean'))
    if pages:
        # Write the result back into the full frame at the same index label.
        bbe_clean.at[idx, 'pages_clean'] = pages
    time.sleep(1)  # pause one second between requests

In [None]:
# Recount the rows that still have no page count after the backfill loop.
new_missing = bbe_clean['pages_clean'].isnull().sum()
print(f"Remaining missing pages: {new_missing}")

> Be mindful of Google's free limit (1,000 requests/day per API key).