In [1]:
import requests
import time
import random
import pandas as pd

all_books = []
max_results = 40          # Max allowed per request
total_books_needed = 1000 # Total number of books to retrieve

base_url = "https://www.googleapis.com/books/v1/volumes"

# Loop over startIndex values (0, 40, 80, ..., 960)
for start_index in range(0, total_books_needed, max_results):
    params = {
        "q": "a",  # Broad query to get many books
        "startIndex": start_index,
        "maxResults": max_results
    }
    response = requests.get(base_url, params=params)
    if response.status_code != 200:
        print(f"Error fetching data at startIndex {start_index}: {response.status_code}")
        break
    data = response.json()
    items = data.get("items", [])
    
    for item in items:
        volume_info = item.get("volumeInfo", {})
        title = volume_info.get("title", "N/A")
        authors = volume_info.get("authors", ["N/A"])
        author = authors[0] if authors else "N/A"  # Taking the first author
        
        avg_rating = volume_info.get("averageRating", None)
        ratings_count = volume_info.get("ratingsCount", 0)
        try:
            ratings_count = int(ratings_count)
        except Exception:
            ratings_count = 0
        
        categories = volume_info.get("categories", ["N/A"])
        category = categories[0] if categories else "N/A"
        
        published_date = volume_info.get("publishedDate", "N/A")
        # Extract the year (first 4 characters) if possible
        year = published_date[:4] if published_date != "N/A" else "N/A"
        
        all_books.append({
            "Title": title,
            "Author": author,
            "Avg_Rating": avg_rating,
            "Num_Ratings": ratings_count,
            "Category": category,
            "Year": year
        })
    
    time.sleep(random.uniform(1, 4))

# Compute popularity rank based on Num_Ratings (largest ratingsCount gets rank 1)
sorted_books = sorted(all_books, key=lambda x: x["Num_Ratings"] if x["Num_Ratings"] is not None else 0, reverse=True)
for rank, book in enumerate(sorted_books, start=1):
    book["Popularity_Rank"] = rank

# Create a DataFrame with the desired column order
df = pd.DataFrame(sorted_books)
df = df[["Popularity_Rank", "Title", "Author", "Avg_Rating", "Num_Ratings", "Category", "Year"]]

print(df.head(10))


   Popularity_Rank                                              Title  \
0                1                      Sapiens. De animales a dioses   
1                2                                     Luna de Plutón   
2                3                            Culpa mía (Culpables 1)   
3                4                                     Luna de Plutón   
4                5                         Cómo entender a los chicos   
5                6                                 Revista Vochomanía   
6                7                     ¿A quién estás pensando matar?   
7                8                 Report on the meteorology of India   
8                9                                      Tan poca vida   
9               10  Los Juegos del Hambre 4 - Balada de pájaros ca...   

                             Author  Avg_Rating  Num_Ratings  \
0                 Yuval Noah Harari         4.0           13   
1                             Dross         3.5           12   
2   

In [3]:
display(df)

Unnamed: 0,Popularity_Rank,Title,Author,Avg_Rating,Num_Ratings,Category,Year
0,1,Sapiens. De animales a dioses,Yuval Noah Harari,4.0,13,Social Science,2014
1,2,Luna de Plutón,Dross,3.5,12,Juvenile Nonfiction,2015
2,3,Culpa mía (Culpables 1),Mercedes Ron,4.5,12,Young Adult Fiction,2017
3,4,Luna de Plutón,Dross,3.5,12,Juvenile Nonfiction,2015
4,5,Cómo entender a los chicos,Cristina Alemany,4.0,6,Interpersonal relations in adolescence,2006
...,...,...,...,...,...,...,...
315,316,New York Magazine,,,0,,1997
316,317,Mi Cartilla Fonética,Ediciones Norte,,0,Alfabeto,2009
317,318,Spy,,,0,,1990
318,319,New York Magazine,,,0,,1969
