In [3]:
import requests
import json

def get_books(pages=5, limit=100, output_file="OpenLibraryBooks.json", timeout=30):
    """Download "fiction" search results from Open Library and save them as JSON.

    Requests result pages 1..``pages`` from the search endpoint, keeps the
    title, first author, first publication year and first ISBN of every
    returned document, and writes the collected records to ``output_file``.
    Fetching stops at the first page that fails (connection problem or
    non-200 status); the records collected up to that point are still saved.

    Args:
        pages: Number of result pages to request.
        limit: Number of results requested per page.
        output_file: Path of the JSON file to write (UTF-8, overwritten).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. The result is the file written to ``output_file``.
    """
    all_books = []  # Accumulates one dict per book across all pages
    base_url = "https://openlibrary.org/search.json"

    for page in range(1, pages + 1):
        params = {
            "q": "fiction",  # General query; change to search for something else
            # Ask explicitly for the fields read below so the result does not
            # depend on the endpoint's default field set.
            "fields": "title,author_name,first_publish_year,isbn",
            "limit": limit,
            "page": page,
        }

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(base_url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Error fetching page {page}: {exc}")
            print("Stopping early; books fetched so far will still be saved.")
            break

        if response.status_code != 200:
            print(f"Error fetching page {page}. Status Code: {response.status_code}")
            print("Stopping early; books fetched so far will still be saved.")
            break

        for book in response.json().get("docs", []):
            all_books.append({
                "title": book.get("title", "Unknown Title"),
                # `or` also covers an empty list, where `[0]` would raise IndexError.
                "author": (book.get("author_name") or ["Unknown Author"])[0],
                "year": book.get("first_publish_year", "Unknown Year"),
                "isbn": (book.get("isbn") or ["N/A"])[0],  # First ISBN if any
            })

    # Save books to a JSON file
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_books, f, indent=4, ensure_ascii=False)

# Download five 100-result pages and write them to OpenLibraryBooks.json
get_books(
    pages=5,
    limit=100,
    output_file="OpenLibraryBooks.json",
)


# Update the following code to collect this information about each book:
# "Title", "Authors", "Publisher", "Page Count", "Language", "Category", "Thumbnail", "ISBN_13", "ISBN_10"

In [5]:
import requests
import json

def get_books(genres, pages=5, limit=100, output_file="NewOpenLibraryBooks.json", timeout=30):
    """Download Open Library search results for several genres and save them as JSON.

    For every entry of ``genres`` the search endpoint is queried for result
    pages 1..``pages``. One record per returned document is collected and all
    records are written to ``output_file``. A failed page (connection problem
    or non-200 status) ends the current genre only; the remaining genres are
    still fetched and everything collected is saved.

    Args:
        genres: Iterable of query strings, one search per entry.
        pages: Number of result pages to request per genre.
        limit: Number of results requested per page.
        output_file: Path of the JSON file to write (UTF-8, overwritten).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. The result is the file written to ``output_file``.
    """
    all_books = []  # Accumulates one dict per book across all genres and pages
    base_url = "https://openlibrary.org/search.json"
    # Ask explicitly for the fields read below so the result does not depend
    # on the endpoint's default field set.
    wanted_fields = ",".join((
        "title", "author_name", "publisher", "number_of_pages_median",
        "language", "subject", "cover_i", "isbn",
    ))

    for genre in genres:
        print(f"\n📚 Fetching books for genre: {genre}")

        for page in range(1, pages + 1):
            params = {
                "q": genre,  # Search books by genre
                "fields": wanted_fields,
                "limit": limit,
                "page": page,
            }

            try:
                # Without a timeout a stalled connection would block forever.
                response = requests.get(base_url, params=params, timeout=timeout)
            except requests.RequestException as exc:
                print(f"❌ Error fetching page {page} for {genre}: {exc}")
                break  # Give up on this genre; the outer loop moves to the next one

            if response.status_code != 200:
                print(f"❌ Error fetching page {page} for {genre}. Status Code: {response.status_code}")
                break  # Give up on this genre; the outer loop moves to the next one

            books = response.json().get("docs", [])

            for book in books:
                # The search result carries ISBN-10 and ISBN-13 values in one
                # "isbn" list; there is no separate "isbn 10" key to read.
                isbns = book.get("isbn") or []
                all_books.append({
                    "Genre": genre,
                    "Title": book.get("title", "Unknown Title"),
                    # `or` also covers an empty list returned by the API.
                    "Authors": book.get("author_name") or ["Unknown Author"],
                    "Publisher": book.get("publisher") or ["Unknown Publisher"],
                    # Page count is exposed as "number_of_pages_median";
                    # "pages" is kept as a fallback for older inputs.
                    "Pages": book.get(
                        "number_of_pages_median",
                        book.get("pages", "Unknown Page Count"),
                    ),
                    "Language": (book.get("language") or ["Unknown Language"])[0],
                    "Category": (book.get("subject") or ["Unknown Category"])[0],
                    "Thumbnail": (
                        f"https://covers.openlibrary.org/b/id/{book['cover_i']}-L.jpg"
                        if "cover_i" in book
                        else "No Thumbnail Available"
                    ),
                    "ISBN": isbns[0] if isbns else "N/A",
                    # First entry that is 10 characters long once hyphens are removed.
                    "ISBN 10": next(
                        (code for code in isbns if len(code.replace("-", "")) == 10),
                        "N/A",
                    ),
                })

            print(f"✅ Fetched {len(books)} books from page {page} for {genre}")

    # Save books to a JSON file
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_books, f, indent=4, ensure_ascii=False)

    print(f"\n✅ Saved {len(all_books)} books to {output_file}")

# Genres to query, one search per entry
genres_list = [
    "fiction",
    "mystery",
    "fantasy",
    "science fiction",
    "romance",
]

# Request up to 500 books per genre (5 pages of 100 results each)
get_books(
    genres=genres_list,
    pages=5,
    limit=100,
    output_file="NewOpenLibraryBooks.json",
)



📚 Fetching books for genre: fiction
✅ Fetched 100 books from page 1 for fiction
✅ Fetched 100 books from page 2 for fiction
✅ Fetched 100 books from page 3 for fiction
✅ Fetched 100 books from page 4 for fiction
✅ Fetched 100 books from page 5 for fiction

📚 Fetching books for genre: mystery
✅ Fetched 100 books from page 1 for mystery
✅ Fetched 100 books from page 2 for mystery
✅ Fetched 100 books from page 3 for mystery
✅ Fetched 100 books from page 4 for mystery
✅ Fetched 100 books from page 5 for mystery

📚 Fetching books for genre: fantasy
✅ Fetched 100 books from page 1 for fantasy
✅ Fetched 100 books from page 2 for fantasy
✅ Fetched 100 books from page 3 for fantasy
✅ Fetched 100 books from page 4 for fantasy
✅ Fetched 100 books from page 5 for fantasy

📚 Fetching books for genre: science fiction
✅ Fetched 100 books from page 1 for science fiction
✅ Fetched 100 books from page 2 for science fiction
✅ Fetched 100 books from page 3 for science fiction
✅ Fetched 100 books from pag