# In [None]:
import requests
from bs4 import BeautifulSoup
import json

# Function to scrape books by genre
def scrape_books_by_genre(genre):
    """Scrape the Goodreads shelf page for *genre* and collect book details.

    Fetches ``https://www.goodreads.com/shelf/show/<genre>``, finds every
    element matching the ``.bookTitle`` CSS selector, and calls
    ``fetch_book_details`` on each linked book page.

    Args:
        genre: Shelf/genre slug appended to the shelf URL (e.g. "classics").

    Returns:
        A list of book-detail dicts. Books whose detail lookup produced an
        empty result are skipped. An empty list is returned when the shelf
        page cannot be fetched.
    """
    url = f"https://www.goodreads.com/shelf/show/{genre}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # requests applies no timeout by default, so an unresponsive server would
    # hang the whole run; connection errors are reported like a bad status.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch data for genre {genre}. Error: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to fetch data for genre {genre}. Status code: {response.status_code}")
        return []

    # Parse the HTML response
    soup = BeautifulSoup(response.text, "html.parser")
    books = []

    # Each .bookTitle element is expected to carry a site-relative href.
    for book in soup.select(".bookTitle"):
        href = book.get("href")
        if not href:
            # No link to follow; indexing book['href'] would raise KeyError.
            continue
        link = "https://www.goodreads.com" + href

        # Fetch book details by visiting the book page
        book_details = fetch_book_details(link, genre)
        # fetch_book_details signals failure with an empty dict; keep those
        # placeholders out of the results so counts and output stay accurate.
        if book_details:
            books.append(book_details)

    return books

# Function to fetch details of a book from its page
def _extract_author_names(raw_authors):
    """Normalize a JSON-LD ``author`` value into a list of author names.

    Accepts a list of author objects (the shape the scraper originally
    assumed), a single author object, or plain name strings.
    """
    if raw_authors is None:
        return []
    if not isinstance(raw_authors, list):
        # A lone author may be given as one object/string instead of a list.
        raw_authors = [raw_authors]

    names = []
    for author in raw_authors:
        if isinstance(author, dict):
            name = author.get("name")
        elif isinstance(author, str):
            name = author
        else:
            name = None
        if name:
            names.append(name)
    return names


def fetch_book_details(book_url, genre):
    """Fetch a book page and extract its details from the embedded JSON-LD.

    Args:
        book_url: Absolute URL of the book page.
        genre: Fallback value for the "category" field when the JSON data
            has no "genre" key.

    Returns:
        A dict with the keys "title", "authors", "publisher", "page_count",
        "language", "category", "thumbnail", "isbn" and "link". Missing
        scalar fields default to "N/A". An empty dict is returned when the
        page cannot be fetched or holds no usable JSON-LD data.
    """
    # Send the same browser-like User-Agent as the shelf request in this file
    # so both requests are presented to the server consistently.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # requests applies no timeout by default; bound the wait and treat
    # connection-level errors like any other failed fetch.
    try:
        response = requests.get(book_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch book details. Error: {exc}")
        return {}
    if response.status_code != 200:
        print(f"Failed to fetch book details. Status code: {response.status_code}")
        return {}

    # Parse the book page
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract the JSON data from the script tag
    script_tag = soup.find("script", type="application/ld+json")
    # .string is None for an empty tag or one with several children, which
    # would make json.loads raise TypeError.
    if not script_tag or not script_tag.string:
        print(f"Failed to find script tag with JSON data for {book_url}")
        return {}

    try:
        json_data = json.loads(script_tag.string)
    except json.JSONDecodeError as exc:
        print(f"Failed to parse JSON data for {book_url}: {exc}")
        return {}
    if not isinstance(json_data, dict):
        # The .get() lookups below require a JSON object at the top level.
        print(f"Failed to parse JSON data for {book_url}: unexpected top-level type")
        return {}

    # Store the book details in a dictionary
    return {
        "title": json_data.get("name", "N/A"),
        "authors": _extract_author_names(json_data.get("author")),
        "publisher": json_data.get("publisher", "N/A"),
        "page_count": json_data.get("numberOfPages", "N/A"),
        "language": json_data.get("inLanguage", "N/A"),
        "category": json_data.get("genre", genre),
        "thumbnail": json_data.get("image", "N/A"),
        "isbn": json_data.get("isbn", "N/A"),
        "link": book_url,
    }

# Save books by multiple genres to JSON
def save_books_by_genres(genres, output_file="goodreads_genres_books.json"):
    """Scrape every genre in *genres* and dump the combined results as JSON.

    Args:
        genres: Iterable of genre slugs, each passed to
            ``scrape_books_by_genre``.
        output_file: Path of the JSON file to write (UTF-8, 4-space indent,
            non-ASCII characters written unescaped).
    """
    # Flatten the per-genre result lists into one list, preserving order.
    collected = [
        record
        for shelf in genres
        for record in scrape_books_by_genre(shelf)
    ]

    with open(output_file, "w", encoding="utf-8") as handle:
        json.dump(collected, handle, ensure_ascii=False, indent=4)

    total = len(collected)
    print(f"Saved {total} books to {output_file}")

# Example usage: scrape the shelves listed below and write all results to one JSON file.
genres = [
    "classics",
    "science-fiction",
    "travel",
    "mystery",
]  # Add more genres as needed
save_books_by_genres(genres, output_file="goodreads_genres_books.json")
