<a href="https://colab.research.google.com/github/shravya-05/Book-Scraping-Project/blob/initial-error-handling/errorhandling1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import requests
import logging
from bs4 import BeautifulSoup
import pandas as pd

In [14]:
# Set up logging to keep track of issues
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")


In [15]:
# Function to fetch page content with error handling
def fetch_page(url):
    try:
        response = requests.get(url, timeout=10)  # Don’t wait forever if site is slow
        response.raise_for_status()  # Check if request was successful
        return response.text
    except requests.exceptions.RequestException as e:
        logging.error(f"Couldn’t fetch {url}, moving on... Error: {e}")
        return None  # Skip this page if it fails


In [16]:
# Convert rating from text to number (because "Three stars" isn’t useful in numbers)
def convert_rating(text_rating):
    ratings = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}
    return ratings.get(text_rating, None)  # If we don’t recognize the rating, return None


In [17]:
# Function to extract book details, making sure nothing important is missing
def process_books(book_list, books):
    for book in book_list:
        try:
            title = book.find("h3").find("a")["title"]
            price_text = book.find("p", class_="price_color").text.strip()  # Get the price text
            price = float(price_text.replace("£", ""))  # Remove currency symbol before converting
            rating = convert_rating(book.find("p", class_="star-rating")["class"][1])
            availability = book.find("p", class_="instock availability").text.strip()

            # If key info is missing, skip this book
            if not title or price is None or rating is None:
                logging.warning(f"Skipping '{title}' because some details are missing")
                continue  # Move on to the next book

            books.append({"Title": title, "Price": price, "Rating": rating, "Availability": availability})
        except Exception as e:
            logging.error(f"Something went wrong while processing a book: {e}")

In [18]:
# Scrape multiple pages until there are no more books left
def scrape():
    books = []
    page = 1

    while True:
        url = f"http://books.toscrape.com/catalogue/page-{page}.html"
        page_content = fetch_page(url)
        if page_content is None:
            break  # Stop if the page request keeps failing

        soup = BeautifulSoup(page_content, "html.parser")
        book_list = soup.find_all("article", class_="product_pod")

        if not book_list:
            break  # No more books, time to stop

        process_books(book_list, books)
        page += 1  # Move to the next page

    save_to_csv(books)


In [19]:
# Save book data into a CSV file so we don’t lose it
def save_to_csv(books):
    if books:
        pd.DataFrame(books).to_csv("books_data.csv", index=False)
        logging.info("Saved book data to CSV successfully!")

# Run the scraper (let’s go!)
scrape()

ERROR:root:Something went wrong while processing a book: could not convert string to float: 'Â51.77'
ERROR:root:Something went wrong while processing a book: could not convert string to float: 'Â53.74'
ERROR:root:Something went wrong while processing a book: could not convert string to float: 'Â50.10'
ERROR:root:Something went wrong while processing a book: could not convert string to float: 'Â47.82'
ERROR:root:Something went wrong while processing a book: could not convert string to float: 'Â54.23'
ERROR:root:Something went wrong while processing a book: could not convert string to float: 'Â22.65'
ERROR:root:Something went wrong while processing a book: could not convert string to float: 'Â33.34'
ERROR:root:Something went wrong while processing a book: could not convert string to float: 'Â17.93'
ERROR:root:Something went wrong while processing a book: could not convert string to float: 'Â22.60'
ERROR:root:Something went wrong while processing a book: could not convert string to float: