In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin 

def scrape_books(min_rating, max_price, *, timeout=10):
    """Scrape the books.toscrape.com front page and return the matching books.

    Only the first listing page is read. Every listed book whose star rating
    is at least ``min_rating`` and whose price is at most ``max_price`` has
    its detail page fetched to collect the title, genre, UPC and description.

    Args:
        min_rating: Minimum star rating (1-5) a book must have to be kept.
        max_price: Maximum price in pounds a book may have to be kept.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A ``pandas.DataFrame`` with one row per matching book and the columns
        ``Title``, ``Genre``, ``Rating``, ``Price (£)``, ``UPC``,
        ``Availability`` and ``Description``. The columns are present even
        when no book matches.

    Raises:
        requests.RequestException: If a page cannot be fetched (connection
            error, timeout, or a non-2xx HTTP status).
    """
    base_url = 'https://books.toscrape.com/'
    columns = [
        'Title',
        'Genre',
        'Rating',
        'Price (£)',
        'UPC',
        'Availability',
        'Description',
    ]
    rating_mapping = {
        'One': 1,
        'Two': 2,
        'Three': 3,
        'Four': 4,
        'Five': 5,
    }

    def get_soup(session, page_url):
        """Download ``page_url`` and return its parsed HTML."""
        response = session.get(page_url, timeout=timeout)
        # Fail loudly instead of parsing an error page as if it were data.
        response.raise_for_status()
        # Pass raw bytes so BeautifulSoup does its own encoding detection.
        return BeautifulSoup(response.content, 'html.parser')

    def get_price(book):
        """Return the listed price as a float, or None if it cannot be read."""
        price_tag = book.find('p', class_='price_color')
        if price_tag is None:
            return None
        # Keep only digits and the decimal point so the currency symbol (or
        # any mis-decoded bytes in front of it) cannot break float().
        number = ''.join(ch for ch in price_tag.get_text() if ch in '0123456789.')
        try:
            return float(number)
        except ValueError:
            return None

    def get_rating(book):
        """Return the star rating (1-5), or None if the card has none."""
        rating_tag = book.find('p', class_='star-rating')
        if rating_tag is None:
            return None
        # The rating word is one of the tag's CSS classes; look it up by
        # value rather than relying on its position in the class list.
        for css_class in rating_tag.get('class', []):
            if css_class in rating_mapping:
                return rating_mapping[css_class]
        return None

    def get_stock(book):
        """Return 'In Stock' when the card shows the availability icon."""
        book_available = book.find('i', class_='icon-ok')
        return 'In Stock' if book_available is not None else 'No'

    def get_url(book):
        """Return the absolute detail-page URL, or None if the card has no link."""
        link = book.find('a', href=True)
        if link is None:
            return None
        return urljoin(base_url, link['href'])

    def get_upc(book_soup):
        """Return the UPC from the product table, or None."""
        upc_tag = book_soup.find('th', string='UPC')
        if upc_tag is None:
            return None
        value_tag = upc_tag.find_next_sibling('td')
        return value_tag.text.strip() if value_tag is not None else None

    def get_title(book_soup):
        """Return the text of the page's <h1>, or None."""
        title_tag = book_soup.find('h1')
        return title_tag.text.strip() if title_tag is not None else None

    def get_genre(book_soup):
        """Return the text of the last breadcrumb link, or None."""
        breadcrumb_links = book_soup.select('ul.breadcrumb a')
        if len(breadcrumb_links) > 1:
            return breadcrumb_links[-1].text.strip()
        return None

    def get_description(book_soup):
        """Return the paragraph following the description header."""
        description_tag = book_soup.find('div', id='product_description')
        if description_tag is not None:
            description_para = description_tag.find_next_sibling('p')
            if description_para is not None:
                return description_para.text.strip()
        return 'No description available'

    rows = []

    # One session for the whole run so the connection to the site is reused
    # across the listing page and every detail page.
    with requests.Session() as session:
        soup = get_soup(session, base_url)
        grid = soup.find('ol', attrs={'class': 'row'})
        if grid is None:
            return pd.DataFrame(rows, columns=columns)
        books_main = grid.find_all(
            'li', attrs={'class': 'col-xs-6 col-sm-4 col-md-3 col-lg-3'}
        )

        for book in books_main:
            price = get_price(book)
            rating = get_rating(book)

            # Filter on the listing data first so detail pages are fetched
            # only for books that will actually be kept.
            if rating is None or price is None:
                continue
            if rating < min_rating or price > max_price:
                continue

            book_url = get_url(book)
            if book_url is None:
                continue

            book_soup = get_soup(session, book_url)

            rows.append({
                'Title': get_title(book_soup),
                'Genre': get_genre(book_soup),
                'Rating': rating,
                'Price (£)': price,
                'UPC': get_upc(book_soup),
                'Availability': get_stock(book),
                'Description': get_description(book_soup),
            })

    return pd.DataFrame(rows, columns=columns)

# Collect books rated at least 4 stars that cost at most £20 and show the table.
df = scrape_books(min_rating=4.0, max_price=20.0)
display(df)

Unnamed: 0,Title,Genre,Rating,Price (£),UPC,Availability,Description
0,Set Me Free,Young Adult,5,17.46,ce6396b0f23f6ecc,In Stock,Aaron Ledbetter’s future had been planned out ...
