In [1]:
import requests
from bs4 import BeautifulSoup

def _class_contains(fragment):
    """Return a ``class_`` filter that accepts values containing ``fragment``.

    The filter rejects ``None``/empty values instead of raising on them, so it
    can be handed to ``find``/``find_all`` for tags that carry no class.
    """
    return lambda class_name: bool(class_name) and fragment in class_name


def _parse_rating(rating_element):
    """Return the first whitespace-separated token of ``aria-label`` as a float.

    Returns ``None`` when the element is missing, has no (or an empty)
    ``aria-label``, or the first token is not a number.
    """
    if rating_element is None:
        return None
    rating_text = rating_element.get('aria-label')
    if not rating_text:
        return None
    try:
        return float(rating_text.split()[0])
    except (ValueError, IndexError):
        # ValueError: first token is not numeric.
        # IndexError: label is whitespace only, so split() yields no tokens.
        return None


def scrape_product_info(url, timeout=10, headers=None):
    """Scrapes product information from the given URL using BeautifulSoup.

    Args:
        url (str): The URL of the website to scrape.
        timeout (float | tuple): Timeout in seconds forwarded to
            ``requests.get`` so an unresponsive server cannot block forever.
        headers (dict | None): Optional HTTP headers forwarded to
            ``requests.get`` (for example a custom ``User-Agent``).
            NOTE(review): whether custom headers avoid the 503 recorded for
            this notebook's target URL is unverified.

    Returns:
        list: A list of dictionaries, one per matched product element, each
              with the keys 'Name', 'Price' and 'Rating'. A field that cannot
              be extracted is set to 'Not available'; 'Rating' is a float
              when it can be parsed. An empty list is returned if no product
              elements are found or if the request fails.
    """
    not_available = 'Not available'

    try:
        response = requests.get(url, timeout=timeout, headers=headers)
        print(response.status_code)
        response.raise_for_status()  # Raises HTTPError for 4xx/5xx responses

        soup = BeautifulSoup(response.content, 'html.parser')

        # Substring match on the class so suffixed/prefixed variants still hit.
        product_elements = soup.find_all('div', class_=_class_contains('s-product-grid'))

        products = []
        for product_element in product_elements:
            # Name: try the span variant first, then fall back to the h2 variant.
            name_element = (
                product_element.find('span', class_=_class_contains('a-size-mini'))
                or product_element.find('h2', class_=_class_contains('a-size-base'))
            )
            price_element = product_element.find('span', class_=_class_contains('a-price-whole'))
            rating_element = product_element.find('span', class_=_class_contains('a-star-average'))

            rating = _parse_rating(rating_element)

            # Every record carries the same three keys so consumers never
            # have to guard against a missing field.
            products.append({
                'Name': name_element.text.strip() if name_element else not_available,
                'Price': price_element.text.strip() if price_element else not_available,
                'Rating': rating if rating is not None else not_available,
            })

        return products

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and the HTTPError raised above.
        print(f"An error occurred while fetching the website: {e}")
        return []


if __name__ == '__main__':
    # Search-results page to scrape.
    url = 'https://www.amazon.in/s?k=gaming+chair&ref=nb_sb_noss'
    products = scrape_product_info(url)

    if not products:
        # An empty list means either the request failed or nothing matched.
        print("No products found on this page.")
    else:
        # Number the products from 1 and print each field on its own line.
        for i, product in enumerate(products, 1):
            print(f"\nProduct {i}:")
            for key, value in product.items():
                print(f"{key}: {value}")

503
An error occurred while fetching the website: 503 Server Error: Service Unavailable for url: https://www.amazon.in/s?k=gaming+chair&ref=nb_sb_noss
No products found on this page.


In [25]:
# Issue the bare GET on its own; the returned Response object is the cell's
# displayed value, which makes the HTTP status visible without any parsing.
requests.get('https://www.amazon.in/s?k=gaming+chair&ref=nb_sb_noss')

<Response [503]>

In [21]:
import requests
from bs4 import BeautifulSoup

# Step 2: Send an HTTP GET request to the URL
url = "https://www.flipkart.com/search?q=gaming%20chair&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
# A timeout keeps the cell from blocking indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Step 3: Create a BeautifulSoup object and parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Step 4: Find and extract the list of products, prices, and ratings
    # NOTE(review): these class names resemble the Amazon-style classes used
    # in the first cell of this notebook, while the URL is a Flipkart search
    # page - confirm the selectors against the actual page markup.
    products = soup.find_all('div', class_='s-title-instructions-bold')
    prices = soup.find_all('span', class_='a-offscreen')
    ratings = soup.find_all('span', class_='a-declarative')

    # Step 5: Organize the extracted information into a structured format
    # The three lists are paired purely by position.
    # NOTE(review): this assumes the n-th price/rating on the page belongs to
    # the n-th product - verify. A shorter prices or ratings list yields
    # 'N/A' instead of raising IndexError.
    product_info = []
    for i, title_element in enumerate(products):
        product_name = title_element.get_text(strip=True)
        product_price = prices[i].get_text(strip=True) if i < len(prices) else 'N/A'
        product_rating = ratings[i].get_text(strip=True) if i < len(ratings) else 'N/A'

        product_info.append({
            'Name': product_name,
            'Price': product_price,
            'Rating': product_rating
        })

    # Step 6: Print the organized information for each product
    for i, product in enumerate(product_info, start=1):
        print(f"Product {i}:")
        print(f"Name: {product['Name']}")
        print(f"Price: {product['Price']}")
        print(f"Rating: {product['Rating']}")
        print()

else:
    print(f"Failed to retrieve the webpage. Status Code: {response.status_code}")
