In [2]:
import datetime
today = datetime.datetime.now()
print(today)

2025-05-04 14:16:04.229726


In [3]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Headers to mimic browser request
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
                  " AppleWebKit/537.36 (KHTML, like Gecko)"
                  " Chrome/91.0.4472.124 Safari/537.36"
}

# Rating conversion dictionary
RATING_MAP = {
    'One': 1,
    'Two': 2,
    'Three': 3,
    'Four': 4,
    'Five': 5
}

def fetch_page(url):
    """Fetch the HTML content of a page."""
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        print(f"Failed to fetch page: {url}")
        return None
    return BeautifulSoup(response.text, 'html.parser')

def extract_product_links(soup, base_url):
    """Extract product detail page links from the main page."""
    links = []
    products = soup.find_all('article', class_='product_pod')
    for product in products:
        relative_url = product.h3.a['href']
        full_url = urljoin(base_url, relative_url)
        links.append(full_url)
    return links

def extract_product_info(product_url):
    """Extract title, price, rating, reviews, and availability from the product page."""
    soup = fetch_page(product_url)
    if not soup:
        return None

    # Title
    title = soup.find('div', class_='product_main').h1.text.strip()

    # Price
    price = soup.find('p', class_='price_color').text.strip()

    # Rating
    rating_tag = soup.find('p', class_='star-rating')
    rating_class = [cls for cls in rating_tag['class'] if cls != 'star-rating'][0]
    rating = RATING_MAP.get(rating_class, 0)

    # Reviews
    reviews = 0
    table = soup.find('table', class_='table table-striped')
    for row in table.find_all('tr'):
        if row.th.text.strip() == "Number of reviews":
            reviews = int(row.td.text.strip())
            break

    # Availability
    availability_tag = soup.find('p', class_='instock availability')
    availability = availability_tag.text.strip() if availability_tag else "Unknown"

    return {
        'title': title,
        'price': price,
        'rating': rating,
        'reviews': reviews,
        'availability': availability
    }

def display_product(product):
    """Print product details in the required format."""
    print(f"Title: {product['title']}")
    print(f"Price: {product['price']}")
    print(f"Rating: {product['rating']} stars")
    print(f"Reviews: {product['reviews']}")
    print(f"Availability: {product['availability']}")
    print()

def main():
    base_url = "http://books.toscrape.com/"
    homepage_soup = fetch_page(base_url)
    
    if homepage_soup:
        product_links = extract_product_links(homepage_soup, base_url)
        for link in product_links:
            product = extract_product_info(link)
            if product:
                display_product(product)

# Run the script
if __name__ == "__main__":
    main()

Title: A Light in the Attic
Price: Â£51.77
Rating: 3 stars
Reviews: 0
Availability: In stock (22 available)

Title: Tipping the Velvet
Price: Â£53.74
Rating: 1 stars
Reviews: 0
Availability: In stock (20 available)

Title: Soumission
Price: Â£50.10
Rating: 1 stars
Reviews: 0
Availability: In stock (20 available)

Title: Sharp Objects
Price: Â£47.82
Rating: 4 stars
Reviews: 0
Availability: In stock (20 available)

Title: Sapiens: A Brief History of Humankind
Price: Â£54.23
Rating: 5 stars
Reviews: 0
Availability: In stock (20 available)

Title: The Requiem Red
Price: Â£22.65
Rating: 1 stars
Reviews: 0
Availability: In stock (19 available)

Title: The Dirty Little Secrets of Getting Your Dream Job
Price: Â£33.34
Rating: 4 stars
Reviews: 0
Availability: In stock (19 available)

Title: The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull
Price: Â£17.93
Rating: 3 stars
Reviews: 0
Availability: In stock (19 available)

Title: The Boys in the Boat: Nine Amer

In [4]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import os

# Headers to mimic browser request
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
                  " AppleWebKit/537.36 (KHTML, like Gecko)"
                  " Chrome/91.0.4472.124 Safari/537.36"
}

# Rating conversion dictionary
RATING_MAP = {
    'One': 1,
    'Two': 2,
    'Three': 3,
    'Four': 4,
    'Five': 5
}

def fetch_page(url):
    """Fetch the HTML content of a page."""
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        print(f"Failed to fetch page: {url}")
        return None
    return BeautifulSoup(response.text, 'html.parser')

def extract_product_links(soup, base_url):
    """Extract product detail page links from the main page."""
    links = []
    products = soup.find_all('article', class_='product_pod')
    for product in products:
        relative_url = product.h3.a['href']
        full_url = urljoin(base_url, relative_url)
        links.append(full_url)
    return links

def extract_product_info(product_url):
    """Extract title, price, rating, reviews, and availability from the product page."""
    soup = fetch_page(product_url)
    if not soup:
        return None

    # Title
    title = soup.find('div', class_='product_main').h1.text.strip()

    # Price
    price = soup.find('p', class_='price_color').text.strip()

    # Rating
    rating_tag = soup.find('p', class_='star-rating')
    rating_class = [cls for cls in rating_tag['class'] if cls != 'star-rating'][0]
    rating = RATING_MAP.get(rating_class, 0)

    # Reviews
    reviews = 0
    table = soup.find('table', class_='table table-striped')
    for row in table.find_all('tr'):
        if row.th.text.strip() == "Number of reviews":
            reviews = int(row.td.text.strip())
            break

    # Availability
    availability_tag = soup.find('p', class_='instock availability')
    availability = availability_tag.text.strip() if availability_tag else "Unknown"

    return {
        'title': title,
        'price': price,
        'rating': rating,
        'reviews': reviews,
        'availability': availability
    }

def save_to_file(data, filename="products.txt"):
    """Save the extracted product data to a text file."""
    file_path = os.path.abspath(filename)  # Get the absolute path of the file
    with open(filename, mode='w', encoding='utf-8') as file:
        for product in data:
            file.write(f"Title: {product['title']}\n")
            file.write(f"Price: {product['price']}\n")
            file.write(f"Rating: {product['rating']} stars\n")
            file.write(f"Reviews: {product['reviews']}\n")
            file.write(f"Availability: {product['availability']}\n")
            file.write("\n")  # Add a blank line between products
    print(f"Product data has been saved successfully to '{file_path}'.")

def main():
    base_url = "http://books.toscrape.com/"
    homepage_soup = fetch_page(base_url)
    
    if homepage_soup:
        product_links = extract_product_links(homepage_soup, base_url)
        product_data = []
        for link in product_links:
            product = extract_product_info(link)
            if product:
                product_data.append(product)

        # Save data to a text file and confirm
        save_to_file(product_data, "products.txt")

# Run the script
if __name__ == "__main__":
    main()

Product data has been saved successfully to '/home/adnanmalik/code/AI-CV-Lab/experiment7/products.txt'.
