In [1]:
import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Amazon search endpoint that every page request is sent to.
BASE_URL = "https://www.amazon.com/s"

# Request headers sent with each fetch so the request resembles a desktop browser.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
}

def fetch_page(url, params, timeout=10):
    """Fetch a page and return it parsed, or ``None`` on any failure.

    Args:
        url: Address to request.
        params: Query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``BeautifulSoup`` tree built with ``html.parser`` when the server
        answers with HTTP 200; otherwise ``None`` (a message is printed).
    """
    try:
        # Without an explicit timeout a stalled connection can block forever.
        response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures follow the same "print and return None"
        # convention as a non-200 status, so callers need only one check.
        print(f"Failed to fetch page: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch page: {response.status_code}")
        return None

    return BeautifulSoup(response.content, "html.parser")

def parse_products(soup):
    """Extract one row per search result from a parsed results page.

    Args:
        soup: Parsed page exposing ``find_all``; each result card is a
            ``div`` whose ``data-component-type`` is ``s-search-result``.

    Returns:
        A list of ``[title, price, rating, link]`` lists. Missing pieces are
        replaced by the placeholders ``"No title available"``,
        ``"Price not listed"``, ``"No rating"`` and ``"No link available"``.
    """
    product_data = []

    for product in soup.find_all("div", {"data-component-type": "s-search-result"}):
        heading = product.h2
        title = heading.text.strip() if heading is not None else "No title available"

        price_whole = product.find("span", class_="a-price-whole")
        price_fraction = product.find("span", class_="a-price-fraction")
        if price_whole is not None and price_fraction is not None:
            # The whole-part text may already end with the decimal separator
            # (e.g. "999."); strip it so the result is "999.00", not "999..00".
            whole = price_whole.text.strip().rstrip(".")
            price = f"{whole}.{price_fraction.text.strip()}"
        else:
            price = "Price not listed"

        rating_tag = product.find("span", class_="a-icon-alt")
        rating = rating_tag.text.strip() if rating_tag is not None else "No rating"

        anchor = heading.a if heading is not None else None
        # .get avoids a KeyError when the anchor carries no href attribute.
        href = anchor.get("href") if anchor is not None else None
        # urljoin leaves absolute URLs untouched and resolves relative ones
        # against the site root.
        full_link = urljoin("https://www.amazon.com", href) if href else "No link available"

        product_data.append([title, price, rating, full_link])

    return product_data

def scrape_amazon_iphones(output_file, max_pages=5, query="iphone", delay=2):
    """Scrape Amazon search results page by page and save them to a CSV.

    Args:
        output_file: Path of the CSV file to (over)write.
        max_pages: Highest result page to request, starting from page 1.
        query: Search term sent as the ``k`` URL parameter.
        delay: Seconds to pause between consecutive page requests.

    The CSV starts with a ``Title, Price, Rating, Link`` header row followed
    by one row per parsed product. Scraping stops at the first page that
    cannot be fetched; rows written before that point are kept.
    """
    with open(output_file, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Title", "Price", "Rating", "Link"])

        for page in range(1, max_pages + 1):
            print(f"Fetching page {page}...")
            soup = fetch_page(BASE_URL, {"k": query, "page": page})

            if soup is None:
                print(f"Stopping due to fetch error on page {page}.")
                break

            products = parse_products(soup)
            if not products:
                # A page that parses to zero results would otherwise pass
                # silently (possibly a bot-check page or a layout change).
                print(f"No products found on page {page}.")
            writer.writerows(products)

            # Pause only when another request will follow.
            if page < max_pages:
                time.sleep(delay)

    print(f"Data saved to {output_file}")

# Run the scraper: up to five result pages, written to iphones.csv.
scrape_amazon_iphones(output_file="iphones.csv", max_pages=5)


Fetching page 1...
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Data saved to iphones.csv


In [None]:
# Repository: https://github.com/mariam530/my_assignments