# New Section

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

# Search-results URL template for the "apple laptop bags" query on amazon.in;
# the "{}" placeholder is filled with the page number.
base_url = "https://www.amazon.in/s?k=apple+laptop+bags&page={}"

# Browser-like HTTP headers (desktop Chrome user agent) passed to requests.get.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.amazon.in/",
}

# Upper bound on the number of search-result pages to request (adjust as needed).
max_pages = 5

# Column accumulators: one entry per scraped product, kept in step with
# product_links so they can be combined into a single table afterwards.
product_links = []
product_ids = []
brands_list = []
product_names = []
prices_list = []
ratings_list = []
reviews_list = []
features_list = []
category_list = []
image_urls = []

# Function to extract details from each product page
def scrape_product_page(url):
    """Fetch a product page and pull out the fields stored in the dataset.

    Args:
        url: URL of the product page to download.

    Returns:
        A 9-tuple of strings in this order: product name, price, rating,
        review count, specification text, product ID (ASIN), category,
        brand, image URL. A field whose element is missing from the page is
        ``"N/A"``. If the download or parsing raises, the error is printed
        and all nine fields are ``"N/A"``.
    """
    try:
        # Bound the wait so one stalled connection cannot hang the whole run.
        r = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(r.text, "lxml")

        # Product title
        title = soup.find("span", id="productTitle")
        product_name = title.text.strip() if title else "N/A"

        # Price: first element carrying the "a-price-whole" class
        price = soup.find("span", class_="a-price-whole")
        product_price = price.text.strip() if price else "N/A"

        # Rating: first whitespace-separated token of the first "a-icon-alt" text.
        # An empty text yields "N/A" for this field instead of failing the page.
        rating = soup.find("span", class_="a-icon-alt")
        rating_tokens = rating.text.split() if rating else []
        product_rating = rating_tokens[0] if rating_tokens else "N/A"

        # Review count: first token of the customer-review text
        reviews = soup.find("span", id="acrCustomerReviewText")
        review_tokens = reviews.text.split() if reviews else []
        product_reviews = review_tokens[0] if review_tokens else "N/A"

        # Specification text: first expander-content section, flattened to one
        # line with U+200E (left-to-right mark) characters removed.
        specification_section = soup.find("div", class_="a-expander-content a-expander-section-content a-section-expander-inner")
        specification = specification_section.text.strip().replace("\n", " ").replace("\u200e", "") if specification_section else "N/A"

        # Product ID: the <td> following the first <th> whose text contains "ASIN".
        # A header without a value cell only blanks this field.
        product_id_element = soup.find("th", string=lambda x: x and "ASIN" in x)
        product_id_cell = product_id_element.find_next_sibling("td") if product_id_element else None
        product_id = product_id_cell.text.strip() if product_id_cell else "N/A"

        # Brand: the <span> that follows a <span> whose text is exactly "Brand"
        brand_section = soup.find("span", string="Brand")
        brand_value = brand_section.find_next("span") if brand_section else None
        brand = brand_value.text.strip() if brand_value else "N/A"

        # Category is a fixed label (custom category for filtering)
        category = "Laptop Bag"

        # Main image URL; an <img> without a src attribute gives "N/A"
        image_tag = soup.find("img", id="landingImage")
        image_url = image_tag.get("src", "N/A") if image_tag else "N/A"

        return product_name, product_price, product_rating, product_reviews, specification, product_id, category, brand, image_url

    except Exception as e:
        # Best-effort scraping: report the failure and keep the row as placeholders.
        print(f"⚠ Error scraping {url}: {e}")
        return "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A"

# Step 1: walk the search-result pages and collect product page URLs.
for page in range(1, max_pages + 1):
    print(f"🔎 Scraping search results from page {page}...")
    url = base_url.format(page)
    # Bound the wait so a stalled connection cannot hang the run indefinitely.
    r = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(r.text, "lxml")

    # Each search hit is a <div data-component-type="s-search-result">.
    product_containers = soup.find_all("div", {"data-component-type": "s-search-result"})

    if not product_containers:
        # A page without result containers is treated as the end of the results.
        print(f"⚠ No data found on page {page}. Stopping.")
        break

    # Extract product links
    for product in product_containers:
        link_tag = product.find("a", class_="a-link-normal s-no-outline")
        # Skip hits that have no link or whose anchor lacks an href attribute.
        href = link_tag.get("href") if link_tag else None
        if href:
            product_link = "https://www.amazon.in" + href
            if product_link not in product_links:  # Avoid duplicates
                product_links.append(product_link)

    time.sleep(2)  # Delay to prevent blocking

# Step 2: scrape every collected product page and append one value per column.
for link in product_links:
    print(f"🔗 Scraping product: {link}")
    product_name, product_price, product_rating, product_reviews, product_features, product_id, category, product_brand, image_url = scrape_product_page(link)

    # Append extracted data to lists
    product_names.append(product_name)
    prices_list.append(product_price)
    ratings_list.append(product_rating)
    reviews_list.append(product_reviews)
    features_list.append(product_features)
    product_ids.append(product_id)
    category_list.append(category)
    brands_list.append(product_brand)
    image_urls.append(image_url)

    time.sleep(2)  # Prevent rapid requests

# Step 3: assemble the columns into a table (one row per product link).
df = pd.DataFrame({
    "Product ID": product_ids,
    "Brand": brands_list,
    "Product Name": product_names,
    "Price": prices_list,
    "Rating": ratings_list,
    "Review Count": reviews_list,
    "Features": features_list,
    "Category": category_list,
    "Product Link": product_links,
    "Image URL": image_urls
})

# Step 4: write the table to CSV without the index column.
csv_filename = "amazon_apple_laptop_bags.csv"
df.to_csv(csv_filename, index=False, encoding="utf-8")

print(f"✅ Data successfully saved to {csv_filename}")


🔎 Scraping search results from page 1...
🔎 Scraping search results from page 2...
🔎 Scraping search results from page 3...
🔎 Scraping search results from page 4...
🔎 Scraping search results from page 5...
🔗 Scraping product: https://www.amazon.in/Dyazo-13-3-MacBook-laptops-Notebooks/dp/B09X643Z2W/ref=sr_1_1?dib=eyJ2IjoiMSJ9.bpD7DYV_ZJPKdlL92-mCzRu0Jj1wRjBm1sHz5n_Gf_KIPykzdbOJJhcrsSMsMG6cmE5xUP4FEGcIFFa76gUUvpunaEUlIi5M6i1vGPSaYt7xEO8YWAq0dQDKKePFYHlK7O5ZNdWPmAC16qozyjoaGmmEUGKj_4vfSJ1WdY-sTY9X6_ZeoK2Ea1eilpfJLSYn_qU3QXdK4KL1WMi6vzgdeT6ior6SsJJMSRW5aVK2WjhJwjE3xh9Nk3DE--YidOFtHh00LXwWhEHkSUD5UZjdD08WnUAr-A3g1ChZnF68dpI.KYWX4c19zIoLF6M_ZnSb87PZuCEow8L3_OvzmeAtbjM&dib_tag=se&keywords=apple+laptop+bags&qid=1741364169&sr=8-1
🔗 Scraping product: https://www.amazon.in/MOCA-Universal-Premium-Leather-Laptop/dp/B07M9L1QRV/ref=sr_1_2?dib=eyJ2IjoiMSJ9.bpD7DYV_ZJPKdlL92-mCzRu0Jj1wRjBm1sHz5n_Gf_KIPykzdbOJJhcrsSMsMG6cmE5xUP4FEGcIFFa76gUUvpunaEUlIi5M6i1vGPSaYt7xEO8YWAq0dQDKKePFYHlK7O5ZNdWPmAC16qozyjoa

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

# Search-results URL template for the "samsung laptop bags" query on amazon.in;
# the "{}" placeholder is filled with the page number.
base_url = "https://www.amazon.in/s?k=samsung+laptop+bags&page={}"

# Browser-like HTTP headers (desktop Chrome user agent) passed to requests.get.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.amazon.in/",
}

# Upper bound on the number of search-result pages to request (adjust as needed).
max_pages = 5

# Column accumulators: one entry per scraped product, kept in step with
# product_links so they can be combined into a single table afterwards.
product_links = []
product_ids = []
brands_list = []
product_names = []
prices_list = []
ratings_list = []
reviews_list = []
features_list = []
category_list = []
image_urls = []

# Function to extract details from each product page
def scrape_product_page(url):
    """Fetch a product page and pull out the fields stored in the dataset.

    Args:
        url: URL of the product page to download.

    Returns:
        A 9-tuple of strings in this order: product name, price, rating,
        review count, specification text, product ID (ASIN), category,
        brand, image URL. A field whose element is missing from the page is
        ``"N/A"``. If the download or parsing raises, the error is printed
        and all nine fields are ``"N/A"``.
    """
    try:
        # Bound the wait so one stalled connection cannot hang the whole run.
        r = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(r.text, "lxml")

        # Product title
        title = soup.find("span", id="productTitle")
        product_name = title.text.strip() if title else "N/A"

        # Price: first element carrying the "a-price-whole" class
        price = soup.find("span", class_="a-price-whole")
        product_price = price.text.strip() if price else "N/A"

        # Rating: first whitespace-separated token of the first "a-icon-alt" text.
        # An empty text yields "N/A" for this field instead of failing the page.
        rating = soup.find("span", class_="a-icon-alt")
        rating_tokens = rating.text.split() if rating else []
        product_rating = rating_tokens[0] if rating_tokens else "N/A"

        # Review count: first token of the customer-review text
        reviews = soup.find("span", id="acrCustomerReviewText")
        review_tokens = reviews.text.split() if reviews else []
        product_reviews = review_tokens[0] if review_tokens else "N/A"

        # Specification text: first expander-content section, flattened to one
        # line with U+200E (left-to-right mark) characters removed.
        specification_section = soup.find("div", class_="a-expander-content a-expander-section-content a-section-expander-inner")
        specification = specification_section.text.strip().replace("\n", " ").replace("\u200e", "") if specification_section else "N/A"

        # Product ID: the <td> following the first <th> whose text contains "ASIN".
        # A header without a value cell only blanks this field.
        product_id_element = soup.find("th", string=lambda x: x and "ASIN" in x)
        product_id_cell = product_id_element.find_next_sibling("td") if product_id_element else None
        product_id = product_id_cell.text.strip() if product_id_cell else "N/A"

        # Brand: the <span> that follows a <span> whose text is exactly "Brand"
        brand_section = soup.find("span", string="Brand")
        brand_value = brand_section.find_next("span") if brand_section else None
        brand = brand_value.text.strip() if brand_value else "N/A"

        # No model lookup here: this function returns nine fields and has no
        # slot for a model number, so nothing would consume the value.

        # Category is a fixed label (custom category for filtering)
        category = "Laptop Bag"

        # Main image URL; an <img> without a src attribute gives "N/A"
        image_tag = soup.find("img", id="landingImage")
        image_url = image_tag.get("src", "N/A") if image_tag else "N/A"

        return product_name, product_price, product_rating, product_reviews, specification, product_id, category, brand, image_url

    except Exception as e:
        # Best-effort scraping: report the failure and keep the row as placeholders.
        print(f"⚠ Error scraping {url}: {e}")
        return "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A"

# Step 1: walk the search-result pages and collect product page URLs.
for page in range(1, max_pages + 1):
    print(f"🔎 Scraping search results from page {page}...")
    url = base_url.format(page)
    # Bound the wait so a stalled connection cannot hang the run indefinitely.
    r = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(r.text, "lxml")

    # Each search hit is a <div data-component-type="s-search-result">.
    product_containers = soup.find_all("div", {"data-component-type": "s-search-result"})

    if not product_containers:
        # A page without result containers is treated as the end of the results.
        print(f"⚠ No data found on page {page}. Stopping.")
        break

    # Extract product links
    for product in product_containers:
        link_tag = product.find("a", class_="a-link-normal s-no-outline")
        # Skip hits that have no link or whose anchor lacks an href attribute.
        href = link_tag.get("href") if link_tag else None
        if href:
            product_link = "https://www.amazon.in" + href
            if product_link not in product_links:  # Avoid duplicates
                product_links.append(product_link)

    time.sleep(2)  # Delay to prevent blocking

# Step 2: scrape every collected product page and append one value per column.
for link in product_links:
    print(f"🔗 Scraping product: {link}")
    product_name, product_price, product_rating, product_reviews, product_features, product_id, category, product_brand, image_url = scrape_product_page(link)

    # Append extracted data to lists
    product_names.append(product_name)
    prices_list.append(product_price)
    ratings_list.append(product_rating)
    reviews_list.append(product_reviews)
    features_list.append(product_features)
    product_ids.append(product_id)
    category_list.append(category)
    brands_list.append(product_brand)
    image_urls.append(image_url)

    time.sleep(2)  # Prevent rapid requests

# Step 3: assemble the columns into a table (one row per product link).
df = pd.DataFrame({
    "Product ID": product_ids,
    "Brand": brands_list,
    "Product Name": product_names,
    "Price": prices_list,
    "Rating": ratings_list,
    "Review Count": reviews_list,
    "Features": features_list,
    "Category": category_list,
    "Product Link": product_links,
    "Image URL": image_urls
})

# Step 4: write the table to CSV without the index column.
csv_filename = "amazon_samsung_laptop_bags.csv"
df.to_csv(csv_filename, index=False, encoding="utf-8")

print(f"✅ Data successfully saved to {csv_filename}")


🔎 Scraping search results from page 1...
🔎 Scraping search results from page 2...
🔎 Scraping search results from page 3...
🔎 Scraping search results from page 4...
🔎 Scraping search results from page 5...
🔗 Scraping product: https://www.amazon.in/Accessories-Pockets-Compatible-Samsung-Notbooks/dp/B0BRJ7QX33/ref=sr_1_1?dib=eyJ2IjoiMSJ9.zqZcWCYRG27jur9urpUD4MP2NQ1aFeNMPHSpocG_cjZDZkIxa3Wqnfm7zJLqWsujfdUI6r9gpehtej6FggrCdSqBkMAA_MgV9annWGVXsiGKLvw5betAM663gwyNsO0MoUFaxcDE1zetHgNuNRB9CuYSy-13tdd8Xhn9ubtz04-bWXlXUYueAAy8fREu4hT8Xn0ohyZC6zlXW2FLlgDJ7McnftR4ghgjORoyjIEeHg4.JVgNHBXrb-XHXQSIyMvb_LMOfFG_igt_1BM2KC5UjCU&dib_tag=se&keywords=samsung+laptop+bags&qid=1741365558&sr=8-1
🔗 Scraping product: https://www.amazon.in/Number-Backpack-Compartment-Charging-Organizer/dp/B09VTDMRY7/ref=sr_1_2?dib=eyJ2IjoiMSJ9.zqZcWCYRG27jur9urpUD4MP2NQ1aFeNMPHSpocG_cjZDZkIxa3Wqnfm7zJLqWsujfdUI6r9gpehtej6FggrCdSqBkMAA_MgV9annWGVXsiGKLvw5betAM663gwyNsO0MoUFaxcDE1zetHgNuNRB9CuYSy-13tdd8Xhn9ubtz04-bWXlXUYueAAy8fREu4h

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

# Search-results URL template for the "apple laptop bags" query on amazon.in;
# the "{}" placeholder is filled with the page number.
base_url = "https://www.amazon.in/s?k=apple+laptop+bags&page={}"

# Browser-like HTTP headers (desktop Chrome user agent) passed to requests.get.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.amazon.in/",
}

# Upper bound on the number of search-result pages to request (adjust as needed).
max_pages = 5

# Column accumulators: one entry per scraped product, kept in step with
# product_links so they can be combined into a single table afterwards.
product_links = []
product_ids = []
brands_list = []
model_list = []
product_names = []
prices_list = []
ratings_list = []
reviews_list = []
features_list = []
category_list = []
image_urls = []

# Function to extract details from the product page
def get_product_details(soup, detail_name):
    """Look up one value in the product specifications table.

    Finds the first ``<th>`` whose text contains ``detail_name`` and returns
    the stripped text of the ``<td>`` that follows it.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.
        detail_name: Text to look for inside the header cell (substring match).

    Returns:
        The stripped value text, or ``"N/A"`` when the header cell or its
        value cell is missing, or when the lookup raises.
    """
    try:
        detail_element = soup.find("th", string=lambda x: x and detail_name in x)
        if not detail_element:
            return "N/A"
        value_cell = detail_element.find_next_sibling("td")
        # A header with no following <td> has no value to report.
        if value_cell is None:
            return "N/A"
        return value_cell.text.strip()
    except Exception:
        # Best-effort lookup: lookup errors become "N/A", while
        # KeyboardInterrupt/SystemExit still propagate (unlike a bare except).
        return "N/A"

def scrape_product_page(url):
    """Scrape an individual product page for detailed information.

    Args:
        url: URL of the product page to download.

    Returns:
        A 10-tuple of strings in this order: product name, price, rating,
        review count, features text, product ID (ASIN), category, brand,
        model number, image URL. A field whose element is missing from the
        page is ``"N/A"``. If the download or parsing raises, the error is
        printed and all ten fields are ``"N/A"``.
    """
    try:
        # Bound the wait so one stalled connection cannot hang the whole run.
        r = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(r.text, "lxml")

        # Product title
        title = soup.find("span", id="productTitle")
        product_name = title.text.strip() if title else "N/A"

        # Price: first element carrying the "a-price-whole" class
        price = soup.find("span", class_="a-price-whole")
        product_price = price.text.strip() if price else "N/A"

        # Rating: first whitespace-separated token of the first "a-icon-alt" text.
        # An empty text yields "N/A" for this field instead of failing the page.
        rating = soup.find("span", class_="a-icon-alt")
        rating_tokens = rating.text.split() if rating else []
        product_rating = rating_tokens[0] if rating_tokens else "N/A"

        # Review count: first token of the customer-review text
        reviews = soup.find("span", id="acrCustomerReviewText")
        review_tokens = reviews.text.split() if reviews else []
        product_reviews = review_tokens[0] if review_tokens else "N/A"

        # Features text: first expander-content section, flattened to one
        # line with U+200E (left-to-right mark) characters removed.
        specification_section = soup.find("div", class_="a-expander-content a-expander-section-content a-section-expander-inner")
        product_features = specification_section.text.strip().replace("\n", " ").replace("\u200e", "") if specification_section else "N/A"

        # Specification-table lookups (each falls back to "N/A" on its own)
        product_id = get_product_details(soup, "ASIN")
        brand = get_product_details(soup, "Brand")
        model = get_product_details(soup, "Item model number")

        # Main image URL; an <img> without a src attribute gives "N/A"
        image_tag = soup.find("img", id="landingImage")
        image_url = image_tag.get("src", "N/A") if image_tag else "N/A"

        # Category is a fixed label
        category = "Laptop Bags"

        return product_name, product_price, product_rating, product_reviews, product_features, product_id, category, brand, model, image_url

    except Exception as e:
        # Best-effort scraping: report the failure and keep the row as placeholders.
        print(f"⚠ Error scraping {url}: {e}")
        return "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A"

# Step 1: walk the search-result pages and collect product page URLs.
for page in range(1, max_pages + 1):
    print(f"🔎 Scraping search results from page {page}...")
    url = base_url.format(page)
    # Bound the wait so a stalled connection cannot hang the run indefinitely.
    r = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(r.text, "lxml")

    # Each search hit is a <div data-component-type="s-search-result">.
    product_containers = soup.find_all("div", {"data-component-type": "s-search-result"})

    if not product_containers:
        # A page without result containers is treated as the end of the results.
        print(f"⚠ No data found on page {page}. Stopping.")
        break

    # Extract product links
    for product in product_containers:
        link_tag = product.find("a", class_="a-link-normal s-no-outline")
        # Skip hits that have no link or whose anchor lacks an href attribute.
        href = link_tag.get("href") if link_tag else None
        if href:
            product_link = "https://www.amazon.in" + href
            if product_link not in product_links:  # Avoid duplicates
                product_links.append(product_link)

    time.sleep(2)  # Delay to prevent blocking

# Step 2: scrape every collected product page and append one value per column.
for link in product_links:
    print(f"🔗 Scraping product: {link}")
    product_name, product_price, product_rating, product_reviews, product_features, product_id, category, product_brand, model, image_url = scrape_product_page(link)

    # Append extracted data to lists
    product_names.append(product_name)
    prices_list.append(product_price)
    ratings_list.append(product_rating)
    reviews_list.append(product_reviews)
    features_list.append(product_features)
    product_ids.append(product_id)
    category_list.append(category)
    brands_list.append(product_brand)
    model_list.append(model)
    image_urls.append(image_url)

    time.sleep(2)  # Prevent rapid requests

# Step 3: assemble the columns into a table (one row per product link).
df = pd.DataFrame({
    "Product ID": product_ids,
    "Brand": brands_list,
    "Model": model_list,
    "Product Name": product_names,
    "Price": prices_list,
    "Rating": ratings_list,
    "Review Count": reviews_list,
    "Features": features_list,
    "Category": category_list,
    "Product Link": product_links,
    "Image URL": image_urls
})

# Step 4: write the table to CSV without the index column.
# The search URL in this cell queries "apple laptop bags", so the output is
# named after that query rather than overwriting the samsung results file.
csv_filename = "amazon_apple_laptop_bags.csv"
df.to_csv(csv_filename, index=False, encoding="utf-8")

print(f"✅ Data successfully saved to {csv_filename}")


🔎 Scraping search results from page 1...
🔎 Scraping search results from page 2...
🔎 Scraping search results from page 3...
🔎 Scraping search results from page 4...
🔎 Scraping search results from page 5...
🔗 Scraping product: https://www.amazon.in/Dyazo-13-3-MacBook-laptops-Notebooks/dp/B09X643Z2W/ref=sr_1_1?dib=eyJ2IjoiMSJ9.bpD7DYV_ZJPKdlL92-mCzRu0Jj1wRjBm1sHz5n_Gf_IN0hYRcWK5jo7KlmRWWRTEN5JQ-JOoGk9zIN7dmWmBtRB2JnFqw1UlkuSH4oeYEqlCmyOCb723QPUlIu3RXUj2XWEdEUAIrs6dwhKHY-czWmMzp607KcdK67H7bq48HNQP9JX5i2DnivK2ELuFBDHnOKkdkEp5gRTQ5ldurwUdZ6eWUviJ6oU2NtdKih_wlnA.smsrye1nKsoE2JrRXVD7GeNzzMlEDqksM2nLZWvrFdI&dib_tag=se&keywords=apple+laptop+bags&qid=1741374495&sr=8-1
🔗 Scraping product: https://www.amazon.in/MOCA-Universal-Premium-Leather-Laptop/dp/B07M9L1QRV/ref=sr_1_2?dib=eyJ2IjoiMSJ9.bpD7DYV_ZJPKdlL92-mCzRu0Jj1wRjBm1sHz5n_Gf_IN0hYRcWK5jo7KlmRWWRTEN5JQ-JOoGk9zIN7dmWmBtRB2JnFqw1UlkuSH4oeYEqlCmyOCb723QPUlIu3RXUj2XWEdEUAIrs6dwhKHY-czWmMzp607KcdK67H7bq48HNQP9JX5i2DnivK2ELuFBDHnOKkdkEp5gRTQ5ldurwUd