In [8]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def extract_url(driver):
    product_urls = []

    # scrapping 2 pages for now can change range for more
    for page_num in range(1, 3):
        # Scroll down 
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2) 

        # extracting urls 
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        product_elements = soup.select('h2 a')

        for product_element in product_elements:
            product_url = urljoin(driver.current_url, product_element['href'])
            product_urls.append(product_url)

        # going to the next page if available
        try:
            next_button = driver.find_element(By.CSS_SELECTOR, '.s-pagination-next')
            next_button.click()
            time.sleep(2)
        except selenium.common.exceptions.NoSuchElementException:
            break  # Break the loop if there is no next page button

    return product_urls

def scrape_product_details(driver, product_url, num_pages=3):
    driver.get(product_url)
    time.sleep(2) 

    # extracting title(h1) tag from the product page
    h1_element = driver.find_element(By.TAG_NAME, 'h1')
    product_title = h1_element.text

    try:
        # extracting the store name from the element with ID
        store_name_element = driver.find_element(By.XPATH, '//*[@id="bylineInfo"]')
        store_name = store_name_element.text.strip()
        store_name = store_name.replace("Visit the", "").strip()
        print(f"Store Name: {store_name}")
        print("***********************")

        # extracting the details section for "About this item" 
        about_this_item_div = driver.find_element(By.ID, 'detailBullets_feature_div')
        ul_element_about_this_item = about_this_item_div.find_element(By.CLASS_NAME, 'a-unordered-list')
        li_elements_about_this_item = ul_element_about_this_item.find_elements(By.TAG_NAME, 'li')
       
        # extracting information from each li element under "About this item"
        print("Detail:")
        for index, li in enumerate(li_elements_about_this_item):
            text = li.text.strip()
            if "ASIN" in text:
                asin_value = text.split(':')[-1].strip()
            elif "Department" in text:
                department_value = text.split(':')[-1].strip()
            elif "Manufacturer" in text:
                manufacturer_value = text.split(':')[-1].strip()    

        print("ASIN:", asin_value)
        print("Department:", department_value)
        print("Manufacturer:", manufacturer_value)

        # extracting the details section using XPath
        details_div = driver.find_element(By.XPATH, '//*[@id="productFactsDesktopExpander"]/div[1]')
        ul_elements = details_div.find_elements(By.XPATH, './/ul')
        print("About this item:")
        for ul_index, ul_element in enumerate(ul_elements):
            # Extract information from each li element
            li_elements = ul_element.find_elements(By.TAG_NAME, 'li')
            for li_index, li in enumerate(li_elements):
                text = li.text.strip()
                if text:
                    print(f"{text}")

        # extracting product price 
        try:
            price_element = driver.find_element(By.CSS_SELECTOR, ".a-price")
            price = price_element.text.split('\n')[0]
            print(f"Product Price: {price}")
        except selenium.common.exceptions.NoSuchElementException:
            print(f"Price not found for Product {product_title}")

        # extracting product ratings using XPath
        try:
            ratings_element = driver.find_element(By.XPATH, '//*[@id="acrPopover"]')
            ratings_text = ratings_element.get_attribute('title')
            ratings_value = float(ratings_text.split()[0])
            print(f"Product Ratings: {ratings_value}")
        except selenium.common.exceptions.NoSuchElementException:
            print(f"Ratings not found for Product {product_title}")
        except ValueError:
            print(f"Unable to convert ratings to float for Product {product_title}")
        print("***********************")


        # extracting color images and alternate images
        try:
            # extracting color images from variation_color_name
            color_variation_div = driver.find_element(By.ID, 'variation_color_name')
            color_images = color_variation_div.find_elements(By.TAG_NAME, 'img')
            print("Color Images:")
            for color_image in color_images:
                src = color_image.get_attribute('src')
                alt = color_image.get_attribute('alt')
                print(f"Color image: {src}, Alt: {alt}")
            print("***********************")

            # Extract alternate images from altImages
            alt_images_div = driver.find_element(By.ID, 'altImages')
            alt_images = alt_images_div.find_elements(By.TAG_NAME, 'img')
            print("Alternate Images:")
            for alt_image in alt_images:
                src = alt_image.get_attribute('src')
                alt = alt_image.get_attribute('alt')
                print(f"Src: {src}")
        except selenium.common.exceptions.NoSuchElementException:
            print(f"Images not found for Product {product_title}")



       # clicking on the "See more reviews" link
        try:
            all_reviews_link = driver.find_element(By.PARTIAL_LINK_TEXT, "See more reviews")
            all_reviews_link.click()
            time.sleep(2) 
        except selenium.common.exceptions.NoSuchElementException:
            print("See more reviews link not found.")

        # extracting reviews from multiple pages
        for page in range(num_pages):
            try:
                review_elements = driver.find_elements(By.CSS_SELECTOR, '.a-section.review')
                print(f"Reviews - Page {page + 1}:")
                for review_element in review_elements:
                    # Extract reviewer name
                    reviewer_name = review_element.find_element(By.CLASS_NAME, 'a-profile-name').text
                    # Extract review text
                    review_text = review_element.find_element(By.CLASS_NAME, 'review-text').text
                    # Extract review date
                    review_date = review_element.find_element(By.CLASS_NAME, 'review-date').text
                    print(f"Reviewer: {reviewer_name}, Review Date: {review_date}, Review Text: {review_text}")

            except selenium.common.exceptions.NoSuchElementException:
                print(f"Reviews not found for Product {product_title} - Page {page + 1}")
            print("***********************")

            # Click on the next page button
            try:
                next_page_button = driver.find_element(By.XPATH, '//*[@id="cm_cr-pagination_bar"]/ul/li[2]/a')
                next_page_button.click()
                time.sleep(2)
            except selenium.common.exceptions.NoSuchElementException:
                print(f"No next page button found on Product {product_title} - Page {page + 1}")
                break
        print("***********************")

    except selenium.common.exceptions.NoSuchElementException:
        print("Details section not found on the page.")

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    print(f"Product Title: {product_title}")
    print(f"Product URL: {product_url}\n")
    print("-" * 50)

    return product_title


driver = webdriver.Firefox()
driver.get('https://www.amazon.com/s?i=fashion-mens-intl-ship&bbn=16225019011&rh=n%3A7141123011%2Cn%3A16225019011%2Cn%3A7147441011%2Cn%3A1040658%2Cn%3A2476517011&dc&ds=v1%3AiucC4vWxUQrN40ScYY%2B7Uty8b6PODB4X9tSLBOsEQns&qid=1708532371&rnid=7141123011&ref=sr_nr_n_2')

product_urls = extract_url(driver)

for product_url in product_urls:
    product_title = scrape_product_details(driver, product_url)

driver.quit()


Store Name: Propper Store
***********************
Details section not found on the page.
Product Title: Propper Men's Uniform Polo-Long Sleeve
Product URL: https://www.amazon.com/sspa/click?ie=UTF8&spc=MToxMTc5Mzk0ODA0MTUwNTE6MTcwODUzMzU4MTpzcF9hdGZfYnJvd3NlOjIwMDAyMzY4NTA3OTQxMTo6MDo6&url=%2FPropper-Mens-Uniform-Sleeve-X-Small%2Fdp%2FB075F4P4BK%2Fref%3Dsr_1_1%3Fdib%3DeyJ2IjoiMSJ9.KBbnmzQNW_dUv8sd06soZcK_rm5DouHQOMrOISFsBJn31SD2VakRDyv_n0vN-cuw9gqm1Rb1kEmo4Xj-kPO6MhTP66g0AWnUzB0U2uYVIUZ_OJ263DXfAMmKUwEdPAkQD8_zSaF8px7Ft9pwS6uEwqUTRjeqcxQrWAkQnBqHsMY-7QA8OV2uuX_bR7phsP6ZIbonMCcY9GsNT92K5AE7wMI7zVVln3FnMXyCn7VckqeyMEtosHQzdKCiIojV1cpoC2j4QU95dxCLAMyV1bYt2ETME2ymK8-W_gfvqSRHS1c.yRjbGcpcKPpnPDup4tSu1Xt_Xly9NcTZWxt9963zRZY%26dib_tag%3Dse%26qid%3D1708533581%26rnid%3D7141123011%26s%3Dfashion-mens-intl-ship%26sr%3D1-1-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9hdGZfYnJvd3Nl%26psc%3D1

--------------------------------------------------
Store Name: Chef Works Store
***********************
Detail:
ASIN: