In [30]:
from selenium import webdriver
from selenium.common.exceptions import (NoSuchElementException,
                                        TimeoutException, WebDriverException, UnexpectedAlertPresentException,
                                       ElementClickInterceptedException, ElementNotInteractableException)
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep
from datetime import date, datetime, timedelta
import os
import re
import shutil
import glob
import requests
import wget
import zipfile

In [3]:


def download_latest_driver():
    '''
    Download the latest ChromeDriver (win32 build) and unpack it into the
    user's Downloads folder.

    Returns:
        Path to the extracted ``chromedriver.exe`` inside the Downloads folder.

    Raises:
        requests.HTTPError: if the version lookup does not return a success status.
        requests.RequestException: on network failure or timeout during the lookup.

    NOTE(review): the ``chromedriver.storage.googleapis.com`` bucket is the
    legacy distribution channel; newer ChromeDriver releases are published
    through "Chrome for Testing". Confirm this endpoint still serves the
    version you need.
    '''
    # get the latest chrome driver version number
    url = 'https://chromedriver.storage.googleapis.com/LATEST_RELEASE'
    response = requests.get(url, timeout=30)
    # fail here instead of building a download URL out of an error page
    response.raise_for_status()
    # strip stray whitespace/newlines so the URL below stays valid
    version_number = response.text.strip()

    # build the download url
    download_url = "https://chromedriver.storage.googleapis.com/" + version_number + "/chromedriver_win32.zip"

    # USERPROFILE is only set on Windows; fall back to the home directory elsewhere
    home = os.getenv('USERPROFILE') or os.path.expanduser('~')
    downloads = os.path.join(home, 'Downloads')

    # download the zip file using the url built above
    latest_driver_zip = wget.download(download_url, 'chromedriver.zip')
    try:
        # extract the zip file
        with zipfile.ZipFile(latest_driver_zip, 'r') as zip_ref:
            zip_ref.extractall(downloads)  # you can specify the destination folder path here
    finally:
        # delete the downloaded zip even when extraction fails
        os.remove(latest_driver_zip)
    return os.path.join(downloads, 'chromedriver.exe')
# download_latest_driver()
# downloads = os.path.join(os.getenv('USERPROFILE'),'Downloads')
# driver_path = os.path.join(downloads,'chromedriver.exe')

In [17]:
def default_options():
    '''
    Build the ChromeOptions object used when starting a browser.

    Only certificate errors are ignored; "--test-type" and "--headless"
    are optional flags that are currently left off.
    '''
    enabled_flags = ('--ignore-certificate-errors',)
    chrome_opts = webdriver.ChromeOptions()
    for flag in enabled_flags:
        chrome_opts.add_argument(flag)
    return chrome_opts
def start_driver():
    '''
    Start a Chrome browser configured with default_options() and return
    the resulting driver.
    '''
    # A custom browser binary (e.g. "/usr/bin/chromium") could be set on the
    # options object via binary_location before the driver is created.
    return webdriver.Chrome(options=default_options())
# with start_driver() as driver:
#     driver.get()
# driver = start_driver()
# driver.close()

In [49]:
class AmazonDriver(webdriver.Chrome):
    '''
    Chrome driver extended with helpers that read fields from an Amazon
    product listing page.

    Typical use: call get_listing(asin) to load the page, then extract_data()
    (or the individual get_* methods) to read values from it.
    '''
    def __init__(self,chrome_options):
        '''
        Start Chrome with the given ChromeOptions.
        '''
        super().__init__(options=chrome_options)
        # ASIN of the most recently requested listing; set by get_listing()
        self.asin = None
    def get_listing(self,asin):
        '''
        Open the listing page for the given ASIN and dismiss the
        "Continue shopping" button if it shows up.
        '''
        url = f'https://www.amazon.com/dp/{asin}'
        self.asin = asin
        self.get(url)
        self.continue_shopping()
    def continue_shopping(self):
        '''
        Click continue shopping button if present
        '''
        try:
            # wait on this driver instance, not on a notebook-level global
            shopping_button = WebDriverWait(self,3).until(EC.element_to_be_clickable((By.CLASS_NAME,'a-button-text')))
            if shopping_button.text == 'Continue shopping':
                shopping_button.click()
        except TimeoutException:
            # no clickable button appeared within the wait; nothing to dismiss
            pass
    def get_by_xpath(self,xpath):
        '''
        Get Text by XPATH
        '''
        return self.find_element(By.XPATH,xpath).text
    def get_title(self):
        '''
        Return the text of the element with id "title".
        '''
        return self.find_element(By.ID,'title').text
    def get_price(self):
        '''
        Return the price as "<whole>.<fraction>", built from the first
        "a-price-whole" and "a-price-fraction" elements on the page.
        '''
        classes = ["a-price-whole","a-price-fraction"]
        return '.'.join(self.find_element(By.CLASS_NAME,c).text for c in classes)
    def get_list_price(self):
        '''
        Return the list price text with "$" and "," removed.
        '''
        list_price = self.get_by_xpath('//*[@id="corePriceDisplay_desktop_feature_div"]/div[2]/span/span[1]/span[2]/span')
        return re.sub(r'[$,]','',list_price)
    def get_discount(self):
        '''
        Return the discount text shown in the price block.
        '''
        return self.get_by_xpath('//*[@id="corePriceDisplay_desktop_feature_div"]/div[1]/span[2]')
    def get_raiting(self):
        '''
        Return the rating text from the "acrPopover" element.
        '''
        return self.get_by_xpath('//*[@id="acrPopover"]/span[1]/a/span')
    def get_raiting_count(self):
        '''
        Return the number of ratings as a string of digits.

        Thousands separators are removed ("1,234 ratings" -> "1234"). When
        the text contains no digits, the raw text is returned unchanged.
        '''
        raitings = self.get_by_xpath('//*[@id="acrCustomerReviewText"]')
        # a digit followed by digits/commas, so "1,234" is captured as one number
        match = re.search(r'\d[\d,]*', raitings)
        if match is None:
            return raitings
        return match[0].replace(',', '')
    def get_ships_from(self):
        '''
        Return the text of the "ships from" entry.
        '''
        return self.get_by_xpath('//*[@id="fulfillerInfoFeature_feature_div"]/div[2]/div[1]/span')
    def get_sold_by(self):
        '''
        Return the text of the "sold by" entry.
        '''
        return self.get_by_xpath('//*[@id="merchantInfoFeature_feature_div"]/div[2]/div[1]/span')
    def get_bullet_points(self):
        '''
        Return the text of every "a-list-item" inside the feature bullet list.
        '''
        bullet_elm = self.find_element(By.XPATH,'//*[@id="feature-bullets"]/ul')
        bullets = bullet_elm.find_elements(By.CLASS_NAME,'a-list-item')
        return [b.text for b in bullets]
    def get_product_overview(self):
        '''
        Return the product overview block as a list of text lines.
        '''
        prod_ov = self.find_element(By.XPATH,'//*[@id="productOverview_feature_div"]/div')
        return prod_ov.text.split('\n')
    def get_product_details_tech_spec(self):
        '''
        Return the product detail entries as a {label: value} dict.

        Labels are elements with class "prodDetSectionEntry" and values are
        elements with class "prodDetAttrValue"; they are paired by position
        and pairing stops at the shorter of the two lists.

        NOTE(review): positional pairing misaligns if a row's value cell does
        not carry the "prodDetAttrValue" class -- verify against the page.
        '''
        tech_spec_key = 'prodDetSectionEntry'#"a-color-secondary a-size-base prodDetSectionEntry"
        tech_spec_val = 'prodDetAttrValue'# "a-size-base prodDetAttrValue"
        keys = self.find_elements(By.CLASS_NAME,tech_spec_key)
        values = self.find_elements(By.CLASS_NAME,tech_spec_val)
        tech_specs = {}
        for key_elm,val_elm in zip(keys,values):
            tech_specs[key_elm.text] = val_elm.text
        return tech_specs
    def extract_data(self):
        '''
        Get relevant listing data

        Each field is read independently. A field whose lookup fails is
        reported on stdout and left out of the returned dict.
        '''
        functions = {'title':self.get_title,
                    'price':self.get_price,
                    'list_price':self.get_list_price,
                    'discount':self.get_discount,
                    'raiting':self.get_raiting,
                    'raiting_count':self.get_raiting_count,
                    'ships_from':self.get_ships_from,
                    'sold_by' : self.get_sold_by,
                    'bullet_points' : self.get_bullet_points,
                    'product_overview' : self.get_product_overview}
        data = {}
        for key,func in functions.items():
            try:
                data[key] = func()
            except Exception as e:
                # best effort: one missing field must not abort the others
                print(f'{key}: {e}')
        return data

In [54]:
# Listing to open; 'B0F1SDT1HD' was the ASIN used previously.
asin = 'B0F5VYPT6H'
options = default_options()

# Context-manager variant of the same flow, kept for reference:
# with AmazonDriver(options) as driver:
#     driver.get_listing(asin)
#     WebDriverWait(driver,120).until(EC.url_contains(asin))
#     print(driver.get_title())

# Open the browser, set a 3 second implicit wait for element lookups,
# then load the listing page.
driver = AmazonDriver(options)
driver.implicitly_wait(3)
driver.get_listing(asin)


In [63]:
# Not used below; the lookup itself raises if the tech-spec table is absent.
tech_spec_table = driver.find_element(By.XPATH,'//*[@id="productDetails_techSpec_section_1"]/tbody')

# Class names of the label cells and the value cells.
tech_spec_key = 'prodDetSectionEntry'
tech_spec_val = 'prodDetAttrValue'
keys = driver.find_elements(By.CLASS_NAME,tech_spec_key)
values = driver.find_elements(By.CLASS_NAME,tech_spec_val)

# Pair labels with values by position, stopping at the shorter list.
tech_specs = {}
for key_elm, val_elm in zip(keys, values):
    tech_specs[key_elm.text] = val_elm.text
tech_specs

[<selenium.webdriver.remote.webelement.WebElement (session="324c8c1ce8182f8dcdf343d333e1204b", element="f.0A2D6E66A7AB0708427600691334F33B.d.2B92BFF6B1F17D6DD579173C1709BF2B.e.149")>, <selenium.webdriver.remote.webelement.WebElement (session="324c8c1ce8182f8dcdf343d333e1204b", element="f.0A2D6E66A7AB0708427600691334F33B.d.2B92BFF6B1F17D6DD579173C1709BF2B.e.4487")>, <selenium.webdriver.remote.webelement.WebElement (session="324c8c1ce8182f8dcdf343d333e1204b", element="f.0A2D6E66A7AB0708427600691334F33B.d.2B92BFF6B1F17D6DD579173C1709BF2B.e.4488")>, <selenium.webdriver.remote.webelement.WebElement (session="324c8c1ce8182f8dcdf343d333e1204b", element="f.0A2D6E66A7AB0708427600691334F33B.d.2B92BFF6B1F17D6DD579173C1709BF2B.e.159")>, <selenium.webdriver.remote.webelement.WebElement (session="324c8c1ce8182f8dcdf343d333e1204b", element="f.0A2D6E66A7AB0708427600691334F33B.d.2B92BFF6B1F17D6DD579173C1709BF2B.e.4489")>, <selenium.webdriver.remote.webelement.WebElement (session="324c8c1ce8182f8dcdf343

{'Manufacturer': 'EVOIRON',
 'Item Weight': '8.38 pounds',
 'Product Dimensions': '102 x 2.8 x 7 inches',
 'Item model number': 'MWT-001',
 'Power Source': 'Battery Powered',
 'Wattage': '1000 watts',
 'Included Components': 'chainsaw *1, telescopic pole *1, user manual *1',
 'Batteries Included?': 'No',
 'Batteries Required?': 'Yes',
 'ASIN': 'B0F5VYPT6H',
 'Best Sellers Rank': 'June 23, 2025'}

In [64]:
# quit() ends the whole WebDriver session (all windows plus the chromedriver
# process); close() would only close the current window.
driver.quit()