### Import all the libraries

In [161]:
# Importing the necessary libraries for selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from rapidfuzz import process
import time

### Initialize WebDriver and navigate to the website

In [162]:
# initialize the chrome driver
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 2)
# get the website URL
driver.get("https://www.autotrader.ca/")
# will wait for an element to be interactable.
driver.implicitly_wait(2)

### Get all the cars maker and model names

In [163]:
# Initialize Dictionary that will store the make as key and model as value
car_data = {}

# Locate the 'select' element that host all the cars brand names
makes_drop_down_element = driver.find_element(By.ID, "rfMakes")

# Find the 'optgroup' with label "All Makes"
all_makes_optgroup = makes_drop_down_element.find_element(By.XPATH, "./optgroup[@label='All Makes']")

# Find all 'option' elements under the 'optgroup'
all_makes_options = all_makes_optgroup.find_elements(By.TAG_NAME, "option")

# Loop through each 'option' and click it
for option in all_makes_options:
    car_make = option.text
    print(car_make)
    option.click()
    time.sleep(1.5)
    
    # Locate the 'select' element for models
    model_drop_down_element = driver.find_element(By.ID, "rfModel")
    
    # Find all 'option' elements for models
    model_options = model_drop_down_element.find_elements(By.TAG_NAME, "option")
    
    # Initialize list for models
    all_models_options = []
    
    # Loop through each 'option' for models
    for model_option in model_options[1:]:  # Skip the first 'option'
        all_models_options.append(model_option.text)
    
    # Store in dictionary
    car_data[car_make] = all_models_options
# adding non specified car make and model as "Other"
car_data['Other'] = 'Other'

AC
Acadian
Acura
Alfa Romeo
Allard
AM General
American Bantam
American Motors (AMC)
Amphicar
Ariel
Aston Martin
Auburn
Audi
Austin
Austin-Healey
Autozam
Bentley
BMW
Bricklin
Bugatti
Buick
Cadillac
Caterham
Chevrolet
Chrysler
Citroen
Clenet
Daihatsu
Daimler
Datsun
De Soto
De Tomaso
DeLorean
Dodge
Dodge or Ram
Dune Buggy
Eagle
Essex
Excalibur
Factory Five Racing
Ferrari
Fiat
Ford
Freightliner
Genesis
Geo
GMC
Hino
Holden
Honda
Hummer
Hyundai
Infiniti
International
Isuzu
Jaguar
Jeep
Jensen
Jensen-Healey
Kaiser
Karma
Kia
Koenigsegg
Lamborghini
Lancia
Land Rover
Lexus
Lincoln
Lotus
Lucid
Maserati
Maybach
Mazda
McLaren
McLaughlin-Buick
Mercedes-AMG
Mercedes-Benz
Mercury
MG
MINI
Mitsubishi
Morgan
Morris
MV-1
Nash
Nissan
Oldsmobile
Packard
Pagani
Passport
Plymouth
Polestar
Pontiac
Porsche
Radical
Ram
Renault
Rivian
Rolls-Royce
Rover
Saab
Saleen
Saturn
Scion
Shelby
smart
Sterling
Studebaker
Subaru
Sunbeam
Superformance
Suzuki
Tesla
Toyota
Triumph
TVR
Volkswagen
Volvo
Willys


In [164]:
# select "Any Make" on the dropdown menu to get all the make
selector = Select(makes_drop_down_element)
selector.select_by_visible_text('Any Make')


In [165]:
print(car_data)

{'AC': ['Ace', 'Cobra', 'Sport'], 'Acadian': ['Beaumont', 'Canso'], 'Acura': ['CL', 'CSX', 'EL', 'ILX', 'Integra', 'MDX', 'NSX', 'RDX', 'RL', 'RLX', 'RSX', 'TL', 'TLX', 'TSX', 'Unspecified', 'ZDX'], 'Alfa Romeo': ['156', '159', '164 Series', '4C Coupe', '4C Spider', 'Alfetta', 'Giulia', 'Giulia Quadrifoglio', 'Giulietta', 'GTV', 'GTV6', 'Milano', 'Spider', 'Stelvio', 'Tonale', 'Tonale PHEV', 'Unspecified'], 'Allard': ['J2X'], 'AM General': ['Hummer', 'Unspecified'], 'American Bantam': ['Roadster'], 'American Motors (AMC)': ['Ambassador', 'American', 'Classic', 'Eagle', 'Gremlin', 'Javelin', 'Rambler'], 'Amphicar': ['770'], 'Ariel': ['Atom'], 'Aston Martin': ['DB11', 'DB7', 'DB7 Vantage', 'DB7 Vantage Volante', 'DB9', 'DBS', 'DBS Superleggera', 'DBX', 'DBX707', 'Rapide S', 'V8 Vantage', 'V8 Vantage S', 'Vanquish', 'Vanquish S', 'Vantage', 'Virage', 'Volante'], 'Auburn': ['160'], 'Audi': ['100', '4000', 'A3', 'A3 Sportback', 'A4', 'A5', 'A6', 'A7', 'A8', 'allroad', 'Cabriolet', 'e-tron',

### Input a random postal code and click on "show me cars" button

In [166]:
postal_code_input_element = driver.find_element(By.ID, "locationAddressV2")
postal_code_input_element.send_keys("M5V 3L9")
show_me_cars_btn = driver.find_element(By.ID, "SearchButton")
show_me_cars_btn.click()

### List all the cars available by removing some filters in the search

In [167]:
# click on the postal code box on the left side and change the radius to national to get all the cars listing
postal_code_element = driver.find_element(By.ID, "faceted-Location")
postal_code_element.click()

# Locate the dropdown element
dropdown_element = driver.find_element(By.ID, "proximity")

# Initialize Select class
select = Select(dropdown_element)

# Select the 'National' option
select.select_by_visible_text("National")

# save by clicking on the apply location button
apply_location_btn = wait.until(EC.element_to_be_clickable((By.ID, "applyLocation")))

apply_location_btn.click()

# Locate the checkbox element
damaged_checkbox_element = driver.find_element(By.ID, "rfDamaged")

# Use JavaScript to click the checkbox
driver.execute_script("arguments[0].click();", damaged_checkbox_element)

try:
    # Wait until the apply button becomes clickable
    apply_condition_btn = wait.until(EC.element_to_be_clickable((By.ID, "applyCondition")))
    
    # Click the button
    apply_condition_btn.click()
except TimeoutException:
    driver.execute_script("document.getElementById('applyCondition').click();")

# click on "Other Options" menu
other_option_menu  = driver.find_element(By.ID, 'faceted-parent-Other')
driver.execute_script("arguments[0].click();", other_option_menu)

# Locate the "With photos" checkbox element
with_photos_checkbox = driver.find_element(By.ID, "rfPhoto")

# Use JavaScript to uncheck the "with photo"checkbox
if with_photos_checkbox.is_selected():
    driver.execute_script("arguments[0].click();", with_photos_checkbox)

# Get the apply button element 
driver.execute_script("arguments[0].click();", driver.find_element(By.ID, "applyOthers"))



# Locate the drop-down element for the displaying the number of cars on the list
display_dropdown_element = driver.find_element(By.ID,"pageSize")

# Initialize Select class
select = Select(display_dropdown_element)

# Select the '100' option by visible text
select.select_by_visible_text("100")



In [168]:
# TODO: create a loop that goes through all the car listed items until the last item
# TODO: by incrementing data-list-numerical-position and at the end of the page click on the next page
# Get the first car
element = driver.find_element("css selector", "span[data-list-numerical-position='1']")
element.click()

### Collection each car information

In [169]:
# Initialize an empty dictionary to store the car specifications
car_specs = {}

# Locate the element using CSS selector
car_header_info = driver.find_element(By.CSS_SELECTOR, "div#heroTitleWrapper > h1").text

# Split the string by spaces and tabs
split_string = car_header_info.split()

# Assign the first, second, and third words to respective variables
year_of_manufacturing = split_string[0]
make_estimate = split_string[1]
model_estimate = split_string[2]
car_specs["Year Of Manufacturing"] = year_of_manufacturing

### Added a word matcher using rapidfuzz to match make and model name that is extracted from the title and the dictionary collection of make and model

In [170]:
def find_closest_words(make, model):
    # Use RapidFuzz to find the closest matches for the make
    closest_make_match = process.extractOne(make, car_data.keys())
    
    # Extract the closest make
    closest_make = closest_make_match[0]
    
    # Access to the value using the key
    all_models = car_data[closest_make]
    
    # Use RapidFuzz to find the closest matches for the model
    closest_model_match = process.extractOne(model, all_models)
    # Extract the closest make
    closest_model = closest_model_match[0]
    
    return closest_make, closest_model


make, model = find_closest_words(make_estimate, model_estimate)

price_elements = driver.find_elements(By.XPATH,'//p[@class="hero-price"]')

price = price_elements[0].text


car_specs["Make"] = make
car_specs["Model"] = model
car_specs["Price"] = price

In [171]:
# Find the number of list items in the unordered list
car_specs_items = driver.find_elements(By.CSS_SELECTOR, "#sl-card-body li")

# Loop through each list item
for i in range(len(car_specs_items)):
    try:
        # Try to find the key and value for each car specification
        key_element = driver.find_element(By.CSS_SELECTOR, f"#spec-key-{i}")
        value_element = driver.find_element(By.CSS_SELECTOR, f"#spec-value-{i}")
        
        # Get the text from the elements
        key = key_element.text
        value = value_element.text
        
        # Store the key-value pair in the dictionary
        car_specs[key] = value
    except NoSuchElementException:
        # If the element is not found, set the value for the key to "None"
        car_specs[key] = "None"
    finally:
        time.sleep(1)

# Print the car specifications
print(car_specs)

{'Year Of Manufacturing': 2020, 'Make': 'Lexus', 'Model': 'RX', 'Price': '43,674', 'Kilometres': '48,732 km', 'Status': 'Used', 'Trim': 'PREMIUM PKG|SUNROOF|BACK CAM|CAR-PLAY|BSM|', 'Body Type': 'SUV', 'Engine': 'V6 Cylinder Engine', '': ''}
