### Import all the libraries

In [512]:
# Importing the necessary libraries for selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from rapidfuzz import process
import time, re

### Initialize WebDriver and navigate to the website

In [513]:
# Start a Chrome session; `driver` is the handle used for every browser interaction below
driver = webdriver.Chrome()
# Explicit-wait helper with a 2-second timeout
wait = WebDriverWait(driver, 2)
# Load the AutoTrader home page
driver.get("https://www.autotrader.ca/")
# Implicit wait: element lookups keep retrying for up to 2 seconds before failing.
# NOTE(review): Selenium's documentation advises against combining implicit and
# explicit waits in one session — confirm both are really needed here.
driver.implicitly_wait(2)

### Get all the car make and model names

In [514]:
# Maps each make name to the list of model names offered for that make
car_data = {}

# Locate the 'select' element that hosts all the car make names
makes_drop_down_element = driver.find_element(By.ID, "rfMakes")

# Restrict to the 'optgroup' labelled "All Makes"
all_makes_optgroup = makes_drop_down_element.find_element(By.XPATH, "./optgroup[@label='All Makes']")

# Every 'option' under that 'optgroup' is one make
all_makes_options = all_makes_optgroup.find_elements(By.TAG_NAME, "option")

# Select each make in turn and read the models dropdown
for option in all_makes_options:
    car_make = option.text
    option.click()
    # Fixed pause before reading the models dropdown
    # NOTE(review): presumably gives the model list time to refresh — confirm
    time.sleep(2)

    # Locate the 'select' element for models
    model_drop_down_element = driver.find_element(By.ID, "rfModel")

    # Find all 'option' elements for models
    model_options = model_drop_down_element.find_elements(By.TAG_NAME, "option")

    # Collect the model names, skipping the first 'option'
    all_models_options = [model_option.text for model_option in model_options[1:]]

    # Store in dictionary
    car_data[car_make] = all_models_options

# Fallback entry for an unspecified make/model. The value is a list, like every
# other entry, so the fuzzy matcher compares against the whole word "Other"
# instead of iterating over the characters of a bare string.
car_data['Other'] = ['Other']

In [515]:
# Select "Any Make" in the makes dropdown
# (the original note says this is done to get all the makes)
selector = Select(makes_drop_down_element)
selector.select_by_visible_text('Any Make')


### Enter a postal code and click the "Show me cars" button

In [516]:
# Type a fixed postal code into the location box on the home page
postal_code_input_element = driver.find_element(By.ID, "locationAddressV2")
postal_code_input_element.send_keys("M5V 3L9")
# Submit the search form
show_me_cars_btn = driver.find_element(By.ID, "SearchButton")
show_me_cars_btn.click()

### List all the cars available by removing some filters in the search

In [517]:
# Open the location facet in the left-hand panel so the search radius can be changed
postal_code_element = driver.find_element(By.ID, "faceted-Location")
postal_code_element.click()

# Locate the search-radius dropdown
dropdown_element = driver.find_element(By.ID, "proximity")

# Wrap it in Select so an option can be chosen by its visible label
select = Select(dropdown_element)

# Choose 'National' so that all car listings are returned
select.select_by_visible_text("National")

# Wait (using the timeout configured on `wait`) until the apply-location button is clickable
apply_location_btn = wait.until(EC.element_to_be_clickable((By.ID, "applyLocation")))

# Save the location change
apply_location_btn.click()

# Locate the checkbox with ID "rfDamaged"
damaged_checkbox_element = driver.find_element(By.ID, "rfDamaged")

# Click the checkbox through JavaScript rather than WebElement.click()
# NOTE(review): the box is toggled without checking its current state (unlike
# "rfPhoto" below) — confirm the intended end state.
driver.execute_script("arguments[0].click();", damaged_checkbox_element)

try:
    # Wait until the apply button becomes clickable
    apply_condition_btn = wait.until(EC.element_to_be_clickable((By.ID, "applyCondition")))
    
    # Click the button
    apply_condition_btn.click()
except TimeoutException:
    # The button never became clickable within the timeout: click it via JavaScript instead
    driver.execute_script("document.getElementById('applyCondition').click();")

# Click on the "Other Options" menu
other_option_menu  = driver.find_element(By.ID, 'faceted-parent-Other')
driver.execute_script("arguments[0].click();", other_option_menu)

# Locate the "With photos" checkbox element
with_photos_checkbox = driver.find_element(By.ID, "rfPhoto")

# Uncheck "With photos" via JavaScript, but only when it is currently selected
if with_photos_checkbox.is_selected():
    driver.execute_script("arguments[0].click();", with_photos_checkbox)

# Locate the "applyOthers" button and click it via JavaScript
driver.execute_script("arguments[0].click();", driver.find_element(By.ID, "applyOthers"))



# Locate the drop-down that controls how many cars are displayed per page
display_dropdown_element = driver.find_element(By.ID,"pageSize")

# Wrap it in Select (this rebinds `select`, which held the radius dropdown above)
select = Select(display_dropdown_element)

# Select the '100' option by visible text
select.select_by_visible_text("100")



In [518]:
# TODO: create a loop that goes through all the car listed items until the last item
# TODO: by incrementing data-list-numerical-position and at the end of the page click on the next page
# Locator for the first listing; defined once so the initial lookup and the
# retry below cannot drift apart.
first_listing_locator = (By.CSS_SELECTOR, "span[data-list-numerical-position='1']")

# Get the first car and open it
element = driver.find_element(*first_listing_locator)
try:
    element.click()
except StaleElementReferenceException:
    # The reference went stale between lookup and click. Re-locate the element
    # and wait until it is clickable (not merely present) before clicking.
    element = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(first_listing_locator)
    )
    element.click()

### Collecting each car's information

In [634]:
# Blank record for one listing: specification fields start as None,
# feature/highlight flags start at 0, and 'Price' (None) comes last.
_SPEC_FIELDS = (
    'Make',
    'Model',
    'Year Of Manufacturing',
    'Kilometres',
    'Mileage Condition',
    'Status',
    'Trim',
    'Body Type',
    'Cylinder',
    'Transmission',
    'Drivetrain',
    'Exterior Colour',
    'Interior Colour',
    'Passengers',
    'Doors',
    'Fuel Type',
    'Fuel Economy',
)
_FEATURE_FLAGS = (
    'Air Conditioning',
    'Alarm',
    'Heated Mirrors',
    'Power Seat',
    'Heated Seats',
    'Power Windows',
    'Alloy Wheels',
    'Keyless Entry',
    'Stability Control',
    'Bluetooth',
    'Memory Seats',
    'Sunroof',
    'Dual Climate Controls',
    'Navigation System',
    'Tow Package',
    'Entertainment Package',
    'Power Locks',
    'Xenon Headlights',
    'Fog Lights',
    'Power Mirrors',
)
car_specs = {
    **dict.fromkeys(_SPEC_FIELDS),
    **dict.fromkeys(_FEATURE_FLAGS, 0),
    'Price': None,
}

In [635]:
# Read the listing title text from the hero header
car_header_info = driver.find_element(By.CSS_SELECTOR, "div#heroTitleWrapper > h1").text

# Split the title on any whitespace (spaces, tabs, newlines)
split_string = car_header_info.split()

# Take the first three words as year, make estimate and model estimate.
# NOTE(review): assumes the title has at least three words (otherwise IndexError)
# and that the make is a single word; a two-word make would presumably put its
# second word into model_estimate — confirm against real titles.
year_of_manufacturing = split_string[0]
make_estimate = split_string[1]
model_estimate = split_string[2]
# The year is stored as the raw title string
car_specs["Year Of Manufacturing"] = year_of_manufacturing

### Use a RapidFuzz word matcher to map the make and model names extracted from the title onto the collected dictionary of makes and models

In [636]:
def find_closest_words(make, model):
    """Return the catalogue make and model closest to the given estimates.

    The make is fuzzy-matched against the keys of the module-level
    ``car_data`` dictionary; the model is then fuzzy-matched against the
    models stored under the matched make.

    Args:
        make: Make name estimate (e.g. a word taken from the listing title).
        model: Model name estimate.

    Returns:
        A ``(closest_make, closest_model)`` tuple. ``"Other"`` is used for
        whichever part cannot be matched because there is nothing to match
        against (empty catalogue, or a make with no models).
    """
    # Use RapidFuzz to find the closest match for the make
    closest_make_match = process.extractOne(make, car_data.keys())
    if closest_make_match is None:
        # Nothing to match against: fall back to the catch-all entry
        return "Other", "Other"

    # extractOne returns a tuple whose first item is the matched choice
    closest_make = closest_make_match[0]

    # Models offered for the matched make
    all_models = car_data[closest_make]
    if isinstance(all_models, str):
        # A bare string would be iterated character by character by the
        # matcher; treat it as a single model name instead.
        all_models = [all_models]

    # Use RapidFuzz to find the closest match for the model; a make with no
    # models yields no match, so guard before indexing.
    closest_model_match = process.extractOne(model, all_models) if all_models else None
    closest_model = closest_model_match[0] if closest_model_match else "Other"

    return closest_make, closest_model


# Resolve the title's make/model estimates against the scraped catalogue
make, model = find_closest_words(make_estimate, model_estimate)

# find_elements returns an empty list when nothing matches, so a listing
# without a price element leaves the price as None instead of raising IndexError.
price_elements = driver.find_elements(By.XPATH, '//p[@class="hero-price"]')

price = price_elements[0].text if price_elements else None


car_specs["Make"] = make
car_specs["Model"] = model
car_specs["Price"] = price

In [637]:
# Expand the specifications card so every entry is shown
all_spec_toggle = driver.find_element(By.ID, "btn-vdp-specs-toggle")
driver.execute_script("arguments[0].click();", all_spec_toggle)
# Pause so the expanded list can load
time.sleep(1)
# The list items are only counted; each entry is then read through its
# index-based key/value IDs.
car_specs_items = driver.find_elements(By.CSS_SELECTOR, "#sl-card-body li")
for i, _ in enumerate(car_specs_items):
    key_element = driver.find_element(By.CSS_SELECTOR, f"#spec-key-{i}")
    value_element = driver.find_element(By.CSS_SELECTOR, f"#spec-value-{i}")

    key = key_element.text
    # Specs that are not tracked in car_specs are skipped
    if key not in car_specs:
        continue

    print(key)

    # Record the value text under the matching spec name
    value = value_element.text
    car_specs[key] = value



Kilometres
Status
Trim
Body Type


In [638]:
# The two values are looked up independently, so a listing that lacks one of
# them still gets the other recorded.

# Combined fuel economy (optional on a listing)
try:
    cbn_fuel_eco = driver.find_element(By.ID, "vdp-fv-combined").text
    car_specs['Combined Fuel Economy'] = cbn_fuel_eco + 'L/100km'
except NoSuchElementException:
    # Not present on this listing: leave the key unset
    pass

# Mileage condition: text of the element with class "ca-indicator-active"
try:
    mileage_condition = driver.find_element(By.CSS_SELECTOR, "p.ca-indicator-active").text
    car_specs['Mileage Condition'] = mileage_condition
except NoSuchElementException:
    # Not present on this listing: keep the default None
    pass

In [639]:
# Print the specifications collected so far (features and highlights are filled in by later cells)
print(car_specs)

{'Make': 'Porsche', 'Model': 'Cayenne', 'Year Of Manufacturing': '2021', 'Kilometres': '24,790 km', 'Mileage Condition': 'LOW KM', 'Status': 'Used', 'Trim': 'AWD', 'Body Type': 'SUV', 'Cylinder': None, 'Transmission': None, 'Drivetrain': None, 'Exterior Colour': None, 'Interior Colour': None, 'Passengers': None, 'Doors': None, 'Fuel Type': None, 'Fuel Economy': None, 'Air Conditioning': 0, 'Alarm': 0, 'Heated Mirrors': 0, 'Power Seat': 0, 'Heated Seats': 0, 'Power Windows': 0, 'Alloy Wheels': 0, 'Keyless Entry': 0, 'Stability Control': 0, 'Bluetooth': 0, 'Memory Seats': 0, 'Sunroof': 0, 'Dual Climate Controls': 0, 'Navigation System': 0, 'Tow Package': 0, 'Entertainment Package': 0, 'Power Locks': 0, 'Xenon Headlights': 0, 'Fog Lights': 0, 'Power Mirrors': 0, 'Price': '84,168', 'Combined Fuel Economy': '11.5L/100km'}


### Getting all the features

In [640]:
try:
    # Expand the feature section so every entry is shown
    all_feat_toggle = driver.find_element(By.ID, "vdp-feature-toggle-btn")
    driver.execute_script("arguments[0].click();", all_feat_toggle)
    # Pause so the expanded list can load
    time.sleep(1)

    # Collect the list items of the feature card
    ul_car_feat_element = driver.find_element(By.ID, 'fo-card-body')
    li_car_feat_elements = ul_car_feat_element.find_elements(By.TAG_NAME, 'li')

    # Lazily read each item's span text, one lookup per iteration
    feature_names = (
        item.find_element(By.TAG_NAME, 'span').text
        for item in li_car_feat_elements
    )
    for span_text in feature_names:
        # Flag only the features that are tracked in car_specs
        if span_text in car_specs:
            car_specs[span_text] = 1

except NoSuchElementException:
    # A missing toggle, card or span ends feature collection silently
    pass

### Getting all the highlights

In [641]:
try:
    # Expand all the highlights
    all_highlights = driver.find_element(By.ID, "vdp-highlight-toggle-btn")
    driver.execute_script("arguments[0].click();", all_highlights)
    time.sleep(1)
except NoSuchElementException:
    # No toggle button on this listing: carry on with whatever is shown
    pass
try:
    # Wait until the highlights list is present
    ul_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'hl-card-body'))
    )

    # Loop through all li elements within the list
    for li in ul_element.find_elements(By.TAG_NAME, 'li'):
        # Locate the nested span containing the text
        nested_span = li.find_element(By.CSS_SELECTOR, 'span.list-text')

        # Get the text from the nested span
        feature_text = nested_span.text

        # Flag only the highlights that are tracked in car_specs
        if feature_text in car_specs:
            car_specs[feature_text] = 1

except (TimeoutException, NoSuchElementException):
    # WebDriverWait.until raises TimeoutException (not NoSuchElementException)
    # when the list never appears, so both are treated as "no highlights".
    pass

In [642]:
# Display the record after the feature and highlight flags have been filled in
car_specs

{'Make': 'Porsche',
 'Model': 'Cayenne',
 'Year Of Manufacturing': '2021',
 'Kilometres': '24,790 km',
 'Mileage Condition': 'LOW KM',
 'Status': 'Used',
 'Trim': 'AWD',
 'Body Type': 'SUV',
 'Cylinder': None,
 'Transmission': None,
 'Drivetrain': None,
 'Exterior Colour': None,
 'Interior Colour': None,
 'Passengers': None,
 'Doors': None,
 'Fuel Type': None,
 'Fuel Economy': None,
 'Air Conditioning': 1,
 'Alarm': 0,
 'Heated Mirrors': 1,
 'Power Seat': 0,
 'Heated Seats': 0,
 'Power Windows': 1,
 'Alloy Wheels': 0,
 'Keyless Entry': 0,
 'Stability Control': 1,
 'Bluetooth': 0,
 'Memory Seats': 0,
 'Sunroof': 0,
 'Dual Climate Controls': 0,
 'Navigation System': 0,
 'Tow Package': 0,
 'Entertainment Package': 0,
 'Power Locks': 0,
 'Xenon Headlights': 0,
 'Fog Lights': 0,
 'Power Mirrors': 1,
 'Price': '84,168',
 'Combined Fuel Economy': '11.5L/100km'}

### Transformation of the dictionary

In [643]:
# Keys whose values are converted to integers. "Door" is kept next to the real
# key "Doors" so any caller that used the old spelling still gets a conversion.
_INT_KEYS = frozenset({"Year Of Manufacturing", "Cylinder", "Door", "Doors", "Passengers"})
_FUEL_ECONOMY_KEYS = frozenset({"City Fuel Economy", "Hwy Fuel Economy", "Combined Fuel Economy"})
# Punctuation characters at which a trim string is cut
_TRIM_DELIMITERS = re.compile('[!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~]')


def _to_int(value):
    """Return *value* as an int, or None when it contains no digits."""
    text = str(value).replace(",", "").strip()
    try:
        return int(text)
    except ValueError:
        # Values such as "24790 km" or "V6": use the first run of digits
        match = re.search(r"\d+", text)
        return int(match.group()) if match else None


def _to_float(value):
    """Return the first number in *value* as a float, or None when there is none."""
    text = str(value)
    # Prefer a decimal number; fall back to a whole number such as "11L/100km"
    match = re.search(r"\d+\.\d+", text) or re.search(r"\d+", text)
    return float(match.group()) if match else None


def modify_dict(car_dict):
    """Return a cleaned copy of a scraped car record.

    Args:
        car_dict: Mapping of spec/feature names to raw scraped values.

    Returns:
        A new dict in which:
        * ``None`` values are kept as ``None``;
        * year, cylinder, doors and passengers become ``int``;
        * ``"Price"`` becomes ``int`` (thousands separators removed);
        * ``"Kilometres"`` is stored as ``int`` under the key ``"Mileage"``;
        * ``"Trim"`` is cut at the first punctuation character and stripped;
        * fuel-economy values become ``float``;
        * ``"Mileage Condition"`` loses its ``" KM"`` suffix and is lower-cased;
        * every other value is copied unchanged.
        Numeric fields that contain no digits become ``None``.
    """
    modified_dict = {}
    for key, value in car_dict.items():
        # Missing values stay missing
        if value is None:
            modified_dict[key] = None
        elif key in _INT_KEYS or key == "Price":
            modified_dict[key] = _to_int(value)
        elif key == "Kilometres":
            # Renamed on output: the cleaned record uses "Mileage"
            modified_dict["Mileage"] = _to_int(value)
        elif key == "Trim":
            modified_dict[key] = _TRIM_DELIMITERS.split(value)[0].strip()
        elif key in _FUEL_ECONOMY_KEYS:
            modified_dict[key] = _to_float(value)
        elif key == "Mileage Condition":
            modified_dict[key] = value.replace(" KM", "").lower()
        else:
            modified_dict[key] = value
    return modified_dict

In [644]:
# Build the cleaned record; modify_dict returns a new dict and leaves car_specs untouched
car_specs_mod = modify_dict(car_specs)

In [645]:
# Display the cleaned record
car_specs_mod

{'Make': 'Porsche',
 'Model': 'Cayenne',
 'Year Of Manufacturing': 2021,
 'Mileage': 24790,
 'Mileage Condition': 'low',
 'Status': 'Used',
 'Trim': 'AWD',
 'Body Type': 'SUV',
 'Cylinder': None,
 'Transmission': None,
 'Drivetrain': None,
 'Exterior Colour': None,
 'Interior Colour': None,
 'Passengers': None,
 'Doors': None,
 'Fuel Type': None,
 'Fuel Economy': None,
 'Air Conditioning': 1,
 'Alarm': 0,
 'Heated Mirrors': 1,
 'Power Seat': 0,
 'Heated Seats': 0,
 'Power Windows': 1,
 'Alloy Wheels': 0,
 'Keyless Entry': 0,
 'Stability Control': 1,
 'Bluetooth': 0,
 'Memory Seats': 0,
 'Sunroof': 0,
 'Dual Climate Controls': 0,
 'Navigation System': 0,
 'Tow Package': 0,
 'Entertainment Package': 0,
 'Power Locks': 0,
 'Xenon Headlights': 0,
 'Fog Lights': 0,
 'Power Mirrors': 1,
 'Price': 84168,
 'Combined Fuel Economy': 11.5}

In [646]:
# TODO: Quit the driver and close the db connection