# Sports Car Prices Project - Web Scraping CarMax

## Greg Fagan, Max Enabit, Luke Rubin 

#### Site we are scraping, CarMax:
https://www.carmax.com/cars/sports-cars

#### For every sports car listing, we scrape the make, model, trim, year, mileage, and price

In [1]:
# import libraries

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import random

# launch Chrome, open the CarMax sports-car listing, then maximize the window
browser = webdriver.Chrome()
browser.get("https://www.carmax.com/cars/sports-cars")
browser.maximize_window()

# one accumulator list per output column (each name is bound to its own list)
car_makes, car_models, car_trims = [], [], []
car_years, car_mileages, car_prices = [], [], []

def scroll_page(browser):
    """Scroll to the bottom of the page, then pause for a short random time.

    The jump is done with a JavaScript ``window.scrollTo`` call; the pause
    afterwards lasts between 1.5 and 3 seconds so page elements can load.
    """
    jump_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"
    browser.execute_script(jump_to_bottom)

    pause_seconds = random.uniform(1.5, 3)
    time.sleep(pause_seconds)

def load_more_cars(browser, max_cars=1500):
    """Click "See More Matches" until at least ``max_cars`` listings are present.

    Before every click the listing links currently on the page are counted;
    once the count reaches ``max_cars`` the function returns.  If any step
    raises (for example the button does not become clickable within the
    10 second wait), the error is printed and loading stops.
    """
    try:
        listing_xpath = "//a[@class='scct--make-model-info-link make-model-link']"
        button_locator = (By.XPATH, "//hzn-button[contains(text(), 'See More Matches')]")

        # keep going only while fewer than max_cars listings are on the page
        while len(browser.find_elements(By.XPATH, listing_xpath)) < max_cars:
            more_button = WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable(button_locator)
            )
            ActionChains(browser).move_to_element(more_button).click().perform()

            # wait for more cars
            time.sleep(random.uniform(2, 4))

            # scroll to the bottom so further items can load
            scroll_page(browser)
    except Exception as err:
        print(f"error or no more cars: {err}")

def _split_car_name(year_make, model_trim):
    """Split a listing's two title strings into (year, make, model, trim).

    ``year_make`` is the text of the year/make span and ``model_trim`` the
    text of the model/trim span (assumed to look like "2015 Chevrolet" and
    "Camaro LS" -- inferred from the span class names, confirm against the
    page).  Each string is split at its own first space, so a make made of
    several words stays in the make column instead of spilling into the
    model.  The model is the first word of ``model_trim``; everything after
    it is the trim.  Parts that are missing come back as None.
    """
    year, _, make = year_make.strip().partition(" ")
    model, _, trim = model_trim.strip().partition(" ")
    return year or None, make.strip() or None, model or None, trim.strip() or None


# load all cars
print("loading cars...")
load_more_cars(browser, max_cars=1500)

# extract car details
print("extracting car details...")
car_elements = browser.find_elements(By.XPATH, "//a[@class='scct--make-model-info-link make-model-link']")
for car in car_elements[:1500]:  # limit to 1500 cars
    try:
        year_make = car.find_element(By.XPATH, ".//span[@class='scct--make-model-info--year-make']").text
        model_trim = car.find_element(By.XPATH, ".//span[@class='scct--make-model-info--model-trim']").text
        car_year, car_make, car_model, car_trim = _split_car_name(year_make, model_trim)
    except Exception as e:
        print(f"error extracting car details: {e}")
        # record an empty row rather than nothing, so row i still belongs to
        # card i and the four lists never drift apart in length
        car_year = car_make = car_model = car_trim = None

    # append all four fields together, only after parsing has finished;
    # appending some of them before a later step fails would leave the
    # column lists with different lengths and break the DataFrame build
    car_years.append(car_year)
    car_makes.append(car_make)
    car_models.append(car_model)
    car_trims.append(car_trim)

# collect the mileage and price texts from the whole page (first 1500 of each)
# NOTE(review): the mileage XPath matches every span with aria-hidden='true',
# not only mileage spans -- confirm it selects exactly one span per listing
mileage_elements = browser.find_elements(By.XPATH, "//span[@aria-hidden='true']")
price_elements = browser.find_elements(By.XPATH, "//span[@class='scct--price-miles-info--price']")

car_mileages.extend(element.text for element in mileage_elements[:1500])
car_prices.extend(element.text for element in price_elements[:1500])

# every value has been read from the page, so shut the browser down
browser.quit()

# build the DataFrame: one column per scraped field, in output order
car_data = pd.DataFrame(
    dict(
        zip(
            ['car make', 'car model', 'car trim', 'car year', 'car mileage', 'car price'],
            [car_makes, car_models, car_trims, car_years, car_mileages, car_prices],
        )
    )
)

# write the raw table to disk without the index column
car_data.to_csv('Raw_CarMax.csv', index=False)
print("data saved to 'Raw_CarMax.csv'")


loading cars...
extracting car details...
data saved to 'Raw_CarMax.csv'


In [2]:
# show the scraped DataFrame as this cell's output
# NOTE(review): display is not imported in this file -- presumably the
# IPython/Jupyter built-in; confirm before running outside a notebook
display(car_data)

Unnamed: 0,car make,car model,car trim,car year,car mileage,car price
0,Acura,TLX,,2015,101K mi,"$16,998*"
1,Chevrolet,Camaro,LS,2015,81K mi,"$18,998*"
2,Scion,FR-S,,2014,110K mi,"$17,998*"
3,Chevrolet,Corvette,Stingray 2LT,2023,4K mi,"$71,998*"
4,Chevrolet,Camaro,LT,2017,35K mi,"$23,998*"
...,...,...,...,...,...,...
1495,Jaguar,XE,Premium,2018,52K mi,"$17,998*"
1496,Mercedes-Benz,SL55,AMG,2022,9K mi,"$96,998*"
1497,Mazda,MX-5,Miata Sport,2020,18K mi,"$22,998*"
1498,Ford,Mustang,Ecoboost Premium,2020,51K mi,"$20,998*"
