In [1]:
#pip install selenium beautifulsoup4 webdriver_manager

from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# This initializes the driver using 'Service' and 'options' keyword arguments
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# This is the URL for the 'new cars' webpage. 
url = 'https://www.toyotaofdowntownla.com/inventory/new'
driver.get(url)

# Uses Selenium's WebDriverWait to wait 20 seconds fot car inventory content to load
try:
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, "srp-vehicle-list-item"))
    )
    print("Page finished rendering. Content loaded.")
except Exception as e:
    print(f"Timed out waiting for page to load: {e}")

# Pulls the HTML code for the specific car
page_source = driver.page_source

# Closes the browser instance used to pull HTML data
driver.quit()

# Creates a BeautifulSoup object from the HTML code pulled
soup = bs(page_source, 'html.parser')

# Selects div tags with all car attributes from car listings
results = soup.select('div.row.mb-5.mt-2')

import json
import pandas as pd

# Populates 'car_list' with all the car attribute data for our first dataframe
car_list = []
for result in results:
    scripts = result.find_all('script', type='application/ld+json')
    for script in scripts:
        json_text = script.string
        if json_text:
            car_data = json.loads(json_text)
            car_list.append({
                'Model': car_data.get('model'),
                'Brand': car_data.get('brand'),
                'Year': car_data.get('vehicleModelDate'),
                'Interior Color': car_data.get('vehicleInteriorColor'),
                'Transmission': car_data.get('vehicleTransmission'),
                'Color': car_data.get('color'),
                'Price': car_data.get('offers', {}).get('price')
            })
car_list
df2 = pd.DataFrame(car_list)

import json

# Populates 'car_url_list' with all the URLs of each new car in Toyota's inventory
car_url_list = []
for result in results:
    scripts = result.find_all('script', type='application/ld+json')
    for script in scripts:
        json_text = script.string
        if json_text:
            car_data = json.loads(json_text)
            car_url_list.append(car_data.get('offers', {}).get('url'))

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time

options = webdriver.ChromeOptions()
options.add_argument("--headless")

# Launches another Chrome browser to extract data from each individual car's webpage
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Populates 'result_list' with the HTML code for each car's webpage
result_list = []
for url in car_url_list:
    url = str(url).strip()
    if not url.startswith("http"):
        continue
    driver.get(url)
    try:
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CLASS_NAME, "srp-vehicle-list-item"))
        )
    except Exception as e:
        print(f"Timed out for {url}: {e}")

    # Remove the explicit sleep, if not needed:
    #time.sleep(3)
    
    soup = BeautifulSoup(driver.page_source, "html.parser")
    results = soup.select("div.details-value")
    result_list.extend(results)
driver.quit()

# Populates 'car' with lists of attributes for each car
cars = []
current_car = []
EXPECTED_FIELDS_PER_CAR = 7
for tag in result_list:
    is_start = False
    span = tag.find("span")
    # Original method
    if span and "ddoa-interior-color" in (span.get("type") or ""):
        is_start = True
    # Fallback: if a new car starts without interior color (e.g., first field is body type)
    elif (
        current_car and len(current_car) >= EXPECTED_FIELDS_PER_CAR and 
        ('Car' in tag.text or 'Utility' in tag.text or 'Mini-van' in tag.text or 'CrewMax' in tag.text)
    ):
        is_start = True
    if is_start:
        if current_car:
            cars.append(current_car)
        current_car = []
    current_car.append(tag)
if current_car:
    cars.append(current_car)

# Cleans the 'cars' list to get rid of extra attrubutes and label missing attributes as 'NA'
for i in range(0, len(cars)):
    car = cars[i]
    if len(car) < 7:
        new_car = car[0:len(car)]
        for j in range(0, 7-len(car)):
            new_car.append('NA')
        cars[i] = new_car
    elif len(car) > 7:
        new_car = car[0:7]
        cars[i] = new_car

import pandas as pd

# Defines a function to create a dictionary from each car containing their individual car attributes
def parse_car_details(car_tags):
    import re
    car_dict = {
        'Interior Color': 'NA',
        'Body Type': 'NA',
        'Drive Type': 'NA',
        'MPG': 'NA',
        'Engine': 'NA',
        'Transmission': 'NA',
        'Model Code': 'NA'
    }
    
    for tag in car_tags:
        
        if hasattr(tag, "get_text"):
            text = tag.get_text(strip=True)
        else:
            text = str(tag).strip()
        span = None
        if hasattr(tag, "find") and hasattr(tag, "get"):
            span = tag.find("span")
            if span and hasattr(span, "get"):
                if "ddoa-interior-color" in (span.get("type") or ""):
                    car_dict['Interior Color'] = text
                    continue
        if any(keyword in text for keyword in ['Car', 'Utility', 'Mini-van', 'XtraCab', 'CrewMax', 'Double Cab']):
            if car_dict['Body Type'] == 'NA':
                car_dict['Body Type'] = text
                continue
        if any(keyword in text for keyword in ['Wheel Drive', 'All Wheel', 'Four Wheel', 'Front Wheel', 'Rear Wheel']):
            car_dict['Drive Type'] = text
            continue
        if '/' in text and 'EPA' in text.upper():
            car_dict['MPG'] = text
            continue
        if any(keyword in text for keyword in ['Engine', 'Motor', 'Hybrid', 'Turbo', 'Cyl']) and 'Transmission' not in text:
            car_dict['Engine'] = text
            continue
        if 'Transmission' in text:
            car_dict['Transmission'] = text
            continue
        if isinstance(text, str) and text.isdigit():
            car_dict['Model Code'] = text
            continue
    return car_dict

# Applies the 'parse_car_details' function to all the cars in the new car inventory to populate 'car_table' with dictionaries for each car
car_table = []
for car in cars:
    car_dict = parse_car_details(car)
    car_table.append(car_dict)

# Creates a Pandas dataframe from 'car_table'
df1 = pd.DataFrame(car_table)

# Combines df1 and df2 to create a Pandas dataframe with all the desired information
result_df = pd.concat([df2, df1], axis=1)
result_df

Timed out waiting for page to load: name 'WebDriverWait' is not defined
Timed out for https://www.toyotaofdowntownla.com/viewdetails/new/4t1daack3tu676405/2026-toyota-camry-4dr-car?type=lease: Message: 
Stacktrace:
0   chromedriver                        0x0000000105f5d9d8 chromedriver + 6482392
1   chromedriver                        0x0000000105f54e1a chromedriver + 6446618
2   chromedriver                        0x0000000105999afb chromedriver + 436987
3   chromedriver                        0x00000001059ec6d3 chromedriver + 775891
4   chromedriver                        0x00000001059ec9d1 chromedriver + 776657
5   chromedriver                        0x0000000105a3dbf4 chromedriver + 1108980
6   chromedriver                        0x0000000105a3aeb8 chromedriver + 1097400
7   chromedriver                        0x00000001059dedaf chromedriver + 720303
8   chromedriver                        0x00000001059dfab1 chromedriver + 723633
9   chromedriver                        0x0000000105

Unnamed: 0,Model,Brand,Year,Interior Color,Transmission,Color,Price,Interior Color.1,Body Type,Drive Type,MPG,Engine,Transmission.1,Model Code
0,Camry,Toyota,2026,Black Fabric,Electronically controlled Continuously Variabl...,Ocean Gem,30195,Black Fabric,4dr Car,Front Wheel Drive,49 / 52[3]*EPA ESTIMATED,2.5L 4-Cyl. Gas/Electric Hybrid,Electronically controlled Continuously Variabl...,2559
1,RAV4,Toyota,2025,Black Fabric,Direct Shift 8-Speed Electronically Controlled...,Ice Cap,32190,Black Fabric,Sport Utility,Front Wheel Drive,35 / 27[3]*EPA ESTIMATED,2.5L 4-Cyl. Engine,Direct Shift 8-Speed Electronically Controlled...,4430
2,RAV4 Hybrid,Toyota,2025,Black Fabric,Electronically controlled Continuously Variabl...,Midnight Black Metallic,35549,Black Fabric,Sport Utility,All Wheel Drive,38 / 41[3]*EPA ESTIMATED,2.5L 4-Cyl. Hybrid Engine,Electronically controlled Continuously Variabl...,4435
3,Tacoma,Toyota,2025,Black Fabric,8-Speed Automatic Transmission,Ice Cap,33459,Black Fabric,XtraCab,Rear Wheel Drive,26 / 20[3]*EPA ESTIMATED,i-FORCE 2.4L 4-Cyl. Turbo Engine,8-Speed Automatic Transmission,7162
4,Corolla,Toyota,2026,Black Fabric,Dynamic Shift Continuously Variable Transmissi...,Ice Cap,25364,Black Fabric,4dr Car,Front Wheel Drive,41 / 32[3]*EPA ESTIMATED,2.0L 4-Cyl. Engine,Dynamic Shift Continuously Variable Transmissi...,1852
5,Tacoma,Toyota,2026,Black Fabric w/Smoke Silver,8-Speed Automatic Transmission,Underground,41284,Black Fabric w/Smoke Silver,Double Cab,Rear Wheel Drive,26 / 21[3]*EPA ESTIMATED,i-FORCE 2.4L 4-Cyl. Turbo Engine,8-Speed Automatic Transmission,7170
6,Corolla Cross,Toyota,2026,Light Gray Fabric,Continuously Variable Transmission with intell...,Jet Black,26598,Light Gray Fabric,Sport Utility,Front Wheel Drive,33 / 31[3]*EPA ESTIMATED,2.0L 4-Cyl. Engine,Continuously Variable Transmission with intell...,6301
7,Grand Highlander Hybrid,Toyota,2026,Black SofTex&#xAE;,Electronically controlled Continuously Variabl...,Wind Chill Pearl [extra_cost_color],49450,Black SofTex®,Sport Utility,All Wheel Drive,32 / 36[3]*EPA ESTIMATED,2.5-Liter 4-Cylinder Hybrid Engine,Electronically controlled Continuously Variabl...,6722
8,Corolla Hybrid,Toyota,2026,Black Fabric,Electronically controlled Continuously Variabl...,Midnight Black Metallic,26168,Black Fabric,4dr Car,Front Wheel Drive,46 / 53[3]*EPA ESTIMATED,1.8L 4-Cyl. Hybrid Engine,Electronically controlled Continuously Variabl...,1882
9,Tundra,Toyota,2026,Black Fabric,10-Speed Electronically Controlled automatic T...,Ice Cap,53793,Black Fabric,CrewMax,Four Wheel Drive,22 / 17[3]*EPA ESTIMATED,i-FORCE V6 Engine,10-Speed Electronically Controlled automatic T...,8361


In [2]:
new_order = ['Model', 'Brand', 'Year', 'Transmission', 'Price', 'Body Type', 'MPG', 'Engine']
toyota_df = result_df[new_order]
toyota_df

Unnamed: 0,Model,Brand,Year,Transmission,Transmission.1,Price,Body Type,MPG,Engine
0,Camry,Toyota,2026,Electronically controlled Continuously Variabl...,Electronically controlled Continuously Variabl...,30195,4dr Car,49 / 52[3]*EPA ESTIMATED,2.5L 4-Cyl. Gas/Electric Hybrid
1,RAV4,Toyota,2025,Direct Shift 8-Speed Electronically Controlled...,Direct Shift 8-Speed Electronically Controlled...,32190,Sport Utility,35 / 27[3]*EPA ESTIMATED,2.5L 4-Cyl. Engine
2,RAV4 Hybrid,Toyota,2025,Electronically controlled Continuously Variabl...,Electronically controlled Continuously Variabl...,35549,Sport Utility,38 / 41[3]*EPA ESTIMATED,2.5L 4-Cyl. Hybrid Engine
3,Tacoma,Toyota,2025,8-Speed Automatic Transmission,8-Speed Automatic Transmission,33459,XtraCab,26 / 20[3]*EPA ESTIMATED,i-FORCE 2.4L 4-Cyl. Turbo Engine
4,Corolla,Toyota,2026,Dynamic Shift Continuously Variable Transmissi...,Dynamic Shift Continuously Variable Transmissi...,25364,4dr Car,41 / 32[3]*EPA ESTIMATED,2.0L 4-Cyl. Engine
5,Tacoma,Toyota,2026,8-Speed Automatic Transmission,8-Speed Automatic Transmission,41284,Double Cab,26 / 21[3]*EPA ESTIMATED,i-FORCE 2.4L 4-Cyl. Turbo Engine
6,Corolla Cross,Toyota,2026,Continuously Variable Transmission with intell...,Continuously Variable Transmission with intell...,26598,Sport Utility,33 / 31[3]*EPA ESTIMATED,2.0L 4-Cyl. Engine
7,Grand Highlander Hybrid,Toyota,2026,Electronically controlled Continuously Variabl...,Electronically controlled Continuously Variabl...,49450,Sport Utility,32 / 36[3]*EPA ESTIMATED,2.5-Liter 4-Cylinder Hybrid Engine
8,Corolla Hybrid,Toyota,2026,Electronically controlled Continuously Variabl...,Electronically controlled Continuously Variabl...,26168,4dr Car,46 / 53[3]*EPA ESTIMATED,1.8L 4-Cyl. Hybrid Engine
9,Tundra,Toyota,2026,10-Speed Electronically Controlled automatic T...,10-Speed Electronically Controlled automatic T...,53793,CrewMax,22 / 17[3]*EPA ESTIMATED,i-FORCE V6 Engine
