### TrueCar.com

In [None]:
import requests
from lxml import html

# A single Session is reused for every results-page request in this cell.
session = requests.Session()

# Listing links gathered so far; the set collapses links that repeat.
all_car_hrefs = set()

for page_number in range(1, 2):
    params = {'page': page_number}
    res = session.get(
        'https://www.truecar.com/used-cars-for-sale/listings/location-davis-ca/',
        params=params,
    )

    # Anything other than HTTP 200 is reported and the page is skipped.
    if res.status_code != 200:
        print(f"Failed to fetch page {page_number}, status code: {res.status_code}")
        continue

    page = html.fromstring(res.text)

    # Keep only the part of each link that precedes its first '?'.
    hrefs = [
        link.partition('?')[0]
        for link in page.xpath("//li[contains(@class, 'col-md-6')]//a/@href")
    ]
    all_car_hrefs.update(hrefs)

    print(f"Page {page_number} hrefs: {len(hrefs)}")

print(f"Total car hrefs collected: {len(all_car_hrefs)}")


Page 1 hrefs: 33
Total car hrefs collected: 30


In [None]:
import requests
from lxml import html
import pandas as pd

def get_all_car_hrefs(base_url, location, pages=2):
    """Collect the unique listing links from the paginated search results.

    Requests ``{base_url}{location}/`` once per page, passing the page number
    as the ``page`` query parameter, for pages 1 through ``pages`` inclusive.
    A page that cannot be fetched or parsed is reported on stdout and skipped;
    it never aborts the remaining pages.

    Args:
        base_url: URL prefix that ``location`` is appended to.
        location: Location slug appended to ``base_url`` (e.g. ``"davis-ca"``).
        pages: Number of result pages to request, starting at page 1.

    Returns:
        set: Every href found, each truncated at its first ``?``.
    """
    session = requests.Session()
    all_car_hrefs = set()

    for page_number in range(1, pages + 1):
        try:
            res = session.get(
                f'{base_url}{location}/',
                params={'page': page_number},
                timeout=30,  # without a timeout a stalled server blocks forever
            )

            if res.status_code != 200:
                print(f"Failed to fetch page {page_number}, status code: {res.status_code}")
                continue

            page = html.fromstring(res.text)
            hrefs = page.xpath("//li[contains(@class, 'col-md-6')]//a/@href")
            hrefs = [href.split('?')[0] for href in hrefs]

        except Exception as e:
            # Best-effort crawl: report the failing page (not an undefined
            # ``href``) and move on without touching the per-page summary,
            # which would otherwise read an unbound or stale ``hrefs``.
            print(f"Error processing page {page_number}: {e}")
            continue

        all_car_hrefs.update(hrefs)
        print(f"Page {page_number} hrefs: {len(hrefs)}")

    print(f"Total car hrefs collected: {len(all_car_hrefs)}")
    return all_car_hrefs


# (CSV column, XPath relative to one spec container, index of the wanted
# non-blank text node). The order here is the column order in the CSV.
_SPEC_FIELDS = (
    ('Exterior', ".//div[contains(text(), 'Exterior:')]//text()", 1),
    ('Interior', ".//div[contains(text(), 'Interior:')]//text()", 1),
    ('Miles', ".//svg[use/@href='#speed']/following-sibling::div//text()", 0),
    ('Fuel Type', ".//svg[use/@href='#gas-can']/following-sibling::div//text()", 1),
    ('Fuel Efficiency (mileage)', ".//svg[use/@href='#gas-pump']/following-sibling::div//text()", 0),
    ('EPA Range', ".//svg[use/@href='#route']/following-sibling::div//text()", 1),
    ('Transmission', ".//svg[use/@href='#transmission']/following-sibling::div//text()", 0),
    ('Drivetrain', ".//svg[use/@href='#drivetrain']/following-sibling::div//text()", 0),
    ('Engine', ".//svg[use/@href='#engine']/following-sibling::div//text()", 0),
    ('Location', ".//svg[use/@href='#location_on']/following-sibling::div//text()", 0),
    ('Listing Status', ".//svg[use/@href='#calendar_month']/following-sibling::div//text()", 0),
    ('VIN', ".//svg[use/@href='#vin']/following-sibling::div//text()", 1),
    ('Stock Number', ".//svg[use/@href='#stock-number']/following-sibling::div//text()", 1),
)


def _nth_text(nodes, index):
    """Return the ``index``-th non-blank stripped string in ``nodes``, or None."""
    cleaned = [item.strip() for item in nodes if item.strip()]
    return cleaned[index] if index < len(cleaned) else None


def _parse_vehicle_condition(page):
    """Extract the vehicle-condition columns from a listing page.

    Every value is None when the ``vehicleConditionHistory`` section is absent.
    """
    sections = page.xpath('//div[@data-test="vehicleConditionHistory"]')
    if not sections:
        return {
            'Accidents': None,
            'Owners': None,
            'Car Title Status': None,
            'Use Type': None,
            'Vehicle Last Inspected': None,
        }

    section = sections[0]
    vehicle_details = [
        detail.strip()
        for detail in section.xpath('.//div[@class="text-sm"]/text()')
    ]

    accidents = vehicle_details[0].replace('Accidents', '').strip() if len(vehicle_details) > 0 else None

    owners = vehicle_details[1] if len(vehicle_details) > 1 else None
    if owners:
        # Check the plural first: 'Owner' is a substring of 'Owners'.
        if 'Owners' in owners:
            owners = owners.replace('Owners', '').strip()
        elif 'Owner' in owners:
            owners = owners.replace('Owner', '').strip()
        else:
            owners = None

    car_title_status = vehicle_details[2] if len(vehicle_details) > 2 else None
    use_type = vehicle_details[3].replace('Use', '').strip() if len(vehicle_details) > 3 else None

    last_inspected = section.xpath('.//div[@class="text-xs text-muted"]/text()')
    last_inspected = (
        ''.join(last_inspected).replace('Condition data as of', '').strip()
        if last_inspected else None
    )

    return {
        'Accidents': accidents,
        'Owners': owners,
        'Car Title Status': car_title_status,
        'Use Type': use_type,
        'Vehicle Last Inspected': last_inspected,
    }


def scrape_car_details(all_car_hrefs):
    """Scrape each listing page and write the results to ``car_details.csv``.

    For every href the page ``https://www.truecar.com{href}`` is fetched and
    one row is appended per ``<div class="row pt-3">`` found on it, so a
    listing can contribute zero or several rows. A field that is missing, or
    has fewer text nodes than expected, is stored as None instead of aborting
    the whole car. Pages that fail to download or parse are reported on
    stdout and skipped.

    Args:
        all_car_hrefs: Iterable of listing paths relative to
            ``https://www.truecar.com``; any query string is discarded.

    Returns:
        None. The rows are written to ``car_details.csv`` in the current
        working directory.
    """
    session = requests.Session()
    car_list = []

    for href in all_car_hrefs:
        try:
            href = href.split('?')[0]
            car_page_href = 'https://www.truecar.com' + href
            res = session.get(car_page_href, timeout=30)

            # Same policy as the listing crawl: do not parse error pages.
            if res.status_code != 200:
                print(f"Failed to fetch {car_page_href}, status code: {res.status_code}")
                continue

            page = html.fromstring(res.text)

            car_name = page.xpath("//h1[contains(@class, 'heading-3_5')]//text()")
            car_name = car_name[0].strip() if car_name else "Unknown Car"

            car_health = page.xpath("//div[contains(@class, 'flex shrink-0')]//span//text()")
            car_health = car_health[0].strip() if car_health else "None"

            price = page.xpath("//div[contains(@class, 'heading-2')]//text()")
            price = price[0].strip() if price else "None"

            # Page-level data: identical for every spec container, so it is
            # parsed once per car rather than once per container.
            condition = _parse_vehicle_condition(page)

            for spec in page.xpath("//div[@class='row pt-3']"):
                row = {
                    'Car Name': car_name,
                    'Car Webpage': car_page_href,
                    'Car Health': car_health,
                    'Price': price,
                }
                for column, xpath, index in _SPEC_FIELDS:
                    row[column] = _nth_text(spec.xpath(xpath), index)
                row.update(condition)
                car_list.append(row)

            print(f"Successfully processed {car_name}")

        except Exception as e:
            # Best-effort scrape: one bad listing must not stop the run.
            print(f"Error processing {href}: {e}")

    car_df = pd.DataFrame(car_list)

    car_df.to_csv('car_details.csv', index=False)

    print("All car details have been saved to 'car_details.csv'")

# Search configuration: the location slug is appended to the URL prefix.
base_url = "https://www.truecar.com/used-cars-for-sale/listings/location-"
location = "davis-ca"

# Gather the listing links from the first two result pages, then scrape each
# listing and write the rows to car_details.csv.
all_car_hrefs = get_all_car_hrefs(base_url=base_url, location=location, pages=2)
scrape_car_details(all_car_hrefs=all_car_hrefs)

Page 1 hrefs: 33
Page 2 hrefs: 33
Total car hrefs collected: 60
3
Successfully processed 2019 Chevrolet Malibu LT with 1LT
1
Successfully processed 2020 Chevrolet Sonic 5dr HB LT w/1FL
2
Successfully processed 2017 Jeep Wrangler Sport
1
Successfully processed 2017 Chevrolet Silverado 1500 LT Crew Cab Short Box 2WD
9
Successfully processed 2007 Toyota Sienna XLE 7-Passenger FWD
2
Successfully processed 2021 Nissan LEAF S PLUS
2
Successfully processed 2022 Chevrolet Malibu LT
3
Successfully processed 2014 GMC Yukon Denali AWD
2
Successfully processed 2016 Kia Soul Base Automatic
2
Successfully processed 2011 Jeep Grand Cherokee Laredo 4WD
1
Successfully processed 2022 Toyota Corolla LE CVT
2
Successfully processed 2016 Lincoln MKC Select FWD
2
Successfully processed 2017 MINI Countryman FWD
2
Successfully processed 2018 Land Rover Range Rover Velar P380 First Edition
3
Successfully processed 2019 Dodge Charger SXT RWD
3
Successfully processed 2014 Chrysler 200 LX Sedan
3
Successfully pro

In [None]:
# import requests
# from lxml import html

# session = requests.Session()

# all_car_hrefs = set()

# for page_number in range(2, 3):

#     params = {'page': page_number}

#     res = session.get('https://www.truecar.com/used-cars-for-sale/listings/location-davis-ca/', params=params)

#     if res.status_code != 200:
#         print(f"Failed to fetch page {page_number}, status code: {res.status_code}")
#         continue

#     page = html.fromstring(res.text)

#     hrefs = page.xpath("//li[contains(@class, 'col-md-6')]//a/@href")
#     hrefs = [href.split('?')[0] for href in hrefs]
#     all_car_hrefs.update(hrefs)

#     print(f"Page {page_number} hrefs: {len(hrefs)}")

# print(f"Total car hrefs collected: {len(all_car_hrefs)}\n\n")

# vehicle_report_hrefs_vin = []
# for href in all_car_hrefs:
#     href = href.split('?')[0]
#     res = session.get(f'https://www.truecar.com{href}')

#     page = html.fromstring(res.text)

#     car_name = page.xpath("//h1[contains(@class, 'heading-3_5')]//text()")
#     car_name = car_name[0].strip() if car_name else "Unknown Car"
#     print(f"Car Name: {car_name}")

#     vehicle_condition_div = page.xpath('//div[@data-test="vehicleConditionHistory"]')

#     vehicle_report_href = vehicle_condition_div[0].xpath('.//a/@href')[0]

#     print(f"Vehicle Report href: {vehicle_report_href}")

#     # for div in vehicle_condition_div:
#     vehicle_details = vehicle_condition_div[0].xpath('.//div[@class="text-sm"]/text()')
#     vehicle_details = [detail.strip() for detail in vehicle_details]
#     print(f"Vehicle Details: {vehicle_details}")

#     vehicle_last_inspected = vehicle_condition_div[0].xpath('.//div[@class="text-xs text-muted"]/text()')
#     vehicle_last_inspected = ''.join(vehicle_last_inspected)
#     print(f"Vehicle Last Inspected: {vehicle_last_inspected}")

#     #vin
#     car_specs = page.xpath("//div[@class='row pt-3']")
#     for spec in car_specs:
#       vin = spec.xpath(".//svg[use/@href='#vin']/following-sibling::div//text()")
#       vin_text = None
#       if vin:
#           vin_text = [item.strip() for item in vin if item.strip()][1]
#     vehicle_report_hrefs_vin.append((vehicle_report_href, vin_text))
#     print('*' * 60)

Page 2 hrefs: 33
Total car hrefs collected: 33


Car Name: 2015 BMW i3 60 Ah with Range Extender
Vehicle Report href: https://www.autocheck.com/vehiclehistory/?siteID=8900&vin=WBY1Z4C5XFV500167
Vehicle Details: ['0 Accidents', '2 Owners', 'Clean Title', 'Personal Use']
Vehicle Last Inspected: Condition data as of 2/4/2025
************************************************************
Car Name: 2014 Ford F-150 Lariat SuperCrew 6.5' Box 4WD
Vehicle Report href: https://www.autocheck.com/vehiclehistory/?siteID=8900&vin=1FTFW1ET3EFD02274
Vehicle Details: ['0 Accidents', '2 Owners', 'Clean Title', 'Personal Use']
Vehicle Last Inspected: Condition data as of 2/7/2025
************************************************************
Car Name: 2024 Acura ZDX A-Spec Package AWD
Vehicle Report href: https://www.truecar.com/abp/api/vendor/autocheck/reports/tRXS9I0Gbp9eOHwo__3DyI01Vr3Y1isoyjJoxhE7say1oKz3Zx37Ig
Vehicle Details: ['0 Accidents', '1 Owner', 'Clean Title', 'Personal Use']
Vehicle Last Inspec

In [None]:
import requests
from lxml import html

# Scrape every listing in `all_car_hrefs` (built by an earlier cell) into
# `car_details_dict` and echo each car's fields to stdout.
session = requests.Session()

# NOTE(review): keyed by car name, so two listings with the same name
# overwrite each other -- confirm whether that is intended.
car_details_dict = {}


def _text_at(nodes, index):
    """Return the index-th non-blank, stripped string in nodes, or None."""
    cleaned = [item.strip() for item in nodes if item.strip()]
    return cleaned[index] if index < len(cleaned) else None


for href in all_car_hrefs:
    href = href.split('?')[0]
    res = session.get(f'https://www.truecar.com{href}')

    car_page_href = 'https://www.truecar.com' + href
    page = html.fromstring(res.text)

    car_name = page.xpath("//h1[contains(@class, 'heading-3_5')]//text()")
    car_name = car_name[0].strip() if car_name else "Unknown Car"

    car_health = page.xpath("//div[contains(@class, 'flex shrink-0')]//span//text()")
    car_health = car_health[0].strip() if car_health else "None"

    price = page.xpath("//div[contains(@class, 'heading-2')]//text()")
    price = price[0].strip() if price else "None"

    # Reset every spec field for this car so that a page without spec
    # containers cannot inherit the previous car's values (or hit a
    # NameError on the very first car).
    exterior_text = interior_text = miles_text = fuel_type_text = None
    fuel_efficiency_text = epa_range_text = transmission_text = None
    drivetrain_text = engine_text = location_text = None
    listing_status_text = vin_text = stock_number_text = None

    car_specs = page.xpath("//div[@class='row pt-3']")

    # Each container overwrites the fields, so the last container wins
    # (same as before); a missing or short field becomes None.
    for spec in car_specs:
        exterior_text = _text_at(spec.xpath(".//div[contains(text(), 'Exterior:')]//text()"), 1)
        interior_text = _text_at(spec.xpath(".//div[contains(text(), 'Interior:')]//text()"), 1)

        # Alternative selector: .//svg[@aria-label='Mileage']/following-sibling::div//text()
        miles_text = _text_at(spec.xpath(".//svg[use/@href='#speed']/following-sibling::div//text()"), 0)

        # Alternative selector: .//div[contains(text(), 'Fuel Type:')]//text()
        fuel_type_text = _text_at(spec.xpath(".//svg[use/@href='#gas-can']/following-sibling::div//text()"), 1)

        fuel_efficiency_text = _text_at(spec.xpath(".//svg[use/@href='#gas-pump']/following-sibling::div//text()"), 0)

        # Alternative selector: .//div[contains(text(), 'EPA Range:')]//text()
        epa_range_text = _text_at(spec.xpath(".//svg[use/@href='#route']/following-sibling::div//text()"), 1)

        transmission_text = _text_at(spec.xpath(".//svg[use/@href='#transmission']/following-sibling::div//text()"), 0)

        # Alternative selector: .//svg[@aria-label='Drivetrain']/following-sibling::div//text()
        drivetrain_text = _text_at(spec.xpath(".//svg[use/@href='#drivetrain']/following-sibling::div//text()"), 0)

        engine_text = _text_at(spec.xpath(".//svg[use/@href='#engine']/following-sibling::div//text()"), 0)
        location_text = _text_at(spec.xpath(".//svg[use/@href='#location_on']/following-sibling::div//text()"), 0)
        listing_status_text = _text_at(spec.xpath(".//svg[use/@href='#calendar_month']/following-sibling::div//text()"), 0)
        vin_text = _text_at(spec.xpath(".//svg[use/@href='#vin']/following-sibling::div//text()"), 1)
        stock_number_text = _text_at(spec.xpath(".//svg[use/@href='#stock-number']/following-sibling::div//text()"), 1)

    # Vehicle condition is page-level data, so it is read once per car and
    # tolerates a page where the section is missing.
    vehicle_condition_div = page.xpath('//div[@data-test="vehicleConditionHistory"]')
    if vehicle_condition_div:
        # The report link is available via vehicle_condition_div[0].xpath('.//a/@href').
        vehicle_details = vehicle_condition_div[0].xpath('.//div[@class="text-sm"]/text()')
        vehicle_details = [detail.strip() for detail in vehicle_details]
        vehicle_last_inspected = ''.join(
            vehicle_condition_div[0].xpath('.//div[@class="text-xs text-muted"]/text()')
        )
    else:
        vehicle_details = []
        vehicle_last_inspected = ''

    accidents = vehicle_details[0] if len(vehicle_details) > 0 else None
    accidents = accidents.replace('Accidents', '').strip() if accidents else None

    owners = vehicle_details[1] if len(vehicle_details) > 1 else None
    if owners is not None:
        # Check the plural first: 'Owner' is a substring of 'Owners'.
        if 'Owners' in owners:
            owners = owners.replace('Owners', '').strip()
        elif 'Owner' in owners:
            owners = owners.replace('Owner', '').strip()
        else:
            owners = None

    car_title_status = vehicle_details[2] if len(vehicle_details) > 2 else None

    use_type = vehicle_details[3] if len(vehicle_details) > 3 else None
    if use_type is not None and 'Use' in use_type:
        use_type = use_type.replace('Use', '').strip()

    if 'Condition data as of' in vehicle_last_inspected:
        vehicle_last_inspected = vehicle_last_inspected.replace('Condition data as of', '').strip()

    car_details_dict[car_name] = {
        'Car Name': car_name,
        'Car Webpage': car_page_href,
        'Car Health': car_health,
        'Price': price,
        'Exterior': exterior_text,
        'Interior': interior_text,
        'Miles': miles_text,
        'Fuel Type': fuel_type_text,
        'Fuel Efficiency (mileage)': fuel_efficiency_text,
        'EPA Range': epa_range_text,
        'Transmission': transmission_text,
        'Drivetrain': drivetrain_text,
        'Engine': engine_text,
        'Location': location_text,
        'Listing Status': listing_status_text,
        'VIN': vin_text,
        'Stock Number': stock_number_text,
        'Accidents': accidents,
        'Owners': owners,
        'Car Title Status': car_title_status,
        'Use Type': use_type,
        'Vehicle Last Inspected': vehicle_last_inspected
    }

    # Echo every stored field except the page URL, in column order.
    for label, value in car_details_dict[car_name].items():
        if label != 'Car Webpage':
            print(f"{label}: {value}")
    print('*' * 60)