In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import pandas as pd
import time
import re
from urllib.parse import quote_plus

# Launch the Chrome session that every page fetch below goes through.
options = webdriver.ChromeOptions()
for chrome_flag in (
    '--headless',  # drop this flag if you want to watch the browser window
    '--disable-gpu',
):
    options.add_argument(chrome_flag)
driver = webdriver.Chrome(options=options)

# Lower-case names of the states whose listing pages are visited, in visit order.
# Each name is URL-encoded into the listing URL and title-cased for the
# "Location" column.
states = [
    "andhra pradesh",
    "telangana",
    "karnataka",
    "tamil nadu",
    "maharashtra",
    "kerala",
    "gujarat",
    "madhya pradesh",
    "rajasthan",
    "uttar pradesh",
    "punjab",
    "haryana",
    "bihar",
    "odisha",
    "chhattisgarh",
    "jharkhand",
    "uttarakhand",
    "himachal pradesh",
    "delhi",
    "west bengal",
]

# Data list: one dict per scraped listing, accumulated across every state
all_car_data = []

# Keywords searched for in a card's lower-cased details line. They never change
# between cards, so they are built once instead of on every iteration.
fuels = ["petrol", "diesel", "cng", "electric", "hybrid"]
transmissions = ["manual", "automatic"]

# The model year is only recognised when the title starts with 19xx / 20xx
year_pattern = re.compile(r'^(19|20)\d{2}')

try:
    for state in states:
        # quote_plus encodes the space in multi-word state names as "+"
        url = f"https://www.cardekho.com/used-cars+in+{quote_plus(state)}"
        driver.get(url)
        # Fixed pause before reading page_source (presumably to let the
        # listing finish rendering -- confirm; an explicit wait may be better)
        time.sleep(4)

        soup = BeautifulSoup(driver.page_source, "html.parser")
        car_cards = soup.find_all("div", class_="NewUcExCard posR")

        for card in car_cards:
            car_data = {}

            # Car Name
            name_tag = card.find("h3", class_="title")
            name_text = name_tag.get_text(strip=True) if name_tag else "N/A"
            car_data["Car Name"] = name_text

            # Year: leading four digits of the title, if present
            year_match = year_pattern.match(name_text)
            year = year_match.group() if year_match else "N/A"
            car_data["Year"] = year

            # Brand: word after year
            brand = "N/A"
            if year != "N/A":
                rest = name_text[len(year):].strip()
                # A title that is nothing but a year has no brand word; report
                # it as "N/A" like every other missing field, not as "".
                if rest:
                    brand = rest.split(" ")[0]
            car_data["Brand"] = brand.title()

            # State name as location
            car_data["Location"] = state.title()

            # Price (the tag's text can include a "Compare" label; strip it)
            price_tag = card.find("div", class_="Price hover")
            price_text = price_tag.text.replace("Compare", "").strip() if price_tag else "N/A"
            car_data["Price"] = price_text

            # Fuel Type and Transmission and KM Driven
            fuel = trans = km_driven = "N/A"
            dots = card.find("div", class_="dotsDetails")
            if dots:
                details_text = dots.get_text(" ", strip=True)
                raw_text = details_text.lower()

                # First keyword found anywhere in the details line wins
                fuel = next((f for f in fuels if f in raw_text), "N/A")
                trans = next((t for t in transmissions if t in raw_text), "N/A")

                # Try to extract KM Driven (usually first "•"-separated part)
                first_part = details_text.split("•")[0]
                if "km" in first_part.lower():
                    km_driven = first_part.strip()

            car_data["Fuel Type"] = fuel.title()
            car_data["Transmission"] = trans.title()
            car_data["KM Driven"] = km_driven

            all_car_data.append(car_data)

        print(f"✅ Scraped {len(car_cards)} cars from {state.title()}")
finally:
    # Always shut the browser down, even if a page load or parse step raised,
    # so a failed run does not leave a headless Chrome process behind.
    driver.quit()

# Create and save dataframe
# Passing `columns=` to the constructor fixes the column order and, unlike
# indexing the frame afterwards, still yields an (empty) table instead of a
# KeyError when no listing was scraped.
output_path = "used_cars_cardekho.xlsx"
df = pd.DataFrame(all_car_data, columns=[
    "Car Name", "Brand", "Year", "Location", "Price", "KM Driven", "Fuel Type", "Transmission"
])
df.to_excel(output_path, index=False)
# Message is built from output_path so it always names the file actually written
print(f"📁 Data saved to {output_path}")


✅ Scraped 20 cars from Andhra Pradesh
✅ Scraped 20 cars from Telangana
✅ Scraped 20 cars from Karnataka
✅ Scraped 20 cars from Tamil Nadu
✅ Scraped 20 cars from Maharashtra
✅ Scraped 20 cars from Kerala
✅ Scraped 20 cars from Gujarat
✅ Scraped 20 cars from Madhya Pradesh
✅ Scraped 20 cars from Rajasthan
✅ Scraped 20 cars from Uttar Pradesh
✅ Scraped 20 cars from Punjab
✅ Scraped 20 cars from Haryana
✅ Scraped 20 cars from Bihar
✅ Scraped 20 cars from Odisha
✅ Scraped 20 cars from Chhattisgarh
✅ Scraped 20 cars from Jharkhand
✅ Scraped 20 cars from Uttarakhand
✅ Scraped 20 cars from Himachal Pradesh
✅ Scraped 20 cars from Delhi
✅ Scraped 20 cars from West Bengal
📁 Data saved to used_cars_cardekho.xls
