In [1]:
from selenium import  webdriver
from selenium.webdriver.common.action_chains import  ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import  Options
from selenium.webdriver.chrome.service import  Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select,WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import pandas as pd
import numpy as np
import random
import time
import requests as re
from bs4 import BeautifulSoup as bs

In [2]:
def get_driver():
    """
    to start selenium chrome webdriver 
    
    returns:
    ---------------------------------
    driver: selenium web driver
    
    """
    service = Service(executable_path=r'File Path')
    options = webdriver.ChromeOptions()
    
    options.add_argument('--start-maximized')
    options.add_argument('--Incognito')
    
    driver = webdriver.Chrome(service=service, options=options)    
    
    return driver
    

In [3]:
def load_website(website_url,driver):
    """
    function is to load cars24 website url
    
    args:
    --------------------------------------
    website_url: cars24 website url link
    driver: chrome webdriver
    
    """
    driver.get(website_url)
    driver.implicitly_wait(30)
    driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
    time.sleep(60)

In [4]:
def get_car_urls_homepage(driver):
    """
    function is to scrap all cars urls from cars24 website based on filters like city, model year, etc
    
    return:
    ---------------------------------------
    car_urls_list: scraped car urls list
    
    """
    car_bs = bs(driver.page_source, 'html.parser')
    car_urls_list = []
    for car_element in car_bs.find('div',class_="_2ujGx").find_all(class_="_2kfVy"):
        car_urls_list.append(car_element.get('href'))
        print(car_element.get('href'))
    return car_urls_list

In [5]:
def get_html_individual_car(url):
    """
    function is to get html content of car specific url
    
    args:
    -------------------------------
    url: car specific details url
    
    """
    car_request_data = re.get(url)
    car_bs = bs(car_request_data.content, 'html.parser')
    return car_bs

In [6]:
def get_model_version_price(car_page_bs):
    """
    function is to scrape car model, version and price 
    
    args:
    --------------------------------------------------
    car_page_bs: individual car details page html content
    
    returns:
    --------------------------------------------------
    car_model: model of car
    car_version: version of car
    car_price: cars24 price 
    
    """
    try:
        #print(car_page_bs.find(class_="_2Ximl"))
        car_model = " ".join(car_page_bs.find(class_="_2Ximl").text.split(" ")[1:])

        car_version = car_page_bs.find(class_="_2UHEW").find_all('li')[0].text

        car_price = car_page_bs.find_all(class_="_3i9_p")[1].text
    except:
        car_model = np.NaN
        car_version = np.NaN
        car_price = np.NaN
    
    return car_model, car_version, car_price

In [7]:
def scrape_car_details(car_urls_list,driver):
    """
    function is to scrape all car details from website
    
    args:
    ----------------------------------------------
    car_urls_list: car url links list
    driver: selenium chrome webdriver
    
    """
    count=1
    for car_url in car_urls_list:
        
        #print(count)
        #print(car_url)
        count+=1
        car_html_bs = get_html_individual_car(car_url)

        model, version, price = get_model_version_price(car_html_bs)

        car_data_dict['model'].append(model)
        car_data_dict['version'].append(version)
        car_data_dict['price'].append(price)

        
        car_specifications = car_html_bs.find('div',class_="_36EKv")

        car_specs_names = car_html_bs.find_all(class_="_1GG-X")
        

        for car_index,car_data in enumerate(car_specifications.find_all(class_="media")):


            feature = (car_data.find(class_="_1GG-X").text).lower().strip()
            data = (car_data.find(class_="_3gHeV").text)

            if car_index==3 and feature!="engine capacity":

                car_data_dict['engine capacity'].append(np.NAN)
                car_data_dict[feature].append(data)
                
            else:
                car_data_dict[feature].append(data)

In [8]:
car_data_dict = {'reg year':[],
 'make year':[],
 'reg number':[],
 'engine capacity':[],
 'insurance':[],
 'spare key':[],
 'transmission':[],
 'km driven':[],
 'ownership':[],
 'fuel type':[],
 'price':[],
 'model':[],
 'version':[]}

In [9]:
def scrape_car_driver(website_link):
    """
    function is to call all functions to get car details from cars24 website
    
    args:
    ----------------------------------
    website_link: cars24 website url link
    
    """
    
    driver = get_driver()
    
    load_website(website_link,driver)
    
    car_urls_list = get_car_urls_homepage(driver)
    
    driver.quit()
    
    scrape_car_details(car_urls_list,driver)
    
    

In [10]:
website_url = "https://www.cars24.com/buy-used-car?f=year%3Abw%3A2020%2C2023&sort=lhl&serveWarrantyCount=true&gaId=704663877.1697865901&listingSource=TabFilter&storeCityId=2423&pinId=411001"

In [11]:
scrape_car_driver(website_url)

https://www.cars24.com/buy-used-hyundai-aura-2021-cars-pune-10612057749/
https://www.cars24.com/buy-used-tata-altroz-2022-cars-pune-10623153720/
https://www.cars24.com/buy-used-maruti-celerio-2021-cars-pune-10634558724/
https://www.cars24.com/buy-used-maruti-s-presso-2020-cars-pune-10682259746/
https://www.cars24.com/buy-used-honda-amaze-2021-cars-pune-10680651743/
https://www.cars24.com/buy-used-maruti-celerio-2021-cars-pune-10638752783/
https://www.cars24.com/buy-used-kia-carens-2022-cars-pune-10632918761/
https://www.cars24.com/buy-used-hyundai-aura-2021-cars-pune-10641458745/
https://www.cars24.com/buy-used-renault-triber-2020-cars-pune-10606558720/
https://www.cars24.com/buy-used-honda-amaze-2021-cars-pune-10694710753/
https://www.cars24.com/buy-used-tata-altroz-2021-cars-pune-10690750731/
https://www.cars24.com/buy-used-hyundai-venue-2020-cars-pune-10677250721/
https://www.cars24.com/buy-used-maruti-new-wagon-r-2021-cars-pune-10620053723/
https://www.cars24.com/buy-used-hyundai-a

ReadTimeout: HTTPSConnectionPool(host='www.cars24.com', port=443): Read timed out. (read timeout=None)

In [None]:
cars_df = pd.DataFrame(car_data_dict)

In [None]:
cars_df.to_csv("final_cars24.csv",index=False)