In [1]:
# Import necessary libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import csv

# Function to scrape car data from Cars24
def scraper(url=None, timeout=30):
    """Scrape used-car listings from a Cars24 search results page.

    Args:
        url: Page to fetch. When omitted, the Cars24 Mahindra search URL
            that this notebook was written for is used.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict mapping column names ('Car Model', 'Total Kilometers Driven',
        'Year of Manufacture', 'Fuel Type', 'Transmission Type', 'Price') to
        lists of scraped values, or None if the request fails or the server
        answers with a status other than 200. A value that could not be
        extracted from a listing is stored as None.
    """
    if url is None:
        url = "https://www.cars24.com/buy-used-car?f=make%3A%3D%3Amahindra&sort=bestmatch&serveWarrantyCount=true&listingSource=TabFilter&storeCityId=2378"

    def item_text(items, index):
        # A spec list shorter than expected yields None instead of IndexError.
        return items[index].text if index < len(items) else None

    # Without a timeout a stalled connection would hang the cell forever.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data. Request error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, "html.parser")

    # Heading text is split at the first space: the part before it is stored
    # as the year, the remainder as the model name.
    cars, years = [], []
    for heading in soup.find_all("h3", {"class": "_11dVb"}):
        year, _, model = heading.text.partition(' ')
        years.append(year)
        cars.append(model or None)

    # Items 0, 2 and 4 of each spec list are used; the odd positions are
    # presumably separators -- TODO confirm against the live markup.
    dist, fuel, trans = [], [], []
    for spec_list in soup.find_all("ul", {"class": "_3J2G-"}):
        items = spec_list.find_all('li')  # look up once, not once per field
        dist.append(item_text(items, 0))
        fuel.append(item_text(items, 2))
        trans.append(item_text(items, 4))

    prices = [tag.text for tag in soup.find_all('strong', {"class": "_3RL-I"})]

    data_dict = {
        'Car Model': cars,
        'Total Kilometers Driven': dist,
        'Year of Manufacture': years,
        'Fuel Type': fuel,
        'Transmission Type': trans,
        'Price': prices
    }

    # The columns come from three independent page queries, so nothing
    # guarantees that they line up; make a mismatch visible to the user.
    lengths = {name: len(values) for name, values in data_dict.items()}
    if len(set(lengths.values())) > 1:
        print(f"Warning: scraped columns have unequal lengths {lengths}; rows may be misaligned.")
    elif not cars:
        print("Warning: no listings were found; the page markup may have changed.")

    return data_dict

# Function to save data to CSV
def save_to_csv(data_dict, filename):
    """Write a column-oriented dict to `filename` as a UTF-8 CSV file.

    The dict keys form the header row. The value lists are combined
    position by position into data rows; because they are combined with
    `zip`, output stops at the end of the shortest list.

    Args:
        data_dict: Mapping of column name to a list of cell values.
        filename: Path of the CSV file to create or overwrite.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv_out = csv.writer(out_file)
        header = list(data_dict.keys())
        csv_out.writerow(header)
        # Transpose the columns into rows and emit them one at a time.
        for record in zip(*data_dict.values()):
            csv_out.writerow(record)
    print(f"Data saved to {filename}")

# Function to load and clean CSV data
def load_and_clean_csv(filename):
    """Read a CSV file and return it without duplicate or incomplete rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A DataFrame in which exact duplicate rows have been removed first,
        followed by every row that contains a missing value. The original
        row labels are kept (the index is not reset).
    """
    raw_frame = pd.read_csv(filename)
    deduplicated = raw_frame.drop_duplicates()
    return deduplicated.dropna()


In [2]:
# Fetch the listings; scraper() hands back None when the download fails
data = scraper()

# Report a failed scrape, otherwise persist the result as CSV
if data is None:
    print("Data scraping failed.")
else:
    save_to_csv(data, 'Mahindra_cars_data.csv')


Data saved to Mahindra_cars_data.csv


In [3]:
# Read the saved CSV back, dropping duplicate and incomplete rows
cleaned_data = load_and_clean_csv('Mahindra_cars_data.csv')

# Preview up to 13 cleaned rows, or report that nothing came back
if cleaned_data is None:
    print("Data cleaning failed.")
else:
    print("\nCleaned DataFrame:")
    display(cleaned_data.head(13))



Cleaned DataFrame:


Unnamed: 0,Car Model,Total Kilometers Driven,Year of Manufacture,Fuel Type,Transmission Type,Price
0,Mahindra XUV300 W6 1.5 DIESEL,"16,454 km",2022,Diesel,Manual,₹10.45 Lakh
1,Mahindra XUV300 W8 (O) 1.2 PETROL,"69,558 km",2019,Petrol,Manual,₹7.71 Lakh
2,Mahindra XUV300 W6 1.2 PETROL AMT,"13,010 km",2022,Petrol,Automatic,₹9.62 Lakh
3,Mahindra XUV300 W8 1.5 DIESEL,"80,839 km",2019,Diesel,Manual,₹8.28 Lakh
4,Mahindra XUV500 W10 AT,"57,449 km",2017,Diesel,Automatic,₹11.38 Lakh
5,Mahindra XUV300 W6 1.5 DIESEL AMT,"38,524 km",2021,Diesel,Automatic,₹9.28 Lakh
6,Mahindra XUV500 W10 AT,"84,784 km",2017,Diesel,Automatic,₹10.44 Lakh
7,Mahindra XUV300 W8 1.5 DIESEL,"51,938 km",2019,Diesel,Manual,₹8.84 Lakh
8,Mahindra XUV300 W6 1.2 PETROL,"39,094 km",2019,Petrol,Manual,₹7.89 Lakh
9,Mahindra XUV300 W8 (O) 1.2 PETROL AMT,"20,405 km",2022,Petrol,Automatic,₹11.27 Lakh
