In [1]:
pip install pandas matplotlib seaborn scikit-learn


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Load the dataset
file_path = r"C:\Users\sonur\OneDrive\Desktop\Car_Dheko\Merged_car\merged_car.csv"
df = pd.read_csv(file_path)

# Check for missing values
missing_values = df.isnull().sum()
print("Missing values in each column:")
print(missing_values)

# Optionally, drop rows with missing values or fill them
# df.dropna(inplace=True)  # Drop rows with missing values
# or fill missing values, for example:
# df.fillna(df.median(), inplace=True)  # Fill with median

# Display the first few rows of the dataset
print("First few rows of the dataset:")
print(df.head())

# Descriptive Statistics
print("\nDescriptive Statistics:")
summary_stats = df.describe()
print(summary_stats)

# Calculate mode for each column
mode = df.mode().iloc[0]
print("\nMode of the dataset:")
print(mode)

# Calculate additional statistics
print("\nAdditional Statistics:")
additional_stats = {
    'median': df.median(),
    'std_dev': df.std()
}
print("Median:")
print(additional_stats['median'])
print("Standard Deviation:")
print(additional_stats['std_dev'])

# Data Visualization
plt.figure(figsize=(10, 6))
sns.scatterplot(x=df['Mileage'], y=df['Price'])
plt.title('Price vs. Mileage')
plt.xlabel('Mileage')
plt.ylabel('Price')
plt.show()

plt.figure(figsize=(10, 6))
sns.histplot(df['Price'], kde=True)
plt.title('Distribution of Car Prices')
plt.xlabel('Price')
plt.show()

plt.figure(figsize=(10, 6))
sns.boxplot(y=df['Price'])
plt.title('Box Plot of Car Prices')
plt.ylabel('Price')
plt.show()

plt.figure(figsize=(12, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# Feature Selection
# Assuming 'Price' is the target variable
X = df.drop(columns=['Price'])
y = df['Price']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model to get feature importance
model = RandomForestRegressor()
model.fit(X_train, y_train)

# Get feature importances
feature_importances = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)

# Save the modified DataFrame to a new CSV file
output_file_path = r"C:\Users\sonur\OneDrive\Desktop\Car_Dheko\EDA_car.csv"
df.to_csv(output_file_path, index=False)

# Display feature importances
print("\nFeature Importances:")
print(feature_importances)


Missing values in each column:
City                         0
car_links                    0
fuel_type                    0
body_type                    4
kilometers_driven            0
transmission                 0
owner                        0
oem                          0
model                        0
year                         0
variant                      0
price                        0
registration_year           37
insurance_validity           7
fuel_type_overview           0
seats                        7
kms_driven                   2
rto                        952
comfort_convenience         65
interior_features           76
exterior_features           90
safety_features             97
entertainment_features    1078
mileage                   8431
engine                    8431
max_power                   65
torque                    8431
wheel_size                8431
bhp                       2354
rpm                       2354
consolidated_data            0
dtype: i

TypeError: Cannot convert [['Bangalore' 'Bangalore' 'Bangalore' ... 'Kolkata' 'Kolkata' 'Kolkata']
 ['https://www.cardekho.com/used-car-details/used-Maruti-Celerio-Vxi-cars-Bangalore_a12b21b4-e26a-41bf-8f47-7e79a725a246.htm'
  'https://www.cardekho.com/buy-used-car-details/used-Ford-Ecosport-1.5-Petrol-Titanium-Bsiv-cars-Bangalore_d88ffc79-1d90-405c-a944-051888866d13.htm'
  'https://www.cardekho.com/used-car-details/used-Tata-Tiago-1.2-Revotron-Xz-cars-Bangalore_8554c91f-7b6e-417d-aeca-b8c5ac6794a5.htm'
  ...
  'https://www.cardekho.com/used-car-details/used-Mercedes-benz-C-class-C-200-Cgi-Elegance-cars-Kolkata_662a1be2-da9d-4f50-93b8-767363d4582b.htm'
  'https://www.cardekho.com/used-car-details/used-Maruti-Ritz-Zxi-cars-Kolkata_db25aac6-4bd0-46d4-9127-826e8df2dc53.htm'
  'https://www.cardekho.com/used-car-details/used-Renault-Duster-110ps-Diesel-Rxz-Awd-cars-Kolkata_c2c28152-5e69-4872-a5c7-0fc442bdd9d4.htm']
 ['Petrol' 'Petrol' 'Petrol' ... 'Petrol' 'Petrol' 'Diesel']
 ...
 [nan
  'Cd Player, Radio, Speakers Front, Speakers Rear, Integrated2Din Audio, Usb Auxiliary Input, Bluetooth, Touch Screen, Number Of Speaker'
  'Cd Player, Radio, Speakers Front, Speakers Rear, Integrated2Din Audio, Usb Auxiliary Input, Bluetooth, Number Of Speaker'
  ...
  'Cd Player, Cd Changer, Radio, Audio System Remote Control, Speakers Front, Speakers Rear'
  'Cd Player, Radio, Speakers Front, Speakers Rear, Usb Auxiliary Input'
  'Cd Player, Radio, Speakers Front, Speakers Rear, Integrated2Din Audio, Usb Auxiliary Input, Bluetooth, Touch Screen, Number Of Speaker']
 ['67.04bhp@6000rpm' '121.31bhp@6500rpm' '84bhp@6000rpm' ...
  '186bhp@5600rpm' '85.80bhp@6000rpm' '108.45bhp@4000rpm']
 ["{'fuel_type': 'Petrol', 'body_type': 'Hatchback', 'kilometers_driven': '1,20,000', 'transmission': 'Manual', 'owner': '3rd Owner', 'oem': 'Maruti', 'model': 'Maruti Celerio', 'year': 2015, 'variant': 'VXI', 'price': '₹ 4 Lakh', 'registration_year': '2015', 'insurance_validity': 'Third Party insurance', 'fuel_type_overview': 'Petrol', 'seats': '5 Seats', 'kms_driven': '1,20,000 Kms', 'rto': 'KA51', 'comfort_convenience': 'Power Steering, Power Windows Front, Power Windows Rear, Remote Trunk Opener, Remote Fuel Lid Opener, Low Fuel Warning Light, Accessory Power Outlet, Vanity Mirror, Rear Seat Headrest, Cup Holders Front', 'interior_features': 'Air Conditioner, Heater, Digital Odometer, Electronic Multi Tripmeter, Fabric Upholstery, Glove Compartment, Digital Clock', 'exterior_features': 'Adjustable Head Lights, Manually Adjustable Exterior Rear View Mirror, Wheel Covers, Power Antenna, Chrome Grille', 'safety_features': 'Centeral Locking, Child Safety Locks, Day Night Rear View Mirror, Passenger Side Rear View Mirror, Halogen Headlamps, Rear Seat Belts, Door Ajar Warning, Side Impact Beams, Front Impact Beams, Adjustable Seats, Centrally Mounted Fuel Tank, Engine Immobilizer, Anti Theft Device', 'entertainment_features': None, 'mileage': None, 'engine': None, 'torque': None, 'wheel_size': None, 'bhp': '67.04', 'rpm': '6000'}"
  "{'fuel_type': 'Petrol', 'body_type': 'SUV', 'kilometers_driven': '32,706', 'transmission': 'Manual', 'owner': '2nd Owner', 'oem': 'Ford', 'model': 'Ford Ecosport', 'year': 2018, 'variant': '1.5 Petrol Titanium BSIV', 'price': '₹ 8.11 Lakh', 'registration_year': 'Feb 2018', 'insurance_validity': 'Comprehensive', 'fuel_type_overview': 'Petrol', 'seats': '5 Seats', 'kms_driven': '32,706 Kms', 'rto': 'KA05', 'comfort_convenience': 'Power Steering, Power Windows Front, Power Windows Rear, Remote Trunk Opener, Remote Fuel Lid Opener, Low Fuel Warning Light, Accessory Power Outlet, Trunk Light, Vanity Mirror, Rear Seat Headrest, Cup Holders Front, Multifunction Steering Wheel, Navigation System, Smart Access Card Entry, Engine Start Stop Button, Gear Shift Indicator, Luggage Hook And Net', 'interior_features': 'Air Conditioner, Heater, Adjustable Steering, Digital Odometer, Tachometer, Electronic Multi Tripmeter, Fabric Upholstery, Leather Steering Wheel, Glove Compartment, Digital Clock, Outside Temperature Display, Height Adjustable Driver Seat', 'exterior_features': 'Adjustable Head Lights, Fog Lights Front, Power Adjustable Exterior Rear View Mirror, Electric Folding Rear View Mirror, Rear Window Wiper, Rear Window Washer, Rear Window Defogger, Alloy Wheels, Integrated Antenna, Outside Rear View Mirror Turn Indicators, Chrome Grille, Roof Rail', 'safety_features': 'Anti Lock Braking System, Centeral Locking, Power Door Locks, Child Safety Locks, Driver Air Bag, Passenger Air Bag, Passenger Side Rear View Mirror, Halogen Headlamps, Rear Seat Belts, Seat Belt Warning, Door Ajar Warning, Side Impact Beams, Front Impact Beams, Adjustable Seats, Keyless Entry, Centrally Mounted Fuel Tank, Engine Immobilizer, Engine Check Warning, Crash Sensor, Ebd, Follow Me Home Headlamps, Rear Camera, Anti Theft Device, Speed Sensing Auto Door Lock, Pretensioners And Force Limiter Seatbelts, Impact Sensing Auto Door Lock, No Of Airbags', 'entertainment_features': 'Cd Player, Radio, Speakers Front, Speakers Rear, Integrated2Din Audio, Usb Auxiliary Input, Bluetooth, Touch Screen, Number Of Speaker', 'mileage': None, 'engine': None, 'torque': None, 'wheel_size': None, 'bhp': '121.31', 'rpm': '6500'}"
  "{'fuel_type': 'Petrol', 'body_type': 'Hatchback', 'kilometers_driven': '11,949', 'transmission': 'Manual', 'owner': '1st Owner', 'oem': 'Tata', 'model': 'Tata Tiago', 'year': 2018, 'variant': '1.2 Revotron XZ', 'price': '₹ 5.85 Lakh', 'registration_year': 'Sept 2018', 'insurance_validity': 'Comprehensive', 'fuel_type_overview': 'Petrol', 'seats': '5 Seats', 'kms_driven': '11,949 Kms', 'rto': 'KA03', 'comfort_convenience': 'Power Steering, Power Windows Front, Power Windows Rear, Remote Trunk Opener, Remote Fuel Lid Opener, Low Fuel Warning Light, Accessory Power Outlet, Trunk Light, Vanity Mirror, Rear Seat Headrest, Cup Holders Front, Multifunction Steering Wheel, Navigation System, Glove Box Cooling', 'interior_features': 'Air Conditioner, Heater, Adjustable Steering, Digital Odometer, Tachometer, Electronic Multi Tripmeter, Fabric Upholstery, Glove Compartment, Digital Clock, Outside Temperature Display, Driving Experience Control Eco, Height Adjustable Driver Seat', 'exterior_features': 'Adjustable Head Lights, Fog Lights Front, Power Adjustable Exterior Rear View Mirror, Rear Window Wiper, Rear Window Washer, Rear Window Defogger, Wheel Covers, Power Antenna, Tinted Glass, Rear Spoiler, Outside Rear View Mirror Turn Indicators, Chrome Grille, Chrome Garnish', 'safety_features': 'Anti Lock Braking System, Centeral Locking, Power Door Locks, Child Safety Locks, Driver Air Bag, Passenger Air Bag, Passenger Side Rear View Mirror, Halogen Headlamps, Rear Seat Belts, Seat Belt Warning, Door Ajar Warning, Side Impact Beams, Front Impact Beams, Vehicle Stability Control System, Adjustable Seats, Centrally Mounted Fuel Tank, Engine Immobilizer, Engine Check Warning, Crash Sensor, Ebd, Follow Me Home Headlamps, Anti Theft Device, Speed Sensing Auto Door Lock, Pretensioners And Force Limiter Seatbelts', 'entertainment_features': 'Cd Player, Radio, Speakers Front, Speakers Rear, Integrated2Din Audio, Usb Auxiliary Input, Bluetooth, Number Of Speaker', 'mileage': None, 'engine': None, 'torque': None, 'wheel_size': None, 'bhp': nan, 'rpm': nan}"
  ...
  "{'fuel_type': 'Petrol', 'body_type': 'Sedan', 'kilometers_driven': '50,000', 'transmission': 'Automatic', 'owner': '3rd Owner', 'oem': 'Mercedes-Benz', 'model': 'Mercedes-Benz C-Class', 'year': 2011, 'variant': 'C 200 CGI Elegance', 'price': '₹ 5.50 Lakh', 'registration_year': '2011', 'insurance_validity': 'Third Party insurance', 'fuel_type_overview': 'Petrol', 'seats': '5 Seats', 'kms_driven': '50,000 Kms', 'rto': 'WB06', 'comfort_convenience': 'Power Steering, Power Windows Front, Power Windows Rear, Air Quality Control, Remote Trunk Opener, Remote Fuel Lid Opener, Low Fuel Warning Light, Accessory Power Outlet, Trunk Light, Vanity Mirror, Rear Reading Lamp, Rear Seat Headrest, Rear Seat Centre Arm Rest, Height Adjustable Front Seat Belts, Cup Holders Front, Cup Holders Rear, Seat Lumbar Support, Multifunction Steering Wheel, Cruise Control, Rear ACVents', 'interior_features': 'Air Conditioner, Heater, Adjustable Steering, Digital Odometer, Tachometer, Electronic Multi Tripmeter, Leather Seats, Leather Steering Wheel, Glove Compartment, Digital Clock, Outside Temperature Display, Cigarette Lighter', 'exterior_features': 'Adjustable Head Lights, Fog Lights Front, Fog Lights Rear, Power Adjustable Exterior Rear View Mirror, Electric Folding Rear View Mirror, Rain Sensing Wiper, Rear Window Wiper, Rear Window Washer, Rear Window Defogger, Alloy Wheels, Power Antenna, Integrated Antenna, Tinted Glass, Sun Roof, Moon Roof, Outside Rear View Mirror Turn Indicators, Chrome Grille, Chrome Garnish, Smoke Headlamps, Roof Rail', 'safety_features': 'Anti Lock Braking System, Brake Assist, Centeral Locking, Power Door Locks, Child Safety Locks, Anti Theft Alarm, Driver Air Bag, Passenger Air Bag, Side Air Bag Front, Side Air Bag Rear, Day Night Rear View Mirror, Passenger Side Rear View Mirror, Halogen Headlamps, Rear Seat Belts, Seat Belt Warning, Door Ajar Warning, Side Impact Beams, Front Impact Beams, Vehicle Stability Control System, Adjustable Seats, Keyless Entry, Engine Immobilizer, Engine Check Warning, Tyre Pressure Monitor, Crash Sensor', 'entertainment_features': 'Cd Player, Cd Changer, Radio, Audio System Remote Control, Speakers Front, Speakers Rear', 'mileage': None, 'engine': None, 'torque': None, 'wheel_size': None, 'bhp': nan, 'rpm': nan}"
  "{'fuel_type': 'Petrol', 'body_type': 'Hatchback', 'kilometers_driven': '40,000', 'transmission': 'Manual', 'owner': '1st Owner', 'oem': 'Maruti', 'model': 'Maruti Ritz', 'year': 2012, 'variant': 'ZXi', 'price': '₹ 1.40 Lakh', 'registration_year': '2012', 'insurance_validity': 'Third Party insurance', 'fuel_type_overview': 'Petrol', 'seats': '5 Seats', 'kms_driven': '40,000 Kms', 'rto': nan, 'comfort_convenience': 'Power Steering, Power Windows Front, Power Windows Rear, Remote Trunk Opener, Remote Fuel Lid Opener, Low Fuel Warning Light, Accessory Power Outlet, Vanity Mirror, Rear Seat Headrest, Height Adjustable Front Seat Belts, Cup Holders Front, Multifunction Steering Wheel', 'interior_features': 'Air Conditioner, Heater, Adjustable Steering, Digital Odometer, Tachometer, Electronic Multi Tripmeter, Fabric Upholstery, Glove Compartment, Digital Clock, Height Adjustable Driver Seat', 'exterior_features': 'Adjustable Head Lights, Fog Lights Front, Fog Lights Rear, Power Adjustable Exterior Rear View Mirror, Rear Window Wiper, Rear Window Washer, Rear Window Defogger, Alloy Wheels, Power Antenna, Rear Spoiler', 'safety_features': 'Anti Lock Braking System, Centeral Locking, Power Door Locks, Child Safety Locks, Anti Theft Alarm, Driver Air Bag, Passenger Air Bag, Day Night Rear View Mirror, Passenger Side Rear View Mirror, Halogen Headlamps, Rear Seat Belts, Seat Belt Warning, Door Ajar Warning, Side Impact Beams, Front Impact Beams, Adjustable Seats, Keyless Entry, Centrally Mounted Fuel Tank, Engine Immobilizer, Crash Sensor, Ebd, Anti Theft Device', 'entertainment_features': 'Cd Player, Radio, Speakers Front, Speakers Rear, Usb Auxiliary Input', 'mileage': None, 'engine': None, 'torque': None, 'wheel_size': None, 'bhp': '85.80', 'rpm': '6000'}"
  "{'fuel_type': 'Diesel', 'body_type': 'SUV', 'kilometers_driven': '1,20,000', 'transmission': 'Manual', 'owner': '2nd Owner', 'oem': 'Renault', 'model': 'Renault Duster', 'year': 2017, 'variant': '110PS Diesel RxZ AWD', 'price': '₹ 5 Lakh', 'registration_year': '2017', 'insurance_validity': 'Third Party insurance', 'fuel_type_overview': 'Diesel', 'seats': '5 Seats', 'kms_driven': '1,20,000 Kms', 'rto': nan, 'comfort_convenience': 'Power Steering, Power Windows Front, Power Windows Rear, Remote Trunk Opener, Low Fuel Warning Light, Accessory Power Outlet, Trunk Light, Vanity Mirror, Rear Reading Lamp, Rear Seat Headrest, Rear Seat Centre Arm Rest, Height Adjustable Front Seat Belts, Cup Holders Front, Cup Holders Rear, Seat Lumbar Support, Multifunction Steering Wheel, Cruise Control, Navigation System, Voice Control, Gear Shift Indicator, Drive Modes', 'interior_features': 'Air Conditioner, Heater, Adjustable Steering, Digital Odometer, Tachometer, Electronic Multi Tripmeter, Fabric Upholstery, Leather Steering Wheel, Glove Compartment, Digital Clock, Outside Temperature Display, Driving Experience Control Eco, Height Adjustable Driver Seat', 'exterior_features': 'Adjustable Head Lights, Fog Lights Front, Power Adjustable Exterior Rear View Mirror, Electric Folding Rear View Mirror, Rear Window Wiper, Rear Window Washer, Rear Window Defogger, Alloy Wheels, Power Antenna, Outside Rear View Mirror Turn Indicators, Roof Rail', 'safety_features': 'Anti Lock Braking System, Brake Assist, Centeral Locking, Power Door Locks, Child Safety Locks, Driver Air Bag, Passenger Air Bag, Passenger Side Rear View Mirror, Halogen Headlamps, Rear Seat Belts, Seat Belt Warning, Door Ajar Warning, Side Impact Beams, Front Impact Beams, Vehicle Stability Control System, Adjustable Seats, Keyless Entry, Centrally Mounted Fuel Tank, Engine Immobilizer, Engine Check Warning, Crash Sensor, Ebd, Rear Camera, Anti Theft Device, Speed Sensing Auto Door Lock, Hill Assist, Impact Sensing Auto Door Lock', 'entertainment_features': 'Cd Player, Radio, Speakers Front, Speakers Rear, Integrated2Din Audio, Usb Auxiliary Input, Bluetooth, Touch Screen, Number Of Speaker', 'mileage': None, 'engine': None, 'torque': None, 'wheel_size': None, 'bhp': '108.45', 'rpm': '4000'}"]] to numeric

In [4]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv('C:/Users/sonur/OneDrive/Desktop/Car_Dheko/Merged_car/merged_car.csv')

# Define the output directory
output_dir = 'C:/Users/sonur/OneDrive/Desktop/Car_Dheko/EDA_Output'
os.makedirs(output_dir, exist_ok=True)

# Descriptive Statistics
descriptive_stats = df.describe()
descriptive_stats.to_csv(os.path.join(output_dir, 'eda_descriptive_statistics.csv'))

# Data Visualization - Saving plots
sns.scatterplot(x='year', y='price', data=df)
plt.title('Year vs Price')
plt.savefig(os.path.join(output_dir, 'eda_scatterplot_year_vs_price.png'))
plt.clf()

# Ensure 'price' column is numeric and drop NaN values
df['price'] = pd.to_numeric(df['price'], errors='coerce')
df.dropna(subset=['price'], inplace=True)

# Drop rows where 'transmission' is NaN or has unexpected values
df.dropna(subset=['transmission'], inplace=True)

# Plotting Price Distribution
df['price'].plot(kind='hist', bins=20)
plt.title('Price Distribution')
plt.xlabel('Price')
plt.ylabel('Frequency')
plt.savefig(os.path.join(output_dir, 'eda_histogram_price_distribution.png'))
plt.clf()

# Ensure that 'transmission' column is treated as a categorical variable
df['transmission'] = df['transmission'].astype(str)

# Plotting Transmission Type vs Price
sns.boxplot(x='transmission', y='price', data=df)
plt.title('Transmission Type vs Price')
plt.savefig(os.path.join(output_dir, 'eda_boxplot_transmission_vs_price.png'))
plt.clf()

# Correlation Matrix
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.savefig(os.path.join(output_dir, 'eda_correlation_heatmap.png'))
plt.clf()

# Correlated Features with Price
correlated_features = corr_matrix['price'].sort_values(ascending=False)
correlated_features.to_csv(os.path.join(output_dir, 'eda_correlated_features.csv'))


  vmin = np.nanmin(calc_data)
  vmax = np.nanmax(calc_data)


<Figure size 640x480 with 0 Axes>