In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import LabelEncoder # Import LabelEncoder

import pandas as pd

# Load the car listings from a CSV file in the current working directory.
data = pd.read_csv('car_data.csv')
# Display the first few rows of the DataFrame to verify
print(data.head())


# Encode categorical variables using LabelEncoder.
# A separate encoder is fitted per column and kept in `encoders`, so every
# column's string -> integer mapping stays available afterwards (for
# `inverse_transform`, or for encoding new rows consistently). Refitting one
# shared encoder would retain only the mapping of the last column.
categorical_columns = ['name', 'fuel', 'seller_type', 'transmission', 'owner']
encoders = {}
for column in categorical_columns:
    # `le` is rebound on every pass; after the loop it refers to the encoder
    # fitted on the last column ('owner').
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    encoders[column] = le

# NOTE(review): the encoders are fitted on the full dataset before the
# train/test split below, so categories that occur only in the test rows are
# still known to the encoder. Integer codes also impose an arbitrary order on
# the categories; confirm this is acceptable for the model being used.

# Prepare the data
X = data.drop('selling_price', axis=1)  # Features (independent variables)
y = data['selling_price']  # Target variable (dependent variable)

# Split the dataset into training and testing sets (80% train / 20% test);
# the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define and train the model
model = RandomForestRegressor(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model; RMSE is derived from MSE so it is expressed in the same
# units as `selling_price`.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")

                       name  year  selling_price  km_driven    fuel  \
0             Maruti 800 AC  2007          60000      70000  Petrol   
1  Maruti Wagon R LXI Minor  2007         135000      50000  Petrol   
2      Hyundai Verna 1.6 SX  2012         600000     100000  Diesel   
3    Datsun RediGO T Option  2017         250000      46000  Petrol   
4     Honda Amaze VX i-DTEC  2014         450000     141000  Diesel   

  seller_type transmission         owner  
0  Individual       Manual   First Owner  
1  Individual       Manual   First Owner  
2  Individual       Manual   First Owner  
3  Individual       Manual   First Owner  
4  Individual       Manual  Second Owner  
Mean Absolute Error: 107448.21685028527
Mean Squared Error: 92315968370.86014
Root Mean Squared Error: 303835.4297491656
