In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [2]:
# Load the synthetic car listings from the CSV that sits next to the notebook,
# then preview the first five rows as the cell's rich output.
df = pd.read_csv(filepath_or_buffer="synthetic_car_data.csv")
df.head(n=5)


Unnamed: 0,name,company,year,kms_driven,fuel_type,Price
0,Volkswagen XUV,Volkswagen,2011,141932,Electric,50000
1,Tata XUV,Tata,2012,120268,Electric,111503
2,Chevrolet Swift,Chevrolet,2011,147337,Electric,121839
3,Volkswagen Kwid,Volkswagen,2015,97498,LPG,218099
4,Toyota XUV,Toyota,2008,26023,Diesel,50000


In [3]:
# One encoder per categorical column. Both are kept as notebook-level names
# because the prediction cell reuses them to translate user-typed strings.
le_company, le_fuel = LabelEncoder(), LabelEncoder()

# Fit each encoder on its column, then overwrite that column with the integer codes.
# NOTE: this replaces the string columns of `df` in place, so re-running this
# cell without reloading `df` would refit the encoders on the integer codes.
df['company'] = le_company.fit(df['company']).transform(df['company'])
df['fuel_type'] = le_fuel.fit(df['fuel_type']).transform(df['fuel_type'])

# Preview the encoded frame.
df.head(n=5)


Unnamed: 0,name,company,year,kms_driven,fuel_type,Price
0,Volkswagen XUV,9,2011,141932,2,50000
1,Tata XUV,7,2012,120268,2,111503
2,Chevrolet Swift,0,2011,147337,2,121839
3,Volkswagen Kwid,9,2015,97498,3,218099
4,Toyota XUV,8,2008,26023,1,50000


In [4]:
# Define features and target.
# The features are listed explicitly instead of "everything except name/Price":
# selecting by exclusion would let any extra column in the CSV (for example a
# saved index column such as 'Unnamed: 0') become a model input, and the
# prediction cell supplies exactly these four columns.
FEATURE_COLUMNS = ['company', 'year', 'kms_driven', 'fuel_type']

missing_columns = [col for col in FEATURE_COLUMNS + ['Price'] if col not in df.columns]
if missing_columns:
    raise KeyError(f"Expected columns missing from df: {missing_columns}")

X = df[FEATURE_COLUMNS]
y = df['Price']

# Split data: 80% train / 20% test, seeded so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the model (seeded so the fitted forest is reproducible).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [5]:
# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)

# Compute each metric once, then report them.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE is the square root of the MSE
r2 = r2_score(y_test, y_pred)

print("MAE:", mae)
print("RMSE:", rmse)
print("R² Score:", r2)


MAE: 67327.11995
RMSE: 100494.98220457956
R² Score: 0.644299254843455


In [10]:
# Interactive single-car price estimate using the encoders and model fitted in
# the earlier cells (`le_company`, `le_fuel`, `model`).

def _match_category(raw_value, known_values):
    """Return the entry of `known_values` that equals `raw_value`, ignoring
    case and surrounding whitespace, or None when nothing matches."""
    wanted = raw_value.strip().casefold()
    return next(
        (value for value in known_values if str(value).casefold() == wanted),
        None,
    )


# Show valid options
print("Available companies:", list(le_company.classes_))
print("Available fuel types:", list(le_fuel.classes_))

# Take user input and strip whitespace. The numeric fields are read as text
# first so a typo produces a readable message instead of a traceback.
company_input = input("Enter car company: ").strip()
year_text = input("Enter year of purchase (e.g., 2018): ").strip()
kms_text = input("Enter kilometers driven (e.g., 45000): ").strip()
fuel_input = input("Enter fuel type: ").strip()

# Resolve the categories case-insensitively to the exact spelling the encoders
# were fitted on ("honda" -> "Honda").
company_match = _match_category(company_input, le_company.classes_)
fuel_match = _match_category(fuel_input, le_fuel.classes_)

# Parse the numeric fields; None marks a value that is not a whole number.
try:
    year_input = int(year_text)
    kms_input = int(kms_text)
except ValueError:
    year_input = kms_input = None

# Validate and encode inputs
if company_match is None or fuel_match is None:
    print("\n❌ Invalid input. Please enter a valid company and fuel type.")
elif year_input is None:
    print("\n❌ Invalid input. Year and kilometers driven must be whole numbers.")
else:
    company_encoded = le_company.transform([company_match])[0]
    fuel_encoded = le_fuel.transform([fuel_match])[0]

    # Create input as a one-row DataFrame; column names and order must match
    # the features the model was trained on.
    user_input_df = pd.DataFrame([{
        'company': company_encoded,
        'year': year_input,
        'kms_driven': kms_input,
        'fuel_type': fuel_encoded
    }])

    # Predict
    predicted_price = model.predict(user_input_df)[0]
    print(f"\n🚗 Estimated Price: ₹{int(predicted_price):,}")


Available companies: ['Chevrolet', 'Ford', 'Honda', 'Hyundai', 'Mahindra', 'Maruti', 'Renault', 'Tata', 'Toyota', 'Volkswagen']
Available fuel types: ['CNG', 'Diesel', 'Electric', 'LPG', 'Petrol']


Enter car company:  Honda
Enter year of purchase (e.g., 2018):  2020
Enter kilometers driven (e.g., 45000):  45900
Enter fuel type:  Petrol



🚗 Estimated Price: ₹390,314
