In [99]:
# Importing necessary libraries
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [100]:
# Load the car listings from 'car_price.csv' (path is resolved relative to the
# current working directory) and preview the first five rows.
data = pd.read_csv(filepath_or_buffer='car_price.csv')
data.head(n=5)

Unnamed: 0,Car ID,Brand,Year,Engine Size,Fuel Type,Transmission,Mileage,Condition,Price,Model
0,1,Tesla,2016,2.3,Petrol,Manual,114832,New,26613.92,Model X
1,2,BMW,2018,4.4,Electric,Manual,143190,Used,14679.61,5 Series
2,3,Audi,2013,4.5,Electric,Manual,181601,New,44402.61,A4
3,4,Tesla,2011,4.1,Diesel,Automatic,68682,New,86374.33,Model Y
4,5,Ford,2009,2.6,Diesel,Manual,223009,Like New,73577.1,Mustang


In [101]:
# One-hot encode every categorical column so the regressor only sees
# numeric/boolean features. 'Car ID' (appears to be a plain row identifier)
# is removed up front; the resulting columns and their order are unchanged.
categorical_cols = ['Brand', 'Fuel Type', 'Transmission', 'Condition', 'Model']
data_encoded = pd.get_dummies(
    data.drop(columns=['Car ID']),
    columns=categorical_cols,
)
data_encoded.head()

Unnamed: 0,Year,Engine Size,Mileage,Price,Brand_Audi,Brand_BMW,Brand_Ford,Brand_Honda,Brand_Mercedes,Brand_Tesla,...,Model_Model S,Model_Model X,Model_Model Y,Model_Mustang,Model_Prius,Model_Q5,Model_Q7,Model_RAV4,Model_X3,Model_X5
0,2016,2.3,114832,26613.92,False,False,False,False,False,True,...,False,True,False,False,False,False,False,False,False,False
1,2018,4.4,143190,14679.61,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,2013,4.5,181601,44402.61,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,2011,4.1,68682,86374.33,False,False,False,False,False,True,...,False,False,True,False,False,False,False,False,False,False
4,2009,2.6,223009,73577.1,False,False,True,False,False,False,...,False,False,False,True,False,False,False,False,False,False


In [102]:
# Separate the prediction target (Price) from the feature matrix:
# y is the raw price column, X is every encoded column except Price.
y = data['Price']
X = data_encoded.drop(columns=['Price'])

In [103]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# train/test partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [104]:
# Initializing Random Forest Regressor model.
# random_state is pinned (same seed as the train/test split) because a random
# forest draws bootstrap samples and feature subsets at random; without a seed
# the fitted trees -- and the metrics reported below -- change on every run.
model = RandomForestRegressor(random_state=42)

# Training model on the training partition
model.fit(X_train, y_train)

# Making predictions for the held-out test rows (used for evaluation below)
y_pred = model.predict(X_test)

In [105]:
# Score the held-out predictions.
# MSE is in squared price units; R2 compares the model against always
# predicting the mean price (a value <= 0 means it does no better than that).
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Emit both metrics, one per line, rounded to two decimals.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"R2 Score: {r2:.2f}",
    sep="\n",
)

Mean Squared Error: 805049784.89
R2 Score: -0.06


In [106]:
# Streamlit UI & User Input
# NOTE: these st.* calls only render a page when the code is executed with
# `streamlit run <script>.py`; run from a notebook kernel they show no UI.
st.set_page_config(page_title="Car Price Predictor", page_icon="🚗")
st.title("🚗 Car Price Predictor")
st.write("Enter the car details below to estimate its market price.")

# Categorical inputs (rendered in the main page body, not the sidebar).
brand = st.selectbox("Brand", sorted(data['Brand'].unique()))
# Only offer models that actually occur with the chosen brand, so the user
# cannot submit a brand/model pair the model never saw during training.
car_model = st.selectbox(
    "Model",
    sorted(data.loc[data['Brand'] == brand, 'Model'].unique()),
)
fuel_type = st.selectbox("Fuel Type", sorted(data['Fuel Type'].unique()))
transmission = st.selectbox("Transmission", sorted(data['Transmission'].unique()))
condition = st.selectbox("Condition", sorted(data['Condition'].unique()))

# Numeric inputs. These are features of X as well; previously they were never
# collected and were silently filled with 0 (year 0, 0-litre engine, 0 mileage),
# so every prediction was made for a car far outside the training data.
# Each number_input gets arguments of a single numeric type (all int or all float).
year = st.number_input(
    "Year",
    min_value=int(data['Year'].min()),
    max_value=int(data['Year'].max()),
    value=int(data['Year'].median()),
    step=1,
)
engine_size = st.number_input(
    "Engine Size",
    min_value=float(data['Engine Size'].min()),
    max_value=float(data['Engine Size'].max()),
    value=float(data['Engine Size'].median()),
    step=0.1,
    format="%.1f",
)
mileage = st.number_input(
    "Mileage",
    min_value=0,
    value=int(data['Mileage'].median()),
    step=1000,
)

# Create single-row input DataFrame with every raw feature the model was trained on
input_df = pd.DataFrame([{
    'Year': year,
    'Engine Size': engine_size,
    'Mileage': mileage,
    'Brand': brand,
    'Model': car_model,
    'Fuel Type': fuel_type,
    'Transmission': transmission,
    'Condition': condition
}])

# One-hot encode the same categorical columns as in training, then align to the
# training layout: reindex adds the dummy columns for categories that were not
# selected (filled with 0) and puts the columns in the order of X.
input_encoded = pd.get_dummies(
    input_df,
    columns=['Brand', 'Fuel Type', 'Transmission', 'Condition', 'Model'],
).reindex(columns=X.columns, fill_value=0)

# Predict button
if st.button("Predict Price"):
    prediction = model.predict(input_encoded)[0]
    st.success(f"💰 Estimated Car Price: ${prediction:,.2f}")

# Optional: Show model performance (metrics computed on the held-out test set)
with st.expander("📊 Model Evaluation"):
    st.write(f"**Mean Squared Error:** {mse:.2f}")
    st.write(f"**R² Score:** {r2:.2f}")

