In [1]:
# STEP 1: IMPORT LIBRARIES
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# STEP 2: LOAD DATA
# The CSV path is relative, so it is resolved against the kernel's working directory.
df = pd.read_csv("CAR DETAILS FROM CAR DEKHO.csv")

# STEP 3: SELECT FEATURES
# Predictor columns handed to the model; `selling_price` is the regression target.
feature_cols = ['name', 'year', 'km_driven', 'fuel', 'seller_type', 'transmission', 'owner']
X = df[feature_cols]
y = df['selling_price']

# STEP 4: TRAIN TEST SPLIT
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
holdout_fraction = 0.2
split_parts = train_test_split(X, y, test_size=holdout_fraction, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# STEP 5: ONE-HOT ENCODING FOR CATEGORICAL COLUMNS
# String-valued columns that get expanded into indicator columns.
cat_cols = ['name', 'fuel', 'seller_type', 'transmission', 'owner']

# handle_unknown='ignore' lets transform() accept categories that were not
# present when the encoder was fitted instead of raising an error.
one_hot = OneHotEncoder(handle_unknown='ignore')

# Columns not listed in cat_cols are kept unchanged via remainder='passthrough'.
encoder = ColumnTransformer(
    transformers=[('ohe', one_hot, cat_cols)],
    remainder='passthrough',
)

# Learn the categories from the training split only, then apply the same
# fitted encoder to the held-out split.
X_train_encoded = encoder.fit_transform(X_train)
X_test_encoded = encoder.transform(X_test)

# STEP 6: MODEL
# Random-forest hyper-parameters gathered in one place; the fixed random_state
# keeps repeated runs on the same data comparable.
forest_params = {
    "n_estimators": 300,
    "max_depth": 25,
    "random_state": 42,
}
model = RandomForestRegressor(**forest_params)

# Train on the encoded training split.
model.fit(X_train_encoded, y_train)

# STEP 7: MODEL PREDICTION
# Predict selling prices for the encoded held-out split.
y_pred = model.predict(X_test_encoded)

# STEP 8: MODEL EVALUATION
# Compare held-out predictions with the true prices.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5  # square root of the MSE, i.e. error in the target's own units

# Report: header, rule, then one "label value" line per metric.
print("\nMODEL RESULTS (IMPROVED):", "--------------------------", sep="\n")
for metric_label, metric_value in (("RMSE :", rmse), ("R² Score:", r2)):
    print(metric_label, metric_value)

# STEP 9: NO INTERACTIVE INPUT HERE (predictions are left to the Streamlit app)
print("\nUser input removed — Streamlit will handle predictions.")

# STEP 10: SAVE MODEL + ENCODER + FEATURES
# (filename, object) pairs, written in this order to the working directory.
artifacts = (
    ("car_price_model.pkl", model),
    ("encoder.pkl", encoder),
    ("feature_names.pkl", list(X.columns)),
)

# Write every artifact first so the confirmation below is only printed
# once all dumps have completed.
for artifact_path, artifact in artifacts:
    joblib.dump(artifact, artifact_path)

print("\nModel files saved:")
for artifact_path, _ in artifacts:
    print("✔ " + artifact_path)



MODEL RESULTS (IMPROVED):
--------------------------
RMSE : 362302.9884327329
R² Score: 0.569868164061951

User input removed — Streamlit will handle predictions.

Model files saved:
✔ car_price_model.pkl
✔ encoder.pkl
✔ feature_names.pkl


In [2]:
# Display the kernel's current working directory. The training cell reads the CSV
# and writes the .pkl files using relative paths, so they resolve against this
# directory (assuming the working directory was not changed in between).
import os
os.getcwd()


'C:\\Users\\samar\\car price prediction projetc'