In [1]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score

In [55]:
# Read the car listings workbook into a DataFrame; the path is relative to the
# notebook's working directory.
df = pd.read_excel("gaddi.xlsx")

In [56]:
# Display the loaded frame as the cell output to eyeball the columns and a few rows.
df

Unnamed: 0,Brand,Model,Year,Engine_Size,Fuel_Type,Transmission,Mileage,Doors,Owner_Count,Price
0,Kia,Rio,2020,4.2,Diesel,Manual,289944,3,5,8501
1,Chevrolet,Malibu,2012,2.0,Hybrid,Automatic,5356,2,3,12092
2,Mercedes,GLA,2020,4.2,Diesel,Automatic,231440,4,2,11171
3,Audi,Q5,2023,2.0,Electric,Manual,160971,2,1,11780
4,Volkswagen,Golf,2003,2.6,Hybrid,Semi-Automatic,286618,3,3,2867
...,...,...,...,...,...,...,...,...,...,...
9995,Kia,Optima,2004,3.7,Diesel,Semi-Automatic,5794,2,4,8884
9996,Chevrolet,Impala,2002,1.4,Electric,Automatic,168000,2,1,6240
9997,BMW,3 Series,2010,3.0,Petrol,Automatic,86664,5,1,9866
9998,Ford,Explorer,2002,1.4,Hybrid,Automatic,225772,4,1,4084


In [57]:
# Inspect the Engine_Size column on its own (values and dtype).
df['Engine_Size']

0       4.2
1       2.0
2       4.2
3       2.0
4       2.6
       ... 
9995    3.7
9996    1.4
9997    3.0
9998    1.4
9999    2.1
Name: Engine_Size, Length: 10000, dtype: float64

In [41]:
# List the distinct Brand values present in the data.
df["Brand"].unique()

array(['Kia', 'Chevrolet', 'Mercedes', 'Audi', 'Volkswagen', 'Toyota',
       'Honda', 'BMW', 'Hyundai', 'Ford'], dtype=object)

In [42]:
# List the distinct Model values present in the data.
df["Model"].unique()

array(['Rio', 'Malibu', 'GLA', 'Q5', 'Golf', 'Camry', 'Civic', 'Sportage',
       'RAV4', '5 Series', 'CR-V', 'Elantra', 'Tiguan', 'Equinox',
       'Explorer', 'A3', '3 Series', 'Tucson', 'Passat', 'Impala',
       'Corolla', 'Optima', 'Fiesta', 'A4', 'Focus', 'E-Class', 'Sonata',
       'C-Class', 'X5', 'Accord'], dtype=object)

In [43]:
# Replace each categorical text column with integer codes and keep the fitted
# encoder per column so the same mapping can be reused at prediction time.
categorical_columns = ["Brand", "Model", "Fuel_Type", "Transmission"]

# This cell overwrites the columns of `df` in place, so running it a second time
# would silently fit the encoders on integer codes instead of the original
# strings, and the saved encoders could no longer translate values like "Kia".
# Check up front and fail loudly *before* touching `label_encoders` or `df`.
already_encoded = [
    col for col in categorical_columns if pd.api.types.is_numeric_dtype(df[col])
]
if already_encoded:
    raise ValueError(
        f"Columns {already_encoded} are already numeric; reload the raw data "
        "before re-running the label-encoding cell."
    )

label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [44]:
# Target: the Price column. Features: every remaining column of the frame.
y = df["Price"]
X = df.drop("Price", axis=1)

In [45]:
# Display the target Series (Price) to confirm what was selected.
y

0        8501
1       12092
2       11171
3       11780
4        2867
        ...  
9995     8884
9996     6240
9997     9866
9998     4084
9999     3342
Name: Price, Length: 10000, dtype: int64

In [46]:
# Count missing values per column.
df.isna().sum()

Brand           0
Model           0
Year            0
Engine_Size     0
Fuel_Type       0
Transmission    0
Mileage         0
Doors           0
Owner_Count     0
Price           0
dtype: int64

In [47]:
# Show the rows flagged by DataFrame.duplicated(), i.e. repeats of an earlier row.
df[df.duplicated()]

Unnamed: 0,Brand,Model,Year,Engine_Size,Fuel_Type,Transmission,Mileage,Doors,Owner_Count,Price


In [48]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Build the gradient-boosted regressor, then fit it on the training portion only.
model = XGBRegressor(random_state=42, learning_rate=0.1, n_estimators=100)
model.fit(X_train, y_train)


In [49]:
# `pickle` is already imported in the notebook's first cell, so it is not
# re-imported here.

# Persist the fitted regressor so it can be reloaded for inference without retraining.
with open("car_price_model2.pkl", "wb") as file:
    pickle.dump(model, file)

# Persist the fitted LabelEncoders alongside the model: the same string-to-code
# mapping is needed to encode categorical inputs at prediction time.
with open("label_encoders2.pkl", "wb") as file:
    pickle.dump(label_encoders, file)

print("Model and encoders saved successfully!")


Model and encoders saved successfully!


In [50]:
# Predict prices for the held-out test rows.
y_pred = model.predict(X_test)

In [51]:
# Score the hold-out predictions: mean absolute error and the R2 coefficient
# of determination between the true and predicted prices.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error: {mae}")
print(f"R2 Score: {r2}")
# R2 is a regression goodness-of-fit measure, not a classification accuracy,
# so the percentage is reported under its real name.
print("Variance explained (R2 x 100):", r2 * 100)

Mean Absolute Error: 151.04132763671876
R2 Score: 0.9960112571716309
Accuracy: 99.60112571716309


In [None]:
# NOTE: this interactive prediction cell is disabled (fully commented out).
# Two problems in the draft were corrected below so it works if re-enabled:
#   * the car's model name is read into `car_model` instead of `model`, so it no
#     longer overwrites the trained regressor that `model.predict` is called on;
#   * the one-row frame is reindexed to `X.columns`, because the dict lists the
#     features in a different order than the training frame
#     (presumably XGBoost expects the training column order -- verify).
#
# print("\nEnter car details to predict the price:")
# year = int(input("Year: "))
# engine_size = float(input("Engine Size: "))
# mileage = int(input("Mileage: "))
# doors = int(input("Number of Doors: "))
# owner_count = int(input("Number of Previous Owners: "))
# brand = input("Brand: ")
# car_model = input("Model: ")
# fuel_type = input("Fuel Type: ")
# transmission = input("Transmission: ")

# # Encode user inputs using label encoders
# user_data = {
#     "Year": year,
#     "Engine_Size": engine_size,
#     "Mileage": mileage,
#     "Doors": doors,
#     "Owner_Count": owner_count,
#     "Brand": label_encoders["Brand"].transform([brand])[0],
#     "Model": label_encoders["Model"].transform([car_model])[0],
#     "Fuel_Type": label_encoders["Fuel_Type"].transform([fuel_type])[0],
#     "Transmission": label_encoders["Transmission"].transform([transmission])[0]
# }

# # Convert to DataFrame, using the same column order as the training features
# data = pd.DataFrame([user_data])[X.columns]

# # Predict the price
# predicted_price = model.predict(data)[0]
# print(f"Predicted Car Price: ₹{predicted_price:.2f}")