In [14]:
import pandas as pd
import joblib
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the dataset
df = pd.read_csv("/content/cardekho.csv")

# Preprocess the dataset (as you have done previously)
# Extract brand name (first word in the car name)
df['brand'] = df['name'].apply(lambda x: x.split()[0])

# One-hot encode the brand
df = pd.get_dummies(df, columns=['brand'], drop_first=True)

# Encode 'fuel' and 'transmission' columns
fuel_encoder = LabelEncoder()
transmission_encoder = LabelEncoder()

df['fuel_encoded'] = fuel_encoder.fit_transform(df['fuel'])
df['transmission_encoded'] = transmission_encoder.fit_transform(df['transmission'])

# Drop unnecessary columns
df = df.drop(columns=['fuel', 'transmission', 'owner', 'seller_type'])

# Define features (X) and target (y)
X = df.drop(columns=['selling_price', 'name'])  # Drop 'name' from features
y = df['selling_price']

# Fill missing values (if any)
X = X.fillna(0)

# Ensure all columns are numeric
categorical_columns = X.select_dtypes(include=['object']).columns
for col in categorical_columns:
    # Convert column to string (if it contains mixed types)
    X[col] = X[col].astype(str)
    # Apply LabelEncoder
    X[col] = encode.fit_transform(X[col])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Decision Tree model on the training data
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)

# Save the trained model and encoders
joblib.dump(dt_model, 'car_price_prediction_model.pkl')
joblib.dump(fuel_encoder, 'fuel_encoder.pkl')
joblib.dump(transmission_encoder, 'transmission_encoder.pkl')

#def
def predict_car_price():
    # Load the saved model and encoders
    dt_model = joblib.load('car_price_prediction_model.pkl')
    fuel_encoder = joblib.load('fuel_encoder.pkl')
    transmission_encoder = joblib.load('transmission_encoder.pkl')

    # Get input from the customer
    name = input("Enter car name: ").strip()
    year = input("Enter year of manufacture (YYYY): ").strip()
    km_driven = input("Enter kilometers driven: ").strip()
    mileage = input("Enter mileage (km/l): ").strip()
    engine = input("Enter engine capacity (cc): ").strip()
    max_power = input("Enter maximum power (bhp): ").strip()
    seats = input("Enter number of seats: ").strip()

    # Normalize inputs to lower case and handle empty inputs
    fuel = input("Enter fuel type (Petrol/Diesel/CNG): ").strip().lower() or 'Petrol'
    transmission = input("Enter transmission type (Manual/Automatic): ").strip().lower() or 'Manual'

    # Prepare input for prediction
    new_car = {
        'name': name,
        'year': int(year) if year.isdigit() else 0,
        'km_driven': int(km_driven) if km_driven.isdigit() else 0,
        'mileage': float(mileage) if mileage.replace('.', '', 1).isdigit() else 0.0,
        'engine': float(engine) if engine.replace('.', '', 1).isdigit() else 0.0,
        'max_power': float(max_power) if max_power.replace('.', '', 1).isdigit() else 0.0,
        'seats': float(seats) if seats.replace('.', '', 1).isdigit() else 0.0,
    }

    # Check if fuel type and transmission type are recognized
    try:
        new_car['fuel_encoded'] = fuel_encoder.transform([fuel.capitalize()])[0]
    except ValueError:
        print(f"Unrecognized fuel type: '{fuel}'. Please use one of the following: {fuel_encoder.classes_}")
        return

    try:
        new_car['transmission_encoded'] = transmission_encoder.transform([transmission.capitalize()])[0]
    except ValueError:
        print(f"Unrecognized transmission type: '{transmission}'. Please use one of the following: {transmission_encoder.classes_}")
        return

    # Convert new car details into a DataFrame
    new_car_df = pd.DataFrame([new_car])

    # Extract brand name (first word in the car name)
    new_car_df['brand'] = new_car_df['name'].apply(lambda x: x.split()[0])

    # One-hot encode the brand
    new_car_df = pd.get_dummies(new_car_df, columns=['brand'], drop_first=True)

    # Ensure the new input has the same columns as the training data
    missing_cols = set(X.columns) - set(new_car_df.columns)
    for col in missing_cols:
        new_car_df[col] = 0  # Add missing columns and set to 0

    # Drop the 'name' column (not needed for prediction)
    new_car_df = new_car_df.drop(columns=['name'])

    # Reorder columns to match the training data
    new_car_df = new_car_df[X.columns]

    # Predict the selling price for the new car
    predicted_price = dt_model.predict(new_car_df)[0]

    # Adjust the predicted price by subtracting one zero and format it with a dollar sign
    adjusted_price = predicted_price / 10
    print(f"Predicted Selling Price: ${adjusted_price:.2f}")

# Call the function to run the prediction program
predict_car_price()


Enter car name: bmw
Enter year of manufacture (YYYY): 2000
Enter kilometers driven: 300
Enter mileage (km/l): 
Enter engine capacity (cc): 200
Enter maximum power (bhp): 200
Enter number of seats: 4
Enter fuel type (Petrol/Diesel/CNG): diesel
Enter transmission type (Manual/Automatic): manual
Predicted Selling Price: $16300.00
