In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

# Step 1: Load the dataset
def load_data(file_path):
    """Read a CSV dataset into a DataFrame and announce success.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts (a path string,
            path object, or open file-like object).

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    dataset = pd.read_csv(file_path)
    # Only reached when parsing succeeded; read_csv raises otherwise.
    print("Dataset Loaded Successfully!")
    return dataset

# Step 2: Preprocess data
def preprocess_data(df, target_col='Selling_Price'):
    """Clean the dataset and split it into features and target.

    Rows containing any missing value are dropped, every text column is
    replaced by integer codes from its own ``LabelEncoder``, and the
    target column is separated from the remaining feature columns.
    The caller's DataFrame is left unmodified.

    Args:
        df: Raw dataset as a ``pandas.DataFrame``.
        target_col: Name of the column to predict. Defaults to
            ``'Selling_Price'``.

    Returns:
        A tuple ``(X, y, label_encoders)`` where ``X`` is the feature
        DataFrame, ``y`` is the target Series, and ``label_encoders`` maps
        each encoded column name to its fitted ``LabelEncoder`` (so the
        codes can later be mapped back to the original labels).

    Raises:
        KeyError: If ``target_col`` is not a column of ``df``.
    """
    # Fail early with a clear message instead of after all the encoding work.
    if target_col not in df.columns:
        raise KeyError(f"Target column {target_col!r} not found in dataset")

    # Handle missing values. The explicit copy guarantees the column
    # assignments below write to our own frame, never to (a view of) the
    # caller's data, and avoids pandas' SettingWithCopyWarning.
    cleaned = df.dropna().copy()

    # Encode categorical features. 'string' is included alongside 'object'
    # so columns stored with pandas' dedicated string dtype are encoded too.
    # NOTE(review): on recent pandas versions 'object' alone may not match
    # such columns -- confirm against the pandas version in use.
    label_encoders = {}
    for col in cleaned.select_dtypes(include=['object', 'string']).columns:
        encoder = LabelEncoder()
        cleaned[col] = encoder.fit_transform(cleaned[col])
        label_encoders[col] = encoder

    # Separate features and target variable
    X = cleaned.drop(columns=[target_col])
    y = cleaned[target_col]
    return X, y, label_encoders

# Step 3: Train the model
def train_model(X, y, *, test_size=0.2, n_estimators=100, random_state=42):
    """Fit a random-forest regressor and report its hold-out error.

    The data is split into a training and a test partition, the model is
    fitted on the training partition only, and the mean squared error on
    the test partition is printed.

    Args:
        X: Feature matrix.
        y: Target values aligned with ``X``.
        test_size: Share of the data held out for evaluation; forwarded to
            ``train_test_split``. Defaults to 0.2.
        n_estimators: Number of trees in the forest. Defaults to 100.
        random_state: Seed used for both the split and the forest so runs
            are reproducible. Defaults to 42.

    Returns:
        The fitted ``RandomForestRegressor``. Note that it has only seen
        the training partition, not the held-out rows.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model = RandomForestRegressor(
        n_estimators=n_estimators, random_state=random_state
    )
    model.fit(X_train, y_train)

    # Evaluate on rows the model never saw during fitting.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")
    return model

# Main function
if __name__ == "__main__":
    # Script entry point: load the CSV, preprocess it, then train the model.
    # Replace 'car data.csv' with the path to your dataset
    file_path = "car data.csv"

    # Load the data and show a preview of it
    df = load_data(file_path)
    print("First 5 rows of the dataset:\n", df.head())

    # Preprocess: yields the features, the target, and the fitted encoders
    X, y, label_encoders = preprocess_data(df)
    print("Data Preprocessing Completed!")

    # Train the model (train_model also prints the hold-out mean squared error)
    model = train_model(X, y)
    print("Model trained successfully.")

Dataset Loaded Successfully!
First 5 rows of the dataset:
   Car_Name  Year  Selling_Price  Present_Price  Driven_kms Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Selling_type Transmission  Owner  
0       Dealer       Manual      0  
1       Dealer       Manual      0  
2       Dealer       Manual      0  
3       Dealer       Manual      0  
4       Dealer       Manual      0  
Data Preprocessing Completed!
Mean Squared Error: 0.782636952131147
Model trained successfully.
