In [1]:
import os
import pandas as pd

# Build the path to 'cars.csv' inside the current working directory, then
# parse the file into a DataFrame.
csv_path = os.path.join(os.getcwd(), 'cars.csv')
df = pd.read_csv(csv_path)

# Show the first rows as a quick sanity check of the parsed columns.
df.head()

Unnamed: 0,Car_ID,Brand,Model,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,Price
0,1,Toyota,Corolla,2018,50000,Petrol,Manual,First,15,1498,108,5,800000
1,2,Honda,Civic,2019,40000,Petrol,Automatic,Second,17,1597,140,5,1000000
2,3,Ford,Mustang,2017,20000,Petrol,Automatic,First,10,4951,395,4,2500000
3,4,Maruti,Swift,2020,30000,Diesel,Manual,Third,23,1248,74,5,600000
4,5,Hyundai,Sonata,2016,60000,Diesel,Automatic,Second,18,1999,194,5,850000


In [2]:
# Target: the 'Owner_Type' column.
y = df['Owner_Type']
# Features: every remaining column except 'Car_ID' and the target itself.
X = df.drop(columns=['Car_ID', 'Owner_Type'])

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Split the feature columns by dtype so each group gets its own transformer.
# Selecting on the generic 'number' kind (rather than the exact 'int64' /
# 'float64' names) and treating every remaining column as categorical makes
# the two lists partition X. ColumnTransformer drops any column that is not
# listed in a transformer by default, so a column with another dtype
# (e.g. int32, category, string, bool) would otherwise silently disappear
# from the model input.
numerical_features = X.select_dtypes(include='number').columns.tolist()
categorical_features = X.select_dtypes(exclude='number').columns.tolist()

preprocessor = ColumnTransformer(transformers=[
    # Standardise the numeric columns.
    ('num', StandardScaler(), numerical_features),
    # One-hot encode the non-numeric columns; handle_unknown='ignore' keeps
    # transform from raising on categories that were not seen during fit.
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

# Display both lists so the split can be checked by eye.
numerical_features, categorical_features

(['Year', 'Kilometers_Driven', 'Mileage', 'Engine', 'Power', 'Seats', 'Price'],
 ['Brand', 'Model', 'Fuel_Type', 'Transmission'])

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
import numpy as np

In [5]:
# Random forest baseline: the preprocessing and the classifier live in a single
# pipeline, so cross_validate fits the whole chain on each training split.
pipeline_rf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('rf', RandomForestClassifier(random_state=42)),
])

# Metrics to collect on each fold (macro-averaged across the classes).
scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
classification_result_rf = cross_validate(
    estimator=pipeline_rf,
    X=X,
    y=y,
    scoring=scoring,
)

# Collapse the per-fold arrays (timings and scores) to their mean.
mean_results = {
    name: np.mean(per_fold)
    for name, per_fold in classification_result_rf.items()
}

# Report every averaged entry with four decimals.
for metric, mean in mean_results.items():
    print(f'{metric}: {mean:.4f}')


fit_time: 0.5478
score_time: 0.0808
test_accuracy: 0.8900
test_precision_macro: 0.8649
test_recall_macro: 0.8667
test_f1_macro: 0.8628
