#The car evaluation dataset was collected from the UCI Machine Learning Repository; its creator is Marko Bohanec [1]. It contains 1728 car samples with 7 attributes each, including one class feature that tells whether the car is in acceptable condition. The feature names and their descriptions are listed below:

buying_price: Buying Level or Capacity of the customer (Very High: vhigh, High: high, Low: low, Medium: med)
maint_cost: Maintenance Level (Very High: vhigh, High: high, Low: low, Medium: med)
doors: Number of doors in the car (2, 3, 4, and 5 or more)
person_capacity: capacity in terms of persons to carry (2, 4, and more)
lug_boot: The size of the Luggage Boot (small, med, big)
safety: Safety Level of Car (high, med, low)
class: Acceptability of the car (Unacceptable: unacc, Acceptable: acc, VeryGood: vgood, Good: good)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the car evaluation CSV into a DataFrame.
# NOTE(review): the absolute /content path presumably targets a Colab session —
# adjust it when running elsewhere.
df=pd.read_csv('/content/car_evaluation.csv')

In [None]:
# Display the DataFrame; the notebook renders a truncated preview of its rows.
df

Unnamed: 0,buying price,maintenance cost,number of doors,number of persons,lug_boot,safety,decision
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [None]:
# Summarize the columns: names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   buying price       1728 non-null   object
 1   maintenance cost   1728 non-null   object
 2   number of doors    1728 non-null   object
 3   number of persons  1728 non-null   object
 4   lug_boot           1728 non-null   object
 5   safety             1728 non-null   object
 6   decision           1728 non-null   object
dtypes: object(7)
memory usage: 94.6+ KB


In [None]:
# Preview the first rows of the data.
df.head()

Unnamed: 0,buying price,maintenance cost,number of doors,number of persons,lug_boot,safety,decision
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [None]:
# Summary statistics per column, transposed so each column becomes a row.
df.describe().T

Unnamed: 0,count,unique,top,freq
buying price,1728,4,vhigh,432
maintenance cost,1728,4,vhigh,432
number of doors,1728,4,2,432
number of persons,1728,3,2,576
lug_boot,1728,3,small,576
safety,1728,3,low,576
decision,1728,4,unacc,1210


In [None]:
# (number of rows, number of columns)
df.shape

(1728, 7)

In [None]:
# Number of non-null entries in each column.
df.count()

buying price         1728
maintenance cost     1728
number of doors      1728
number of persons    1728
lug_boot             1728
safety               1728
decision             1728
dtype: int64

In [None]:
# Number of missing values in each column.
df.isnull().sum()

buying price         0
maintenance cost     0
number of doors      0
number of persons    0
lug_boot             0
safety               0
decision             0
dtype: int64

In [None]:
# Separate the target column ('decision') from the predictor columns.
y = df['decision']
X = df.drop(columns=['decision'])

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=101,
)

In [None]:
pip install category_encoders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import category_encoders as ce


In [None]:
# Build an ordinal encoder that replaces each category label in the six
# feature columns with an integer code.
# NOTE(review): no explicit `mapping` is passed, so the integer codes are
# presumably chosen by the encoder rather than following the natural order of
# the levels (e.g. low < med < high) — confirm this is acceptable.
encoder = ce.OrdinalEncoder(
    cols=[
        'buying price',
        'maintenance cost',
        'number of doors',
        'number of persons',
        'lug_boot',
        'safety',
    ],
)


In [None]:
# Learn the label-to-integer codes from the training features only, then apply
# those same codes to both splits so the test set never influences the fit.
encoder.fit(X_train)
X_train = encoder.transform(X_train)
X_test = encoder.transform(X_test)

In [None]:
# Inspect the encoded training features (every column now holds integer codes).
X_train

Unnamed: 0,buying price,maintenance cost,number of doors,number of persons,lug_boot,safety
417,1,1,1,1,1,1
723,2,2,2,2,1,1
1248,3,1,2,3,2,1
1092,3,2,3,1,1,1
757,2,1,3,3,3,2
...,...,...,...,...,...,...
599,2,3,2,3,1,3
1599,4,2,1,3,2,1
1361,4,4,2,1,3,3
1547,4,2,4,3,2,3


In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree classifier with the library's default hyperparameters.
# NOTE(review): no random_state is passed, so the fitted tree may presumably
# differ between runs — confirm whether reproducibility matters here.
dtree=DecisionTreeClassifier()

In [None]:
# Train the decision tree on the encoded training split.
dtree.fit(X_train,y_train)

DecisionTreeClassifier()

In [None]:
# Decision tree predictions for the encoded test features.
prediction =dtree.predict(X_test)

In [None]:
from sklearn.metrics import classification_report ,confusion_matrix 

In [None]:
# Per-class precision, recall and F1 of the decision tree on the test split.
print(classification_report(y_test,prediction))

              precision    recall  f1-score   support

         acc       0.96      0.92      0.94       108
        good       0.86      0.90      0.88        21
       unacc       0.98      0.99      0.98       374
       vgood       0.94      1.00      0.97        16

    accuracy                           0.97       519
   macro avg       0.94      0.95      0.94       519
weighted avg       0.97      0.97      0.97       519



In [None]:
# Confusion matrix of the decision tree on the test split
# (rows: true labels, columns: predicted labels).
print(confusion_matrix(y_test,prediction))

[[ 99   0   8   1]
 [  2  19   0   0]
 [  2   3 369   0]
 [  0   0   0  16]]


In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest classifier built from 600 trees.
# NOTE(review): no random_state is passed, so results may presumably vary
# between runs — confirm whether reproducibility matters here.
rfc=RandomForestClassifier(n_estimators=600)

In [None]:
# Train the random forest on the encoded training split.
rfc.fit(X_train,y_train)

RandomForestClassifier(n_estimators=600)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Per-class precision, recall and F1 of the random forest on the test split.
# Score the forest's own predictions: `prediction` holds the decision tree's
# output, so using it here would only repeat the tree's metrics.
# NOTE: re-run this cell — the output saved below was produced from the
# decision tree's predictions.
print(classification_report(y_test, rfc.predict(X_test)))

              precision    recall  f1-score   support

         acc       0.96      0.92      0.94       108
        good       0.86      0.90      0.88        21
       unacc       0.98      0.99      0.98       374
       vgood       0.94      1.00      0.97        16

    accuracy                           0.97       519
   macro avg       0.94      0.95      0.94       519
weighted avg       0.97      0.97      0.97       519



In [None]:
# Confusion matrix of the random forest on the test split
# (rows: true labels, columns: predicted labels).
# Score the forest's own predictions: `prediction` holds the decision tree's
# output, so using it here would only repeat the tree's matrix.
# NOTE: re-run this cell — the output saved below was produced from the
# decision tree's predictions.
print(confusion_matrix(y_test, rfc.predict(X_test)))

[[ 99   0   8   1]
 [  2  19   0   0]
 [  2   3 369   0]
 [  0   0   0  16]]
