# Import necessary packages

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Data Preprocessing

In [2]:
# Location of the raw car-evaluation CSV.
path_to_csv = "/content/car.csv"

# Column labels are supplied explicitly through `names` when the file is read.
column_names = [
    'buying',
    'maint',
    'doors',
    'persons',
    'lug_boot',
    'safety',
    'target_label',
]
data = pd.read_csv(path_to_csv, names=column_names)

In [3]:
# Ordinal encodings for the feature columns: each inner dict maps a raw CSV value
# to the integer that replaces it.
#   buying / maint : low, med, high, vhigh -> 0, 1, 2, 3 respectively
#   lug_boot       : small, med, big       -> 0, 1, 2 respectively
#   safety         : low, med, high        -> 0, 1, 2 respectively
#   doors          : '5more' -> 5
#   persons        : 'more'  -> 5
# For 'doors' and 'persons' only the open-ended top category is mapped; the
# remaining values are expected to be numeric strings that the int cast below parses.
feature_encodings = {
    'buying': {'low': 0, 'med': 1, 'high': 2, 'vhigh': 3},
    'maint': {'low': 0, 'med': 1, 'high': 2, 'vhigh': 3},
    'doors': {'5more': 5},
    'persons': {'more': 5},
    'lug_boot': {'small': 0, 'med': 1, 'big': 2},
    'safety': {'low': 0, 'med': 1, 'high': 2},
}

# Apply every mapping in one pass and rebind `data` instead of mutating it in place.
data = data.replace(feature_encodings)

# Cast every encoded column to int explicitly rather than relying on replace() to
# infer an integer dtype (newer pandas releases deprecate that silent downcast).
# The cast also fails loudly if an unmapped, non-numeric category is still present.
data = data.astype({column: int for column in feature_encodings})

In [4]:
# Ordinal encoding of the class label:
# "unacc" (unacceptable), "acc" (acceptable), "good", "vgood" -> 0, 1, 2, 3 respectively.
target_encoding = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}

# Replace the labels without mutating in place, then cast explicitly to int so the
# resulting dtype does not depend on replace() downcasting the column on its own.
data = data.replace({'target_label': target_encoding}).astype({'target_label': int})

In [5]:
# Show the encoded DataFrame as the cell output to check the result of the preprocessing.
data

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,target_label
0,3,3,2,2,0,0,0
1,3,3,2,2,0,1,0
2,3,3,2,2,0,2,0
3,3,3,2,2,1,0,0
4,3,3,2,2,1,1,0
...,...,...,...,...,...,...,...
1723,0,0,5,5,1,1,2
1724,0,0,5,5,1,2,3
1725,0,0,5,5,2,0,0
1726,0,0,5,5,2,1,2


In [6]:
# Sanity-check the encoded frame: per-column dtypes and missing-value counts.
# DataFrame.isnull and DataFrame.isna are aliases, so the last two reports always agree.
quality_reports = [
    ("Column DataTypes - \n", data.dtypes),
    ("Sum of Null Values - \n", data.isnull().sum()),
    ("Sum of None values - \n", data.isna().sum()),
]
for position, (heading, report) in enumerate(quality_reports):
    if position > 0:
        print()  # blank separator line between consecutive reports
    print(heading, report)

Column DataTypes - 
 buying          int64
maint           int64
doors           int64
persons         int64
lug_boot        int64
safety          int64
target_label    int64
dtype: object

Sum of Null Values - 
 buying          0
maint           0
doors           0
persons         0
lug_boot        0
safety          0
target_label    0
dtype: int64

Sum of None values - 
 buying          0
maint           0
doors           0
persons         0
lug_boot        0
safety          0
target_label    0
dtype: int64


In [7]:
# Name of the label column the classifiers are trained to predict.
target_variable = 'target_label'

# Feature matrix: every column except the label. Index.difference returns the
# remaining labels sorted, so X's columns are in alphabetical order.
feature_columns = data.columns.difference([target_variable])
X = data[feature_columns]

# Label vector.
y = data[target_variable]

In [8]:
# Split the dataset, holding out 30% of the rows as the test set.
# A fixed random_state makes the shuffle (and therefore the split) reproducible
# across kernel restarts; without it every run evaluates on different rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Support Vector Classification

In [9]:
from sklearn.svm import SVC

In [10]:
# Create a support-vector classifier with default hyperparameters, then train it
# on the training split (fit returns the estimator itself).
svcClf = SVC()
svcClf = svcClf.fit(X_train, y_train)

In [11]:
# Mean accuracy of the SVC on the training split.
svcClf.score(X=X_train, y=y_train)

0.9429280397022333

In [12]:
# Mean accuracy of the SVC on the held-out test split.
svcClf.score(X=X_test, y=y_test)

0.905587668593449

In [13]:
# Predict the class of one test sample: the row at positional index 1 of X_test
# (i.e. its second row). The double brackets keep it a one-row DataFrame, so the
# column names used during fit are passed to predict along with the values.
svcClf.predict(X_test.iloc[[1]])

array([0])

# Stochastic Gradient Descent

In [14]:
from sklearn.linear_model import SGDClassifier

In [15]:
# Train a linear classifier with stochastic gradient descent on the training split.
# SGD shuffles the training data between epochs, so random_state is fixed to make
# the fitted model and its scores reproducible from run to run.
sgdClf = SGDClassifier(random_state=42).fit(X_train, y_train)

In [16]:
# Mean accuracy of the SGD classifier on the training split.
sgdClf.score(X=X_train, y=y_train)

0.8320926385442514

In [17]:
# Mean accuracy of the SGD classifier on the held-out test split.
sgdClf.score(X=X_test, y=y_test)

0.8400770712909441

In [18]:
# Predict the class of one test sample: the row at positional index 1 of X_test
# (i.e. its second row). The double brackets keep it a one-row DataFrame, so the
# column names used during fit are passed to predict along with the values.
sgdClf.predict(X_test.iloc[[1]])

array([0])

# K Nearest Neighbors

In [19]:
from sklearn.neighbors import KNeighborsClassifier

In [20]:
# Create a k-nearest-neighbours classifier that votes among the 4 closest training
# samples, then train it on the training split (fit returns the estimator itself).
knclf = KNeighborsClassifier(n_neighbors=4)
knclf = knclf.fit(X_train, y_train)

In [21]:
# Mean accuracy of the k-NN classifier on the training split.
knclf.score(X=X_train, y=y_train)

0.9619520264681555

In [22]:
# Mean accuracy of the k-NN classifier on the held-out test split.
knclf.score(X=X_test, y=y_test)

0.9229287090558767

In [23]:
# Predict the class of one test sample: the row at positional index 1 of X_test
# (i.e. its second row). The double brackets keep it a one-row DataFrame, so the
# column names used during fit are passed to predict along with the values.
knclf.predict(X_test.iloc[[1]])

array([0])

# Categorical Naive Bayes

In [24]:
from sklearn.naive_bayes import CategoricalNB

In [25]:
# Create a categorical naive Bayes classifier with default hyperparameters, then
# train it on the training split (fit returns the estimator itself).
ctnbClf = CategoricalNB()
ctnbClf = ctnbClf.fit(X_train, y_train)

In [26]:
# Mean accuracy of the categorical naive Bayes classifier on the training split.
ctnbClf.score(X=X_train, y=y_train)

0.8808933002481389

In [27]:
# Mean accuracy of the categorical naive Bayes classifier on the held-out test split.
ctnbClf.score(X=X_test, y=y_test)

0.859344894026975

In [28]:
# Predict the class of one test sample: the row at positional index 1 of X_test
# (i.e. its second row). The double brackets keep it a one-row DataFrame, so the
# column names used during fit are passed to predict along with the values.
ctnbClf.predict(X_test.iloc[[1]])

array([0])

# Decision Tree Classifier

In [29]:
from sklearn.tree import DecisionTreeClassifier

In [30]:
# Train a decision tree on the training split. max_features=None lets every split
# consider all features. random_state is fixed so that the fitted tree and its
# scores are reproducible from run to run.
dtreeClf = DecisionTreeClassifier(max_features=None, random_state=42).fit(X_train, y_train)

In [31]:
# Mean accuracy of the decision tree on the training split.
dtreeClf.score(X=X_train, y=y_train)

1.0

In [32]:
# Mean accuracy of the decision tree on the held-out test split.
dtreeClf.score(X=X_test, y=y_test)

0.9903660886319846

In [33]:
# Predict the class of one test sample: the row at positional index 1 of X_test
# (i.e. its second row). The double brackets keep it a one-row DataFrame, so the
# column names used during fit are passed to predict along with the values.
dtreeClf.predict(X_test.iloc[[1]])

array([0])

# Random Forest Classifier

In [34]:
from sklearn.ensemble import RandomForestClassifier

In [35]:
# Train a random forest on the training split. max_features=None lets every split
# consider all features. The forest draws bootstrap samples at random, so
# random_state is fixed to make the fitted model and its scores reproducible.
rfClf = RandomForestClassifier(max_features=None, random_state=42).fit(X_train, y_train)

In [36]:
# Mean accuracy of the random forest on the training split.
rfClf.score(X=X_train, y=y_train)

1.0

In [37]:
# Mean accuracy of the random forest on the held-out test split.
rfClf.score(X=X_test, y=y_test)

0.9788053949903661

In [38]:
# Predict the class of one test sample: the row at positional index 1 of X_test
# (i.e. its second row). The double brackets keep it a one-row DataFrame, so the
# column names used during fit are passed to predict along with the values.
rfClf.predict(X_test.iloc[[1]])

array([0])