# import necessory packages

In [167]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Data Preprocessing

In [168]:
path_to_csv = "/content/car.csv"
data = pd.read_csv(path_to_csv,names=['buying','maint','doors','persons','lug_boot','safety','target_label'])

In [169]:
# replace low, med, high, vhigh with 0,1,2,3 repectively in 'buying' column
data.replace({'buying': { 'low':0, 'med':1, 'high':2, 'vhigh':3 }},inplace=True)

# replace low, med, high, vhigh with 0,1,2,3 repectively in 'maint' column
data.replace({'maint': { 'low':0, 'med':1, 'high':2, 'vhigh':3 }},inplace=True)

# replace 5more with 5 in 'doors' column
data.replace({'doors': { '5more':5 }},inplace=True)

# replace more with 5 in 'persons' column
data.replace({'persons': { 'more':5 }},inplace=True)

# convert 'doors' and 'persons' column to int dtype
data = data.astype({ 'doors':int, 'persons':int })

# replace small, med, big with 0, 1, 2 in 'lug_boot' column
data.replace({'lug_boot': { 'small':0, 'med':1, 'big':2 }},inplace=True)

# replace low, med, high with 0, 1, 2 in 'safety' column
data.replace({ 'safety': { 'low':0, 'med':1, 'high':2 }},inplace=True)

In [170]:
# replace "unacc"(unacceptable),"acc"(acceptable),"good","vgood" with 0, 1, 2, 3 repectively in 'target_label' column
data.replace({'target_label' : { 'unacc' : 1, 'acc' : 2, 'good' : 3,'vgood':4 }},inplace=True)

In [171]:
data

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,target_label
0,3,3,2,2,0,0,1
1,3,3,2,2,0,1,1
2,3,3,2,2,0,2,1
3,3,3,2,2,1,0,1
4,3,3,2,2,1,1,1
...,...,...,...,...,...,...,...
1723,0,0,5,5,1,1,3
1724,0,0,5,5,1,2,4
1725,0,0,5,5,2,0,1
1726,0,0,5,5,2,1,3


In [172]:
print("Column DataTypes - \n",data.dtypes)
print()
print("Sum of Null Values - \n",data.isnull().sum())
print()
print("Sum of None values - \n",data.isna().sum())

Column DataTypes - 
 buying          int64
maint           int64
doors           int64
persons         int64
lug_boot        int64
safety          int64
target_label    int64
dtype: object

Sum of Null Values - 
 buying          0
maint           0
doors           0
persons         0
lug_boot        0
safety          0
target_label    0
dtype: int64

Sum of None values - 
 buying          0
maint           0
doors           0
persons         0
lug_boot        0
safety          0
target_label    0
dtype: int64


In [173]:
# target variable
target_variable = 'target_label'
y = data[target_variable]

# features
X = data[data.columns.difference([target_variable])]

In [174]:
# split dataset
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.30)

# Support Vector Classfication

In [175]:
from sklearn.svm import SVC

In [176]:
svcClf = SVC().fit(X_train,y_train)

In [177]:
svcClf.score(X_train,y_train) # train-set score

0.9454094292803971

In [178]:
svcClf.score(X_test,y_test) # test-set score

0.9075144508670521

In [179]:
svcClf.predict([X_test.iloc[1]]) # predicted using 1st row of Test set

array([1])

# Stochastic Gradient Descent

In [180]:
from sklearn.linear_model import SGDClassifier

In [181]:
sgdClf = SGDClassifier().fit(X_train,y_train)

In [182]:
sgdClf.score(X_train,y_train) # train-set score

0.8097601323407775

In [183]:
sgdClf.score(X_test,y_test) # test-set score

0.7861271676300579

In [184]:
sgdClf.predict([X_test.iloc[1]]) # predicted using 1st row of Test set

array([2])

# K Nearest Neighbors

In [185]:
from sklearn.neighbors import KNeighborsClassifier

In [186]:
knclf = KNeighborsClassifier(n_neighbors=4).fit(X_train,y_train)

In [187]:
knclf.score(X_train,y_train) # train-set score

0.9685690653432589

In [188]:
knclf.score(X_test,y_test) # test-set score

0.9036608863198459

In [189]:
knclf.predict([X_test.iloc[1]]) # predicted using 1st row of Test set 

array([1])

# Categorical Naive Bayes

In [190]:
from sklearn.naive_bayes import CategoricalNB

In [191]:
ctnbClf = CategoricalNB().fit(X_train,y_train)

In [192]:
ctnbClf.score(X_train,y_train) # train-set score

0.8726220016542597

In [193]:
ctnbClf.score(X_test,y_test) # test-set score

0.8265895953757225

In [194]:
ctnbClf.predict([X_test.iloc[1]]) # predicted using 1st row of Test set

array([1])

# Decision Tree Classifier

In [195]:
from sklearn.tree import DecisionTreeClassifier

In [196]:
dtreeClf = DecisionTreeClassifier(max_features=None).fit(X_train,y_train)

In [197]:
dtreeClf.score(X_train,y_train) # train-set score

1.0

In [198]:
dtreeClf.score(X_test,y_test) # test-set score

0.9614643545279383

In [199]:
dtreeClf.predict([X_test.iloc[1]]) # predicted using 1st row of Test set

array([1])

# Random Forest Classifier

In [200]:
from sklearn.ensemble import RandomForestClassifier

In [201]:
rfClf = RandomForestClassifier(max_features=None).fit(X_train,y_train)

In [202]:
rfClf.score(X_train,y_train) # train-set score 

1.0

In [203]:
rfClf.score(X_test,y_test) # test-set score

0.9710982658959537

In [204]:
rfClf.predict([X_test.iloc[1]]) # predicted using 1st row of Test set

array([1])