In [70]:
# Importing the required libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

In [71]:
# Importing the data
data = pd.read_csv("car_evaluation.csv")

In [72]:
data.head()

Unnamed: 0,vhigh,vhigh.1,2,2.1,small,low,unacc
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


In [73]:
data.shape

(1727, 7)

In [74]:
# Adding column name
data.columns = ['buying_price', 'maintenance_cost', 'number_of_doors', 'number_of_persons', 'lug_boot', 'safety', 'decision']

In [75]:
data.head()

Unnamed: 0,buying_price,maintenance_cost,number_of_doors,number_of_persons,lug_boot,safety,decision
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


### SVM cannot deal with non-numeric values, Hence converting the text values into numeric values by using Label Encoder

In [76]:
# Label encoder
enc = LabelEncoder()
data.buying_price = enc.fit_transform(data.buying_price)
data.maintenance_cost = enc.fit_transform(data.maintenance_cost)
data.lug_boot = enc.fit_transform(data.lug_boot)
data.safety = enc.fit_transform(data.safety)
data.decision = enc.fit_transform(data.decision)
data.number_of_doors = enc.fit_transform(data.number_of_doors)
data.number_of_persons = enc.fit_transform(data.number_of_persons)

In [77]:
data.head()

Unnamed: 0,buying_price,maintenance_cost,number_of_doors,number_of_persons,lug_boot,safety,decision
0,3,3,0,0,2,2,2
1,3,3,0,0,2,0,2
2,3,3,0,0,1,1,2
3,3,3,0,0,1,2,2
4,3,3,0,0,1,0,2


In [79]:
# Allocating X and y
X = data.iloc[:,:-1]
y = data.decision

In [80]:
# Splitting the data into Train and Test datasets
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.1,random_state=10)

In [83]:
# Creating the model and fitting
model = SVC() # kernel='rbf',C = 1, gamma = 0.1
model.fit(X_train,y_train)

SVC()

In [86]:
# y_predict
y_predict = model.predict(X_test)

In [87]:
accuracy_score(y_predict,y_test)

0.9017341040462428

In [88]:
confusion_matrix(y_predict,y_test)

array([[ 29,   3,   0,   4],
       [  5,   3,   0,   0],
       [  5,   0, 118,   0],
       [  0,   0,   0,   6]], dtype=int64)

### To get the efficient parameter and it value, we are using GridSearchCV - It will fit the model in all the given paramters and crossreference them

In [89]:
parameters = {'kernel':['rbf','linear'],
             'C':[1,5,8,10,12,15],
             'gamma':[0.1,0.5,0.8,1,2]}
grid_model = GridSearchCV(SVC(),parameters)

In [90]:
# Grid_Model Fitting
grid_model.fit(X_train,y_train)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [1, 5, 8, 10, 12, 15],
                         'gamma': [0.1, 0.5, 0.8, 1, 2],
                         'kernel': ['rbf', 'linear']})

In [91]:
# Gives the best paramaters to use to the efficient result
grid_model.best_params_

{'C': 8, 'gamma': 0.5, 'kernel': 'rbf'}

In [92]:
# Gives the best accuracy score to use to the efficient result
grid_model.best_score_

0.9948511565190333

# Therefore, SVC('C': 8, 'gamma': 0.5, 'kernel': 'rbf') is the best parameters to get the best efficiency with the accuracy_score