# Support Vector Machine

In [1]:
# Import necessary packages
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from collections import Counter

In [2]:
# Load the data
data = pd.read_csv('car_evaluation.csv')
data.head(2)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,outcome
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc


In [3]:
# Basic check
data.isnull().sum()

buying      0
maint       0
doors       0
persons     0
lug_boot    0
safety      0
outcome     0
dtype: int64

In [4]:
data.shape

(1728, 7)

In [5]:
data.describe()

Unnamed: 0,doors,persons
count,1728.0,1728.0
mean,3.5,3.666667
std,1.118358,1.24758
min,2.0,2.0
25%,2.75,2.0
50%,3.5,4.0
75%,4.25,5.0
max,5.0,5.0


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
buying      1728 non-null object
maint       1728 non-null object
doors       1728 non-null int64
persons     1728 non-null int64
lug_boot    1728 non-null object
safety      1728 non-null object
outcome     1728 non-null object
dtypes: int64(2), object(5)
memory usage: 94.6+ KB


In [7]:
# Setting predictors and target
X = data.iloc[:,:-1]
y = data.outcome

In [8]:
enc = LabelEncoder()
X.buying = enc.fit_transform(X.buying)
X.maint = enc.fit_transform(X.maint)
X.lug_boot = enc.fit_transform(X.lug_boot)
X.safety = enc.fit_transform(X.safety)
X.head(2)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,3,3,2,2,2,1
1,3,3,2,2,2,2


In [11]:
# Train test split
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)

In [144]:
# Define the model
model = SVC(C=7,gamma=0.5,kernel='rbf') # C = 0.1 to 1000 gamma = 0.01 to 10
model.fit(X_train,y_train)
y_predict = model.predict(X_test)

In [145]:
accuracy_score(y_test,y_predict)

0.9976851851851852

In [146]:
pd.crosstab(y_test,y_predict)

col_0,acc,good,unacc,vgood
outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
acc,83,0,1,0
good,0,19,0,0
unacc,0,0,307,0
vgood,0,0,0,22


In [147]:
print(Counter(y_test))

Counter({'unacc': 307, 'acc': 84, 'vgood': 22, 'good': 19})


# GridSearchCV

In [159]:
from sklearn.model_selection import GridSearchCV
parameters = {'kernel':['rbf'],
              'C':[1,10,100,500],
              'gamma':[0.01,0.1,0.5,1.0]}
grid_model = GridSearchCV(SVC(),parameters,verbose=2)
grid_model.fit(X_train,y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ...................... C=1, gamma=0.01, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ...................... C=1, gamma=0.01, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ...................... C=1, gamma=0.01, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.5, kernel=rbf ......................................
[CV] ...........

[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:    2.5s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [1, 10, 100, 500], 'gamma': [0.01, 0.1, 0.5, 1.0],
                         'kernel': ['rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=2)

In [160]:
grid_model.best_score_

0.9822530864197531