In [1]:

import pandas as pd
pd.options.display.max_colwidth = 80

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder

from sklearn.svm import SVC # SVM model with kernels
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import accuracy_score

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import numpy as np
import warnings

import seaborn as sn
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

data = './data/car_evaluation.csv'

df = pd.read_csv(data, header=None)

In [4]:
col_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']


df.columns = col_names

col_names
 


['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']

In [5]:
df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [6]:
df.describe()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
count,1728,1728,1728,1728,1728,1728,1728
unique,4,4,4,3,3,3,4
top,vhigh,vhigh,2,2,small,low,unacc
freq,432,432,432,576,576,576,1210


In [7]:
for column in df.columns:
    print(df[column].value_counts(), '\n') 

vhigh    432
high     432
med      432
low      432
Name: buying, dtype: int64 

vhigh    432
high     432
med      432
low      432
Name: maint, dtype: int64 

2        432
3        432
4        432
5more    432
Name: doors, dtype: int64 

2       576
4       576
more    576
Name: persons, dtype: int64 

small    576
med      576
big      576
Name: lug_boot, dtype: int64 

low     576
med     576
high    576
Name: safety, dtype: int64 

unacc    1210
acc       384
good       69
vgood      65
Name: class, dtype: int64 



In [15]:
y = df['buying']
X = df.drop(['buying'], axis=1) 
 

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4, random_state = 42)

In [17]:
columns_encode = []
columns_encode.append(col_names)
columns_encode
ordinal_encoder = OrdinalEncoder()

col_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
X_train = ordinal_encoder.fit_transform(X_train, col_names)
X_test = ordinal_encoder.transform(X_test)
 

In [18]:
param_grid = [{'kernel': ['poly'], 'C' : [3, 5, 7, 9, 10]},
             {'kernel' : ['rbf'], 'C' : [3, 5, 7, 9, 10], 'gamma' : [2, 4, 6, 8]}]

svm = SVC()
grid_search = GridSearchCV(svm, param_grid, return_train_score=True)

grid_search.fit(X_train, y_train)

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [3, 5, 7, 9, 10], 'kernel': ['poly']},
                         {'C': [3, 5, 7, 9, 10], 'gamma': [2, 4, 6, 8],
                          'kernel': ['rbf']}],
             return_train_score=True)

In [19]:
grid_search.best_params_

{'C': 7, 'kernel': 'poly'}

In [20]:
grid_search.best_estimator_

SVC(C=7, kernel='poly')

In [21]:
svm_y_pred = grid_search.predict(X_test)

accuracy_score(y_test, svm_y_pred)

0.23699421965317918

In [22]:
svm_y_pred_train = grid_search.predict(X_train)

accuracy_score(y_train, svm_y_pred_train)


0.388996138996139