In [33]:
import pandas as pd
# Read the car-evaluation CSV from the working directory into a DataFrame,
# then display its first five rows as the cell output.
data = pd.read_csv(filepath_or_buffer='car_evaluation.csv')
data.head(n=5)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,outcome
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [34]:
# Dimensions of the loaded frame as (rows, columns).
data.shape

(1728, 7)

In [35]:
# Split the frame into the feature matrix and the target column.
# `.copy()` makes X an independent frame, so the column assignments done in the
# encoding step modify X only and do not trigger pandas' SettingWithCopyWarning.
X = data.iloc[:, :-1].copy()   # every column except the last one
y = data['outcome']            # the last column holds the class label
X.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,vhigh,vhigh,2,2,small,low
1,vhigh,vhigh,2,2,small,med
2,vhigh,vhigh,2,2,small,high
3,vhigh,vhigh,2,2,med,low
4,vhigh,vhigh,2,2,med,med


# Preprocessing the data: converting string categories into numeric labels

In [36]:
# Convert the string-valued categorical columns to integer codes so the SVM can use them.
# One LabelEncoder is kept per column (in `encoders`) so every fitted mapping stays
# available for encoding new rows or decoding codes later; a single shared encoder
# would only remember the last column it was fitted on.
# NOTE: LabelEncoder assigns codes in sorted label order, not domain order —
# e.g. safety low/med/high becomes 1/2/0.
from sklearn.preprocessing import LabelEncoder

categorical_columns = ['buying', 'maint', 'lug_boot', 'safety']
encoders = {}
X = X.copy()  # work on an independent frame to avoid SettingWithCopyWarning
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    X[column] = encoders[column].fit_transform(X[column])
enc = encoders['safety']  # backward compatibility: `enc` used to end up fitted on `safety`

In [37]:
# Preview the feature frame again to confirm the categorical columns now hold integer codes.
X.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,3,3,2,2,2,1
1,3,3,2,2,2,2
2,3,3,2,2,2,0
3,3,3,2,2,1,1
4,3,3,2,2,1,2


In [38]:
# Hold out part of the data for evaluation; the fixed seed makes the split reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=10,
)

# Import the support vector machine

In [39]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [43]:
# Baseline: linear-kernel SVC (SVC's default kernel would be 'rbf').
# `gamma` is not passed because it only applies to the 'rbf', 'poly' and 'sigmoid'
# kernels and is ignored when kernel='linear'.
model = SVC(kernel='linear', C=1000)
model.fit(X_train, y_train)
y_predict = model.predict(X_test)
print(accuracy_score(y_test,y_predict))
# Confusion table: rows are the true labels, columns the predicted labels.
print(pd.crosstab(y_test,y_predict))

0.7337962962962963
col_0    acc  unacc
outcome            
acc       15     69
good       0     19
unacc      5    302
vgood     13      9


In [46]:
# Improve on the linear baseline by switching to an RBF kernel with
# hand-picked C and gamma values; `fit` returns the estimator itself.
model = SVC(kernel='rbf', C=10, gamma=0.8).fit(X_train, y_train)
y_predict = model.predict(X_test)
print(accuracy_score(y_test, y_predict))
# Confusion table: rows are the true labels, columns the predicted labels.
print(pd.crosstab(y_test, y_predict))

0.9930555555555556
col_0    acc  good  unacc  vgood
outcome                         
acc       83     0      1      0
good       0    19      0      0
unacc      0     0    307      0
vgood      1     1      0     20


# Enhancing the model's predictions by tuning the kernel, gamma and C values
# Finding the best-fit parameters


In [48]:
# Cross-validated exhaustive search over SVC hyper-parameters:
# one grid for the linear kernel (C only) and one for the RBF kernel (C and gamma).
from sklearn.model_selection import GridSearchCV

linear_grid = {'kernel': ['linear'], 'C': [1, 10, 100, 1000, 10000]}
rbf_grid = {
    'kernel': ['rbf'],
    'gamma': [0.05, 0.1, 0.8, 0.06, 0.07, 0.08],
    'C': [1, 10, 100, 1000],
}
parameters = [linear_grid, rbf_grid]

grid_model_svc = GridSearchCV(estimator=SVC(), param_grid=parameters)
grid_model_svc.fit(X_train, y_train)


GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'kernel': ['linear'], 'C': [1, 10, 100, 1000, 10000]}, {'kernel': ['rbf'], 'gamma': [0.05, 0.1, 0.8, 0.06, 0.07, 0.08], 'C': [1, 10, 100, 1000]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [49]:
# Report the best mean cross-validated score, then the parameter combination that achieved it.
print(grid_model_svc.best_score_, grid_model_svc.best_params_, sep='\n')

0.9822530864197531
{'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}


In [50]:
# Sanity check: these six encoded values match the first row of the encoded feature frame.
first_row_encoded = [3, 3, 2, 2, 2, 1]
model.predict([first_row_encoded])

array(['unacc'], dtype=object)

# Create the ML file

In [53]:

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in 0.23.
# Prefer the standalone `joblib` package (installed as a scikit-learn dependency)
# and fall back to the vendored copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [54]:
# Persist the fitted estimator `model` to the file 'car_trained.ml'.
# NOTE(review): `model` is the hand-tuned RBF SVC fitted in an earlier cell, not
# `grid_model_svc.best_estimator_` — confirm this is the estimator meant to be saved.
# joblib files are pickle-based: only load them from trusted sources.
joblib.dump(model,'car_trained.ml')

['car_trained.ml']