In [1]:
import seaborn as sns
import numpy as np

In [2]:
df = sns.load_dataset('iris')
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import sklearn.svm as svm

col=['petal_length', 'petal_width']
X= df.loc[:,col]
species_to_num = {'setosa':0,
                   'versicolor':1,
                  'virginica':2
                 }
df['tmp'] = df['species'].map(species_to_num)
y = df['tmp']
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.77,random_state=0)



In [3]:
sc_x = StandardScaler()
X_std_train = sc_x.fit_transform(X_train)

In [4]:
C=1.0
clf = svm.SVC(kernel='rbf', degree=3, C=C)
clf.fit(X_std_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [5]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score,recall_score,f1_score

## Cross validation

In [6]:
res = cross_val_score(clf,X_std_train,y_train,cv=10,scoring='accuracy')
print(np.mean(res))
print(np.std(res))

0.9488578088578089
0.05446719526817637


# Grid search

In [7]:
from sklearn.pipeline import Pipeline;

In [8]:
from sklearn.model_selection import GridSearchCV

In [9]:
pipeline = Pipeline([('clf',svm.SVC(kernel='rbf',C=1,gamma=0.1))])

In [10]:
params = {'clf__C':(0.1,0.5,1,2,5,10,20),
          'clf__gamma':(0.001,0.01,0.1,0.25,0.5,0.75,1)}

In [11]:
svm_grid_rfm = GridSearchCV(pipeline,params,n_jobs=1,cv=3,scoring='accuracy')

In [12]:
svm_grid_rfm.fit(X_train,y_train)

GridSearchCV(cv=3, error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('clf', SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))]),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'clf__C': (0.1, 0.5, 1, 2, 5, 10, 20), 'clf__gamma': (0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [13]:
svm_grid_rfm.best_score_

0.9565217391304348

In [14]:
best = svm_grid_rfm.best_estimator_.get_params()

In [15]:
for k in sorted(params.keys()):
    print(k,best[k])

clf__C 0.5
clf__gamma 0.75


In [16]:
y_test_pred = svm_grid_rfm.predict(X_test)

In [17]:
confusion_matrix(y_test,y_test_pred)

array([[12,  0,  0],
       [ 0, 14,  0],
       [ 0,  0,  9]])