In [79]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Load dataset

In [80]:
# load iris data
dataset = load_iris()

# use 80% as train data, 20% as test data
X_train,X_test,y_train,y_test=train_test_split(dataset.data,dataset.target,test_size=0.2)

# Find best hyperparamters
RBF kernel SVM has two parameters.
1. C (cost): The C parameter trades off correct classification of training examples against maximization of the decision function’s margin. For larger values of C, a smaller margin will be accepted if the decision function is better at classifying all training points correctly. 

2. gamma: the gamma parameter defines how far the influence of a single training example reaches, with low values meaning ‘far’ and high values meaning ‘close’. The gamma parameters can be seen as the inverse of the radius of influence of samples selected by the model as support vectors.

reference:
http://scikit-learn.org/stable/auto_examples/svm/plot_rbf_parameters.html

# Grid Search
find best hyperparameter using grid search.

In [81]:
def svc_param_selection(X, y, nfolds):
    svm_parameters = [
                        {'kernel': ['rbf'],
                         'gamma': [0.00001,0.0001, 0.001, 0.01, 0.1, 1],
                         'C': [0.01, 0.1, 1, 10, 100, 1000]
                        }
                       ]
    
    clf = GridSearchCV(SVC(), svm_parameters, cv=10)
    clf.fit(X_train, y_train)
    print(clf.best_params_)
    
    return clf

In [82]:
clf = svc_param_selection(X_train, y_train, 10)

{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}


# Test

In [83]:
y_true, y_pred = y_test, clf.predict(X_test)

print(classification_report(y_true, y_pred))
print()
print("accuracy : "+ str(accuracy_score(y_true, y_pred)) )

             precision    recall  f1-score   support

          0       1.00      1.00      1.00         7
          1       1.00      1.00      1.00        13
          2       1.00      1.00      1.00        10

avg / total       1.00      1.00      1.00        30


accuracy : 1.0


In [84]:
# Visualize true value with prediction value in pandas dataframe.
comparison = pd.DataFrame({'prediction':y_pred, 'ground_truth':y_true}) 
comparison

Unnamed: 0,ground_truth,prediction
0,1,1
1,1,1
2,2,2
3,0,0
4,1,1
5,2,2
6,2,2
7,2,2
8,1,1
9,0,0
