In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

# Dataset loader for IRIS
from sklearn.datasets import load_iris

from sklearn.model_selection import StratifiedShuffleSplit

# Support vector machine - learns coefficients of a function basis-set 
# (Gaussians, for example) in order to construct a decision boundary.
from sklearn.svm import SVC

In [2]:
# Fetch the Iris dataset bundle; features and labels are pulled out below.
iris_data = load_iris()

# The rows come grouped by label (0,0,0,0,...,1,1,1,1,...,2,2,2,2), so draw a
# seeded random ordering and apply that same ordering to both the features
# and the labels, keeping every row paired with its own label.
np.random.seed(12345)
idx = np.random.permutation(len(iris_data['data']))
x, y = iris_data['data'][idx], iris_data['target'][idx]

# x.shape is (rows, features), which is exactly what the format string expects.
print("Dataset has %d rows, %d features" % x.shape)

Dataset has 150 rows, 4 features


In [6]:
# Estimate SVC accuracy over repeated stratified random train/test splits:
# 10 independent splits, each holding out 30% of the rows for testing.
# random_state is pinned so that re-running this cell reproduces the same
# splits (and therefore the same accuracies) every time.
split = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=12345)

# Test accuracy of each split, in split order.
cvs = []

# enumerate supplies the 1-based fold number, so there is no hand-maintained
# counter and the `idx` permutation array from the data-loading cell is not
# overwritten with an integer.
for fold, (train_idx, test_idx) in enumerate(split.split(x, y), start=1):
    train_x, train_y = x[train_idx], y[train_idx]
    test_x, test_y = x[test_idx], y[test_idx]

    # A new classifier is created for every split, so each fit starts fresh.
    classifier = SVC(C=1, gamma=1)
    classifier.fit(train_x, train_y)

    # Accuracy = fraction of held-out rows whose predicted label is correct.
    # Computed once and reused for both the log line and the summary list.
    accuracy = np.mean(classifier.predict(test_x) == test_y)
    cvs.append(accuracy)
    print("CV fold %d accuracy is: %.6g" % (fold, accuracy))

print("CV results: mean = %.6g, stdev = %.6g" % (np.mean(cvs), np.std(cvs)))

CV fold 1 accuracy is: 0.977778
CV fold 2 accuracy is: 0.955556
CV fold 3 accuracy is: 1
CV fold 4 accuracy is: 0.955556
CV fold 5 accuracy is: 0.933333
CV fold 6 accuracy is: 0.911111
CV fold 7 accuracy is: 0.977778
CV fold 8 accuracy is: 1
CV fold 9 accuracy is: 0.977778
CV fold 10 accuracy is: 0.977778
CV results: mean = 0.966667, stdev = 0.0267591
