# SVM Classifier

Goal: prototype a scikit-learn implementation of an SVM classifier for EEG data  
Stretch: Include multiple algorithms so that we can compare them

In [1]:
# bring in the iris loader from scikit-learn's datasets module
from sklearn.datasets import load_iris

# load_iris() hands back a "bunch" object: the iris dataset plus its attributes
iris = load_iris()

# feature matrix goes in "X", response vector in "y"
X, y = iris.data, iris.target

In [4]:
from sklearn import svm

In [95]:
# Fit an SVM classifier with a polynomial kernel.
# C is the cost parameter. It was shown to influence the accuracy of the
# attentive and inattentive states in opposite directions, so C = 10 is used
# as a flexible middle value based on their data.
# NOTE(review): the study "their data" refers to is not cited here.
clf = svm.SVC(C=10, kernel='poly')
clf.fit(X, y)

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [96]:
# get the support vectors of the fitted classifier
clf.support_vectors_

array([[ 5.1,  3.3,  1.7,  0.5],
       [ 4.8,  3.4,  1.9,  0.2],
       [ 4.5,  2.3,  1.3,  0.3],
       [ 5.1,  3.8,  1.9,  0.4],
       [ 5.9,  3.2,  4.8,  1.8],
       [ 6.3,  2.5,  4.9,  1.5],
       [ 6.7,  3. ,  5. ,  1.7],
       [ 6. ,  2.7,  5.1,  1.6],
       [ 5.1,  2.5,  3. ,  1.1],
       [ 4.9,  2.5,  4.5,  1.7],
       [ 6. ,  2.2,  5. ,  1.5],
       [ 6.1,  3. ,  4.9,  1.8],
       [ 7.2,  3. ,  5.8,  1.6],
       [ 6.3,  2.8,  5.1,  1.5],
       [ 6. ,  3. ,  4.8,  1.8]])

In [97]:
# indices of the support vectors
clf.support_

array([ 23,  24,  41,  44,  70,  72,  77,  83,  98, 106, 119, 127, 129,
       133, 138], dtype=int32)

In [98]:
# number of support vectors for each class
clf.n_support_

array([4, 5, 6], dtype=int32)

# Basic Evaluation

In [99]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; use its replacement when available and fall back on older installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# hold out 40% of the samples for testing; random_state fixes the shuffle so
# the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=4)

In [100]:
# refit the classifier on the training split only (it was fit on all of X, y earlier)
clf.fit(X_train,y_train)

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [101]:
# predict labels for the held-out test features
svm_pred = clf.predict(X_test)

In [102]:
# test prediction accuracy on the held-out split
from sklearn import metrics

# print(...) with a single argument prints the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3
print(metrics.accuracy_score(y_test, svm_pred))

0.966666666667


# Cross Validation

In [103]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; use its replacement when available and fall back on older installs.
try:
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import train_test_split
    from sklearn.cross_validation import cross_val_score

In [104]:
# simulate splitting a dataset of 25 observations into 5 folds
# NOTE(review): sklearn.cross_validation was deprecated in scikit-learn 0.18
# and removed in 0.20. Its replacement, sklearn.model_selection.KFold, takes
# n_splits instead of (n, n_folds) and is iterated via kf.split(...), so
# migrating changes how kf is used -- confirm any later use of kf before
# switching. kf is not used by the other cells visible here.
from sklearn.cross_validation import KFold
kf = KFold(25, n_folds=5, shuffle=False)

In [105]:
# 10-fold cross-validation, scoring each fold by accuracy
scores = cross_val_score(clf, X, y, cv=10, scoring='accuracy')
# print(...) with a single argument prints the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3
print(scores)

# use average accuracy as an estimate of out-of-sample accuracy
print(scores.mean())


[ 1.          1.          1.          1.          0.93333333  0.93333333
  0.86666667  1.          1.          1.        ]
0.973333333333
