In [1]:
#Import necessary libraries
import numpy as np
from sklearn import svm

In [139]:
#Load Data
X_raw=np.genfromtxt('X.txt',delimiter=' ',dtype='float32')
y_raw=np.genfromtxt('y.txt',delimiter=' ')
label=np.genfromtxt('label.txt',delimiter=' ',dtype='S')
print label

['eegRawValue' 'attention' 'meditation' 'blinkStrength' 'delta' 'theta'
 'alphaLow' 'alphaHigh' 'betaLow' 'betaHigh' 'gammaLow' 'gammaMid'
 'N(blinks),60sec' 'N(50<blinkStr<70),60sec' 'N(eeg<-500),60sec'
 'N(1.677e+007<alphaLow),60sec' 'N(1.674e+007<betaLow),60sec'
 'N(1e+005<theta<5e+005),60sec']


In [140]:
#Shuffle Data
idx=np.arange((X_raw.shape)[0])
np.random.shuffle(idx)
X_shuffled=X_raw[idx]
y_shuffled=y_raw[idx]
print(X_shuffled.shape,y_shuffled.shape)

((34015L, 18L), (34015L,))


In [142]:
#Select Features to run
features=[12,13,14,15,16,17]
X_final=X_shuffled[:,features]
idx_train=int(X_final.shape[0]*0.7)
idx_val=int(X_final.shape[0]*0.9)
print(idx_train,idx_val-idx_train,X_final.shape[0]-idx_val)

(23810, 6803, 3402L)


In [143]:
#Divide examples using Mini-batch
X_train=X_final[0:idx_train]
y_train=y_shuffled[0:idx_train]
X_val=X_final[idx_train+1:idx_val]
y_val=y_shuffled[idx_train+1:idx_val]
X_test=X_final[idx_val+1:]
y_test=y_shuffled[idx_val+1:]

In [144]:
#Test with default options
svc=svm.SVC(kernel='rbf',C=8,gamma=0.1)
svc.fit(X_train,y_train)
print svc

SVC(C=8, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.1,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)


In [145]:
#Construct Accuracy metrics
#Simple Accuracy
pred_pos=svc.predict(X_val)
accuracy=np.mean(pred_pos==y_val)
#F1 score
def F1_score(pred_pos,y):
    n_pred_pos=float(sum(pred_pos==1))
    n_True_pos=float(sum(np.all([pred_pos==1,y==1],axis=0)==True))
    n_actual_pos=float(sum(y==1))
    Precision=0
    Recall=0
    F1=0
    if n_pred_pos!=0:
        Precision=n_True_pos/n_pred_pos
    if n_actual_pos!=0:
        Recall=n_True_pos/n_actual_pos
    if Precision+Recall!=0:
        F1=2*Precision*Recall/(Precision+Recall)
    return F1,Precision,Recall
F1,P,R=F1_score(pred_pos,y_val)
#Print Accuracy metrics
print accuracy
print (F1,P,R)
print svc.n_support_

0.967068509262
(0.910828025477707, 0.9248181083265966, 0.8972549019607843)
[1687 1459]


In [146]:
#Parameter tuning
C_values=[2**(x) for x in range(-11,4)]
gamma_values=[10**x for x in range(-3,2)]
best_svc=svc;
best_accuracy=accuracy;
best_F1=[F1,P,R]
for c in C_values:
    for g in gamma_values:
        svc_dummy=svm.SVC(kernel='rbf',C=c,gamma=g)
        svc_dummy.fit(X_train,y_train)
        dummy_pred_pos=svc_dummy.predict(X_val)
        dummy_accuracy=np.mean(pred_pos==y_val)
        dummy_F1,dummy_P,dummy_R=F1_score(dummy_pred_pos,y_val)
        if dummy_F1>F1:
            best_svc=svc_dummy
            best_accuracy=dummy_accuracy
            best_F1=[dummy_F1,dummy_P,dummy_R]
print best_svc
print best_F1,best_accuracy
print best_svc.n_support_

SVC(C=8, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.1,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
[0.910828025477707, 0.9248181083265966, 0.8972549019607843] 0.967068509262
[1687 1459]


In [147]:
#Classify the test set
test_pred_pos=best_svc.predict(X_test)
test_accuracy=np.mean(test_pred_pos==y_test)
test_F1=F1_score(test_pred_pos,y_test)
print test_F1,test_accuracy

(0.9031746031746032, 0.9074960127591707, 0.8988941548183255) 0.964128197589


In [153]:
print best_svc.support_
print best_svc.support_vectors_[0]
print best_svc.support_vectors_[1]

[    7    27    35 ..., 23788 23800 23806]
[ 14.   8.   0.   0.   0.   1.]
[ 23.   9.   0.   0.   1.   2.]


In [154]:
best_svc.support_.shape

(3146L,)