In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn import preprocessing

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

In [None]:
# Input CSV file, relative to the notebook's working directory.
filename = 'data/5m_sessions_r.csv'
# Hold-out ratio: for each label, one in every k examples is set aside for
# testing and the others are used for training (a single split, not a full
# k-fold rotation).
k = 5
# Column types handed to np.genfromtxt: 20 columns in total --
# int, three strings, int, followed by 15 float32 columns.
dtype = "i4,U20,U5,U20,i4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4"

# Human-readable feature names.
# NOTE(review): 18 names are listed here while `dtype` declares only 15 float
# columns -- confirm which names actually match the CSV columns. This name is
# also rebound to the feature matrix in the data-loading cell.
features=['AVGNN','SDNN','MeanHR','SDHR','MinHR','MaxHR','RMSSD','NNxx','pNNxx', 'triangularindex', 'TINN', 'powerVLF', 'powerLF', 'powerHF', 'ratioHFLF', 'peakVLF', 'peakLF', 'peakHF']

## 1 - Extract data from the csv file and split it into labels and features

In [2]:
# Read the CSV into a structured array; names=True tells genfromtxt to take the
# field names from the header row.
data = np.genfromtxt(filename, delimiter=',', dtype=dtype, names=True)

# Field 1 of every record is the label; fields 5..19 are the numeric features.
labels = np.array([record[1] for record in data])
features = np.array([record.tolist()[5:20] for record in data])

print(labels.shape, features.shape, sep='\n')

# Show the first example before and after scaling.
# NOTE(review): scaling is computed over all rows, including the ones later
# held out for testing -- confirm this is acceptable for the evaluation.
first_row_raw = features[0]
print (first_row_raw)
features = preprocessing.scale(features)
first_row_scaled = features[0]
print (first_row_scaled)

print(np.unique(labels))

(51,)
(51, 15)
[  687.15997314    74.40599823    87.31600189     8.82079983    68.35299683
   103.69999695    40.04800034    59.            13.81700039    11.26299953
   312.          1804.09997559  3734.39990234   771.40002441     4.84110022]
[-1.70398962  0.23693743  1.87388173  1.07219059  1.28863555  1.16330654
  0.14550287  0.04431825 -0.29998112 -1.62512662 -0.47367837 -0.34975513
  1.65765672  0.8400598   0.55669211]
['eat' 'focused-active' 'leisure-passive']


## 2 - Split into test and train datasets (balanced by label)

In [3]:
# One entry per distinct label: the label itself plus the rows of `features`
# whose label matches it (selected with a boolean mask).
dic = [
    {'label': lbl, 'features': features[labels == lbl]}
    for lbl in np.unique(labels)
]

def validateLen (a, b, c):
    """Return True when both sized containers `b` and `c` hold exactly `a` items."""
    return all(len(seq) == a for seq in (b, c))

# Per-label hold-out split. For every label, the first n // k rows go to the
# test set and the remaining rows to the training set, so each label is
# represented in both sets in roughly the same proportion. Rows are taken in
# their existing order (no shuffling).
testlabels  = []
trainlabels = []
testfeatures  = []
trainfeatures = []

for activity in dic:
    n = len(activity['features'])
    n_test = n // k
    n_train = n - n_test
    ltest = [activity['label']] * n_test
    ltrain = [activity['label']] * n_train
    ftest = activity['features'][0:n_test]
    ftrain = activity['features'][n_test:n]
    # Fail loudly instead of printing and breaking: a partial split would
    # otherwise be used silently by the cells that follow.
    if not validateLen(n_train, ltrain, ftrain) or not validateLen(n_test, ltest, ftest):
        raise ValueError("Error in len of train and test sets (label %s)" % activity['label'])
    print ("%s: %d examples (%d for train and %d for test)"%(activity['label'], n, n_train, n_test))
    testlabels.extend(ltest)
    trainlabels.extend(ltrain)
    testfeatures.extend(ftest)
    trainfeatures.extend(ftrain)


print("\nTotal: %d train examples and %d test examples "%(len(trainfeatures), len(testfeatures)))

eat: 10 examples (8 for train and 2 for test)
focused-active: 21 examples (17 for train and 4 for test)
leisure-passive: 20 examples (16 for train and 4 for test)

Total: 41 train examples and 10 test examples 


In [4]:
def printResults(expected, result):
    """Print expected vs. predicted labels side by side, followed by a score line.

    Parameters:
        expected: indexable sequence of true labels.
        result: indexable sequence of predicted labels; must have at least
            as many items as `expected`.

    The summary line counts matches against len(expected), so the function
    reports correctly for any pair of sequences rather than relying on a
    notebook-level variable.
    """
    print("expected\t\tresult")
    print("------------------------------")
    correct = 0
    for i, want in enumerate(expected):
        got = result[i]
        print ("%s\t\t%s"%(want, got))
        if want == got:
            correct += 1
    print("Got %d out of %d right! :)"%(correct,len(expected)))

## 3 - Apply classifier (raw)

In [10]:
# Hyper-parameters are named once so the printed headings always match the
# values the classifiers are actually built with.
SVM_C = 1
RBF_GAMMA = 0.01

# Linear-kernel SVM trained on the training split, evaluated on the test split.
clf1 = svm.SVC(kernel='linear', cache_size=1000, C=SVM_C)
clf1.fit(X=trainfeatures, y=trainlabels)

print ("Linear Kernel, C=%g\n" % SVM_C)
printResults(testlabels, clf1.predict(testfeatures))

# RBF-kernel SVM with the same C, for comparison.
clf2 = svm.SVC(kernel='rbf', cache_size=1000, C=SVM_C, gamma=RBF_GAMMA)
clf2.fit(X=trainfeatures, y=trainlabels)

print ("\n\n\nRBF Kernel, C=%g, gamma=%g\n" % (SVM_C, RBF_GAMMA))
printResults(testlabels, clf2.predict(testfeatures))

Linear Kernel, C=1

expected		result
------------------------------
eat		eat
eat		eat
focused-active		focused-active
focused-active		eat
focused-active		leisure-passive
focused-active		eat
leisure-passive		eat
leisure-passive		leisure-passive
leisure-passive		leisure-passive
leisure-passive		eat
Got 5 out of 10 right! :)



RBF Kernel, C=1, gamma=0.001

expected		result
------------------------------
eat		eat
eat		eat
focused-active		leisure-passive
focused-active		leisure-passive
focused-active		leisure-passive
focused-active		leisure-passive
leisure-passive		leisure-passive
leisure-passive		leisure-passive
leisure-passive		leisure-passive
leisure-passive		leisure-passive
Got 6 out of 10 right! :)
