In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn import preprocessing

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

In [17]:
filename = 'data/5m_sessions_e.csv'
k = 5 # k-fold cross validation (one in each k examples will be used for validation, the others for training)
dtype = "i4,U20,U5,U20,i4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4"

ufeatures=['AVGNN','SDNN','MeanHR','SDHR','MinHR','MaxHR','RMSSD','NNxx','pNNxx', 'triangularindex', 'TINN', 'powerVLF', 'powerLF', 'powerHF', 'ratioHFLF', 'peakVLF', 'peakLF', 'peakHF']

## 1 - Extract data from the csv file and split it into labels and features

In [26]:
data = np.genfromtxt(filename, delimiter=',', dtype=dtype, names=True) 

activities = np.array([x[1] for x in data])
postures = np.array([x[2] for x in data])
features = np.array([x.tolist()[5:25] for x in data])

print(features.shape)

print (features[0])
features = preprocessing.scale(features)
print (features[0])

print(np.unique(activities))
print(np.unique(postures))

(80, 18)
[  8.14640015e+02   1.08330002e+02   7.36529999e+01   1.00660000e+01
   5.97610016e+01   9.63389969e+01   6.15859985e+01   7.00000000e+01
   1.90739994e+01   2.16469994e+01   5.64000000e+02   3.33330002e+01
   7.33330002e+01   1.66669998e+01   7.05139990e+03   2.93100000e+03
   1.40080005e+03   2.09249997e+00]
[ 0.42806569  1.56054117 -0.51048175  1.14879498 -0.73261908 -0.36383086
  1.50011515  0.49961835  0.00411719  1.74606166  2.02311773  0.12547196
  0.92162999 -0.91052734  1.19628021  2.37367284  1.62506163 -0.46223849]
['eat' 'focused-active' 'focused-passive' 'household-chores'
 'leisure-passive' 'movement' 'rest-active' 'sleep']
['lie' 'sit' 'stand']


In [None]:
map(lambda x: 'stand' if )

## 2 - split into test and train datasets (balanced by label)

In [23]:
dic = [{'label': label, 'features': features[labels==label]} for label in np.unique(labels)]

def validateLen (a, b, c):
    return a == len(b) and a == len(c)

testlabels  = []
trainlabels = []
testfeatures  = []
trainfeatures = []

for activity in dic:
    n = len(activity['features'])
    n_test = int(n/k)
    n_train = n - n_test    
    ltest = [activity['label'] for i in range(n_test)]
    ltrain = [activity['label'] for i in range(n_train)]
    ftest = activity['features'][0:n_test]
    ftrain = activity['features'][n_test:n]
    if not validateLen(n_train, ltrain, ftrain) or not validateLen(n_test, ltest, ftest):
        print ("Error in len of train and test sets")
        break
    print ("%s: %d examples (%d for train and %d for test)"%(activity['label'], n, n_train, n_test))
    testlabels.extend(ltest)
    trainlabels.extend(ltrain)
    testfeatures.extend(ftest)
    trainfeatures.extend(ftrain)
   
 
print("\nTotal: %d train examples and %d test examples "%(len(trainfeatures), len(testfeatures)))

lie: 10 examples (8 for train and 2 for test)
sit: 53 examples (43 for train and 10 for test)
stand: 17 examples (14 for train and 3 for test)

Total: 65 train examples and 15 test examples 


In [24]:
def printResults(expected, result):
    print("expected\t\tresult")
    print("------------------------------")
    correct = 0
    for i in range(len(expected)):
        print ("%s\t\t%s"%(expected[i], result[i]))
        if expected[i] == result[i]:
            correct += 1
    print("Got %d out of %d right! :)"%(correct,len(testlabels)))

## 3 - apply classifier (raw)

In [25]:
clf1 = svm.SVC(kernel='linear', cache_size=1000, C=1)
clf1.fit(X=trainfeatures, y=trainlabels)

print ("Linear Kernel, C=1\n")
printResults(testlabels, clf1.predict(testfeatures))

clf2 = svm.SVC(kernel='rbf', cache_size=1000, C=1, gamma=0.01)
clf2.fit(X=trainfeatures, y=trainlabels)

print ("\n\n\nRBF Kernel, C=1, gamma=0.001\n")
printResults(testlabels, clf2.predict(testfeatures))

Linear Kernel, C=1

expected		result
------------------------------
lie		sit
lie		sit
sit		sit
sit		sit
sit		sit
sit		sit
sit		sit
sit		sit
sit		stand
sit		lie
sit		lie
sit		sit
stand		sit
stand		sit
stand		sit
Got 7 out of 15 right! :)



RBF Kernel, C=1, gamma=0.001

expected		result
------------------------------
lie		sit
lie		sit
sit		sit
sit		sit
sit		sit
sit		sit
sit		sit
sit		sit
sit		sit
sit		sit
sit		sit
sit		sit
stand		sit
stand		stand
stand		sit
Got 11 out of 15 right! :)
