In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn.preprocessing import scale
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV, train_test_split

import herv_preprocess as hpp

In [2]:
df = pd.read_csv( 'data/12features/combined.csv')
ju =  df[df['user'] == 0]
ron =  df[df['user'] == 1]
edu =  df[df['user'] == 2]

df.describe()

Unnamed: 0,user,AVGNN,SDNN,MeanHR,MinHR,MaxHR,RMSSD,pNNxx,TINN,powerVLF,powerLF,powerHF,ratioHFLF
count,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0
mean,1.1875,758.6,72.865018,81.37714,67.362033,108.560417,46.409227,20.082227,358.020221,2877.387599,1423.919671,1000.824001,2.753764
std,0.913249,129.785003,32.960406,13.75457,10.84968,36.385572,32.537407,18.316263,146.885899,3619.576847,1443.217366,1841.656665,2.296974
min,0.0,475.73,17.833,51.628,41.437,58.117,4.8825,0.0,83.0,85.654,57.852,7.9777,0.15468
25%,0.0,670.3625,51.8715,73.23225,60.29275,91.47025,27.43875,6.593075,258.0,995.1825,591.3075,258.9175,1.257
50%,2.0,750.545,67.057,79.942,66.1815,100.52,37.9945,15.4175,339.0,1807.95,1040.35,512.09,2.03735
75%,2.0,819.3075,85.82,89.504,74.28525,112.58,54.76225,26.7905,426.25,3304.25,1697.95,960.045,3.579975
max,2.0,1162.2,264.25,126.12,104.9,550.32,240.47,98.232,1103.0,29716.0,12958.0,16103.0,18.228


### Apply scale to make all features go to mean = 0 and SD = 1. That will allow SVM kernels not to put too much weight on features with larger values.

In [5]:
df.iloc[:, 3:15] = df.iloc[:, 3:15].apply(lambda x: scale(x))
df.describe()

Unnamed: 0,user,AVGNN,SDNN,MeanHR,MinHR,MaxHR,RMSSD,pNNxx,TINN,powerVLF,powerLF,powerHF,ratioHFLF
count,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0
mean,1.1875,-1.657171e-16,4.0817020000000003e-17,-1.220429e-16,-1.020426e-16,-1.136754e-16,1.8367660000000002e-17,6.959302000000001e-17,-1.8775830000000002e-17,1.1428770000000001e-17,3.9592510000000004e-17,6.612358000000001e-17,-9.051175000000001e-17
std,0.913249,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092
min,0.0,-2.181534,-1.671177,-2.164846,-2.391674,-1.387634,-1.277451,-1.097424,-1.874062,-0.7719972,-0.9474144,-0.5396012,-1.132567
25%,0.0,-0.6805001,-0.6375176,-0.5927038,-0.6521657,-0.4701286,-0.5835725,-0.7371355,-0.6815649,-0.5204856,-0.5774449,-0.4032181,-0.652224
50%,2.0,-0.0621213,-0.1763742,-0.1044351,-0.1089082,-0.2211816,-0.258855,-0.2549112,-0.1296089,-0.2957312,-0.2660186,-0.2656216,-0.3121818
75%,2.0,0.4681849,0.3934085,0.5913918,0.6386906,0.1105736,0.2569569,0.3665839,0.4649362,0.1180401,0.1900494,-0.02216294,0.3600263
max,2.0,3.112621,5.811856,3.255939,3.463007,12.15224,5.969725,4.270615,5.076494,7.421673,7.999277,8.207869,6.742991


### Test dataset will have 20% of examples, the other 80% will be for training (with cross-validation)

In [8]:
train, test = train_test_split(df, test_size=0.2)
train['activity']

450     leisure-passive
260                 eat
122     leisure-passive
130            movement
433     leisure-passive
224     leisure-passive
487         rest-active
540               sleep
55      focused-passive
67     household-chores
308      focused-active
507               sleep
283                 eat
445     leisure-passive
301      focused-active
394    household-chores
192                 eat
383    household-chores
186                 eat
8        focused-active
54      focused-passive
322      focused-active
226     leisure-passive
176               sleep
360     focused-passive
58      focused-passive
499         rest-active
63     household-chores
42      focused-passive
26       focused-active
             ...       
471            movement
203      focused-active
228     leisure-passive
214      focused-active
163               sleep
109     leisure-passive
302      focused-active
274                 eat
401    household-chores
377    household-chores
511             

### 3 - Model selection:  Perform an exhaustive search, with k-fold cross-validation,  in the space parameter consisting of:
* $C = 10^{i}, com -1 \leq i \leq 6$, for both linear and RBF kernels
* $\gamma = 10^{i}, com -3 \leq i \leq 3$, for RBF kernel.

In [7]:
crossval = StratifiedShuffleSplit(n_splits=4, test_size=0.2)

c_range = np.logspace(-1, 2, 4) 
gamma_range = np.logspace(-2, 1, 4)

param_lin=dict(C=c_range)
param_rbf=dict(C=c_range, gamma=gamma_range)


In [13]:
grid_lin = GridSearchCV(svm.SVC(kernel='linear', cache_size=1000), param_grid=param_lin, cv=crossval)
grid_lin.fit(X=train.iloc[:, 3:15], y=train['activity'])

print("Kernel linear --- ")
print("Best params: %s with score %0.5f" % (grid_lin.best_params_, grid_lin.best_score_))

Kernel linear --- 
Best params: {'C': 10.0} with score 0.38218


In [12]:
grid_rbf = GridSearchCV(svm.SVC(kernel='rbf', cache_size=1000), param_grid=param_rbf, cv=crossval)
grid_rbf.fit(X=train.iloc[:, 3:15], y=train['activity'])

print("Kernel RBF --- ")
print("Best params: %s with score %0.5f" % (grid_rbf.best_params_, grid_rbf.best_score_))

Kernel RBF --- 
Best params: {'C': 100.0, 'gamma': 0.10000000000000001} with score 0.44253


In [21]:
clf1 = svm.SVC(kernel='linear', cache_size=1000, C=10)
clf1.fit(X=train.iloc[:, 3:15], y=train['activity'])
hpp.printResults(test['activity'].values, clf1.predict(test.iloc[:, 3:15]))

clf2 = svm.SVC(kernel='rbf', cache_size=1000, C=100, gamma=0.1)
clf2.fit(X=train.iloc[:, 3:15], y=train['activity'])
hpp.printResults(test['activity'].values, clf1.predict(test.iloc[:, 3:15]), verbose=True)

------
Got 47 out of 109 right! :)
expected		result
------------------------------
movement		household-chores
leisure-passive		eat
focused-active		focused-active
focused-active		leisure-passive
sleep		sleep
focused-active		focused-active
movement		household-chores
rest-active		eat
household-chores		household-chores
focused-active		focused-active
eat		eat
eat		movement
leisure-active		leisure-active
focused-active		focused-active
leisure-passive		focused-active
focused-active		leisure-passive
leisure-passive		eat
sleep		sleep
focused-active		focused-active
leisure-passive		focused-active
focused-passive		focused-passive
rest-active		focused-passive
leisure-active		leisure-active
movement		household-chores
movement		household-chores
sleep		sleep
leisure-passive		sleep
sleep		focused-active
eat		leisure-passive
focused-passive		focused-passive
leisure-passive		focused-active
household-chores		movement
leisure-passive		leisure-passive
focused-passive		leisure-passive
leisure-passive		focus