In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn.preprocessing import scale
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV, train_test_split

import hervpd as hp

In [40]:
df = pd.read_csv( 'data/12features/combined.csv')
print(df.activity.unique())
df.describe()

['eat' 'focused-active' 'focused-passive' 'household-chores'
 'leisure-active' 'leisure-passive' 'movement' 'sleep' 'rest-active']


Unnamed: 0,user,AVGNN,SDNN,MeanHR,MinHR,MaxHR,RMSSD,pNNxx,TINN,powerVLF,powerLF,powerHF,ratioHFLF
count,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0
mean,1.1875,758.6,72.865018,81.37714,67.362033,108.560417,46.409227,20.082227,358.020221,2877.387599,1423.919671,1000.824001,2.753764
std,0.913249,129.785003,32.960406,13.75457,10.84968,36.385572,32.537407,18.316263,146.885899,3619.576847,1443.217366,1841.656665,2.296974
min,0.0,475.73,17.833,51.628,41.437,58.117,4.8825,0.0,83.0,85.654,57.852,7.9777,0.15468
25%,0.0,670.3625,51.8715,73.23225,60.29275,91.47025,27.43875,6.593075,258.0,995.1825,591.3075,258.9175,1.257
50%,2.0,750.545,67.057,79.942,66.1815,100.52,37.9945,15.4175,339.0,1807.95,1040.35,512.09,2.03735
75%,2.0,819.3075,85.82,89.504,74.28525,112.58,54.76225,26.7905,426.25,3304.25,1697.95,960.045,3.579975
max,2.0,1162.2,264.25,126.12,104.9,550.32,240.47,98.232,1103.0,29716.0,12958.0,16103.0,18.228


In [41]:
hp.countExamplesByActivity(df)

activity
eat                 53
focused-active      88
focused-passive     73
household-chores    68
leisure-active      14
leisure-passive     89
movement            65
rest-active         19
sleep               75
Name: user, dtype: int64

### Apply scale to make all features go to mean = 0 and SD = 1. That will allow SVM kernels not to put too much weight on features with larger values.

In [42]:
#df.iloc[:, 3:15] = df.iloc[:, 3:15].apply(lambda x: scale(x))
df = hp.scaleWithinUser(df)
df.describe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Unnamed: 0,user,AVGNN,SDNN,MeanHR,MinHR,MaxHR,RMSSD,pNNxx,TINN,powerVLF,powerLF,powerHF,ratioHFLF
count,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0,544.0
mean,1.1875,-2.424531e-16,-6.612358000000001e-17,1.1836940000000001e-17,5.698056e-16,-2.314325e-16,8.245039e-17,-1.857175e-17,1.7143150000000003e-17,3.183728e-17,7.836868000000001e-17,-1.75105e-16,3.673532e-18
std,0.913249,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092,1.00092
min,0.0,-2.843791,-2.873066,-2.460395,-2.553288,-2.698142,-2.905886,-2.207686,-2.721551,-1.275233,-1.467678,-1.552781,-1.441435
25%,0.0,-0.6640136,-0.7040336,-0.6410388,-0.6684354,-0.5597017,-0.7309028,-0.7525458,-0.6982344,-0.5539669,-0.577282,-0.5662694,-0.6796624
50%,2.0,-0.0655166,-0.1676111,-0.09715803,-0.04039264,-0.1973524,-0.1655967,-0.2414187,-0.1085526,-0.306594,-0.3017131,-0.3295817,-0.2990694
75%,2.0,0.5280744,0.4666745,0.5956143,0.5992392,0.4212228,0.5763606,0.4514282,0.5244985,0.1528431,0.1965599,0.3533213,0.3809616
max,2.0,3.751426,4.527089,3.880179,5.022477,8.01763,4.081758,4.1198,3.771926,6.268217,5.81662,6.204422,5.978021


### Test dataset will have 20% of examples, the other 80% will be for training (with cross-validation)

In [43]:
train, test = train_test_split(df, test_size=0.2)
print (len(train), len(test))

435 109


### 3 - Model selection:  Perform an exhaustive search, with k-fold cross-validation,  in the space parameter consisting of:
* $C = 10^{i}, com -1 \leq i \leq 6$, for both linear and RBF kernels
* $\gamma = 10^{i}, com -3 \leq i \leq 3$, for RBF kernel.

In [44]:
crossval = StratifiedShuffleSplit(n_splits=4, test_size=0.2)

c_range = np.logspace(-1, 2, 4) 
gamma_range = np.logspace(-2, 1, 4)

param_lin=dict(C=c_range)
param_rbf=dict(C=c_range, gamma=gamma_range)


In [45]:
grid_lin = GridSearchCV(svm.SVC(kernel='linear', cache_size=1000), param_grid=param_lin, cv=crossval)
grid_lin.fit(X=train.iloc[:, 3:15], y=train['activity'])

print("Kernel linear --- ")
print("Best params: %s with score %0.5f" % (grid_lin.best_params_, grid_lin.best_score_))

clf1 = svm.SVC(kernel='linear', cache_size=1000, C=grid_lin.best_params_['C'])
clf1.fit(X=train.iloc[:, 3:15], y=train['activity'])
hp.report(test, clf1.predict(test.iloc[:, 3:15]))

Kernel linear --- 
Best params: {'C': 1.0} with score 0.39943
                  precision    recall  f1-score   support

             eat       0.44      0.36      0.40        11
  focused-active       0.37      0.44      0.40        16
 focused-passive       0.33      0.22      0.27        18
household-chores       0.20      0.21      0.21        14
  leisure-active       0.00      0.00      0.00         3
 leisure-passive       0.29      0.69      0.41        13
        movement       0.57      0.50      0.53        16
     rest-active       0.00      0.00      0.00         6
           sleep       0.67      0.50      0.57        12

     avg / total       0.37      0.38      0.36       109



  'precision', 'predicted', average, warn_for)


In [46]:
grid_rbf = GridSearchCV(svm.SVC(kernel='rbf', cache_size=1000), param_grid=param_rbf, cv=crossval)
grid_rbf.fit(X=train.iloc[:, 3:15], y=train['activity'])

print("Kernel RBF --- ")
print("Best params: %s with score %0.5f" % (grid_rbf.best_params_, grid_rbf.best_score_))

clf2 = svm.SVC(kernel='rbf', cache_size=1000, C=grid_rbf.best_params_['C'], gamma=grid_rbf.best_params_['gamma'])
clf2.fit(X=train.iloc[:, 3:15], y=train['activity'])
hp.report(test, clf2.predict(test.iloc[:, 3:15]))

Kernel RBF --- 
Best params: {'C': 10.0, 'gamma': 0.10000000000000001} with score 0.45402
                  precision    recall  f1-score   support

             eat       0.43      0.27      0.33        11
  focused-active       0.38      0.50      0.43        16
 focused-passive       0.35      0.39      0.37        18
household-chores       0.42      0.57      0.48        14
  leisure-active       0.00      0.00      0.00         3
 leisure-passive       0.29      0.31      0.30        13
        movement       0.73      0.69      0.71        16
     rest-active       0.00      0.00      0.00         6
           sleep       0.75      0.75      0.75        12

     avg / total       0.44      0.46      0.44       109



  'precision', 'predicted', average, warn_for)


In [47]:
clf3 = svm.SVC(kernel='poly', cache_size=1000, C=grid_lin.best_params_['C'])
clf3.fit(X=train.iloc[:, 3:15], y=train['activity'])
hp.report(test, clf3.predict(test.iloc[:, 3:15]))

                  precision    recall  f1-score   support

             eat       0.00      0.00      0.00        11
  focused-active       0.25      0.06      0.10        16
 focused-passive       0.00      0.00      0.00        18
household-chores       0.58      0.50      0.54        14
  leisure-active       0.00      0.00      0.00         3
 leisure-passive       0.18      1.00      0.31        13
        movement       0.80      0.50      0.62        16
     rest-active       0.00      0.00      0.00         6
           sleep       1.00      0.58      0.74        12

     avg / total       0.36      0.33      0.29       109



  'precision', 'predicted', average, warn_for)



## Now, we can run the same steps above with multiple experiments! Time to have fun!!

In [78]:
# exclude from df activities with too few examples

df = pd.read_csv( 'data/12features/combined.csv')
df = hp.excludeActivities(df, ['leisure-active',  'rest-active'])
print(hp.countExamplesByActivity(df))

activity
eat                 53
focused-active      88
focused-passive     73
household-chores    68
leisure-passive     89
movement            65
sleep               75
Name: user, dtype: int64


In [79]:
hp.runFlow(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


408 train examples and 103 test examples
Best params for linear kernel: {'C': 10.0} with score 0.40549
--- test results for linear kernel:
                  precision    recall  f1-score   support

             eat       0.88      0.44      0.58        16
  focused-active       0.34      0.56      0.43        18
 focused-passive       0.35      0.58      0.44        12
household-chores       0.38      0.42      0.40        12
 leisure-passive       0.25      0.19      0.21        16
        movement       0.67      0.33      0.44        12
           sleep       0.67      0.59      0.62        17

     avg / total       0.51      0.45      0.45       103

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.10000000000000001} with score 0.48171
--- test results for RBF kernel:
                  precision    recall  f1-score   support

             eat       0.71      0.31      0.43        16
  focused-active       0.39      0.50      0.44        18
 focused-passive       0.40      0.50  

In [25]:
hp.runFlowByUser(df)

CLASSFIER FOR USER 0
136 train examples and 35 test examples
Best params for linear kernel: {'C': 10.0} with score 0.53571
--- test results for linear kernel:
                  precision    recall  f1-score   support

             eat       0.00      0.00      0.00         0
  focused-active       0.40      0.50      0.44         4
 focused-passive       0.58      0.70      0.64        10
household-chores       0.33      0.75      0.46         4
 leisure-passive       0.25      0.25      0.25         4
        movement       1.00      0.14      0.25         7
           sleep       1.00      0.50      0.67         6

     avg / total       0.65      0.49      0.48        35



  'recall', 'true', average, warn_for)


Best params for RBF kernel: {'C': 1.0, 'gamma': 0.10000000000000001} with score 0.60714
--- test results for RBF kernel:
                  precision    recall  f1-score   support

  focused-active       1.00      0.50      0.67         4
 focused-passive       0.60      0.60      0.60        10
household-chores       0.38      0.75      0.50         4
 leisure-passive       0.50      0.50      0.50         4
        movement       0.75      0.43      0.55         7
           sleep       0.43      0.50      0.46         6

     avg / total       0.61      0.54      0.55        35

CLASSFIER FOR USER 1
57 train examples and 15 test examples
Best params for linear kernel: {'C': 1.0} with score 0.64583
--- test results for linear kernel:
                 precision    recall  f1-score   support

            eat       0.50      0.50      0.50         2
 focused-active       0.67      0.33      0.44         6
leisure-passive       0.56      0.83      0.67         6
       movement       1.00



214 train examples and 54 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.45349
--- test results for linear kernel:
                  precision    recall  f1-score   support

             eat       0.20      0.25      0.22         8
  focused-active       0.30      0.43      0.35         7
 focused-passive       0.00      0.00      0.00         8
household-chores       0.20      0.22      0.21         9
 leisure-passive       0.29      0.33      0.31         6
        movement       0.40      0.29      0.33         7
           sleep       1.00      0.89      0.94         9

     avg / total       0.35      0.35      0.35        54

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.10000000000000001} with score 0.48256
--- test results for RBF kernel:
                  precision    recall  f1-score   support

             eat       0.50      0.62      0.56         8
  focused-active       0.86      0.86      0.86         7
 focused-passive       0.

### 2 - Keeping only work and leisure activities

In [26]:
df_reduced = hp.filterActivities(df, ['focused-active', 'focused-passive', 'leisure-passive'])
df_reduced.groupby('activity').count()['user']

activity
focused-active     88
focused-passive    73
leisure-passive    89
Name: user, dtype: int64

In [27]:
hp.runFlow(df_reduced)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


200 train examples and 50 test examples
Best params for linear kernel: {'C': 10.0} with score 0.50625
--- test results for linear kernel:
                 precision    recall  f1-score   support

 focused-active       0.46      0.71      0.56        17
focused-passive       0.62      0.42      0.50        19
leisure-passive       0.36      0.29      0.32        14

    avg / total       0.49      0.48      0.47        50

Best params for RBF kernel: {'C': 100.0, 'gamma': 0.01} with score 0.55625
--- test results for RBF kernel:
                 precision    recall  f1-score   support

 focused-active       0.72      0.76      0.74        17
focused-passive       0.65      0.58      0.61        19
leisure-passive       0.33      0.36      0.34        14

    avg / total       0.58      0.58      0.58        50



In [29]:
hp.runFlowByUser(df_reduced)

CLASSFIER FOR USER 0
61 train examples and 16 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.73077
--- test results for linear kernel:
                 precision    recall  f1-score   support

 focused-active       0.50      0.25      0.33         4
focused-passive       0.45      0.83      0.59         6
leisure-passive       0.67      0.33      0.44         6

    avg / total       0.55      0.50      0.47        16





Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.65385
--- test results for RBF kernel:
                 precision    recall  f1-score   support

 focused-active       0.50      0.25      0.33         4
focused-passive       0.45      0.83      0.59         6
leisure-passive       0.67      0.33      0.44         6

    avg / total       0.55      0.50      0.47        16

CLASSFIER FOR USER 1
43 train examples and 11 test examples
Best params for linear kernel: {'C': 10.0} with score 0.55556
--- test results for linear kernel:
                 precision    recall  f1-score   support

 focused-active       0.67      1.00      0.80         4
leisure-passive       1.00      0.71      0.83         7

    avg / total       0.88      0.82      0.82        11





Best params for RBF kernel: {'C': 1.0, 'gamma': 0.10000000000000001} with score 0.72222
--- test results for RBF kernel:
                 precision    recall  f1-score   support

 focused-active       1.00      0.75      0.86         4
leisure-passive       0.88      1.00      0.93         7

    avg / total       0.92      0.91      0.91        11

CLASSFIER FOR USER 2
95 train examples and 24 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.53947
--- test results for linear kernel:
                 precision    recall  f1-score   support

 focused-active       0.60      0.33      0.43         9
focused-passive       0.83      0.42      0.56        12
leisure-passive       0.08      0.33      0.12         3

    avg / total       0.65      0.38      0.45        24





Best params for RBF kernel: {'C': 100.0, 'gamma': 0.10000000000000001} with score 0.53947
--- test results for RBF kernel:
                 precision    recall  f1-score   support

 focused-active       0.71      0.56      0.63         9
focused-passive       0.89      0.67      0.76        12
leisure-passive       0.12      0.33      0.18         3

    avg / total       0.73      0.58      0.64        24



### 2 - Splitting database into movement categories (movement, household chores and exercise) and all others

In [30]:
includelist = ['movement', 'household-chores']

df_move = hp.addPartition(df, includelist, pname='move', labelIn='move', labelOut='still')
df_move.groupby('move').count()['user']

move
move     133
still    378
Name: user, dtype: int64

In [31]:
hp.runFlow(df_move, labelName='move')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


408 train examples and 103 test examples
Best params for linear kernel: {'C': 10.0} with score 0.89024
--- test results for linear kernel:
             precision    recall  f1-score   support

       move       0.80      0.83      0.82        24
      still       0.95      0.94      0.94        79

avg / total       0.91      0.91      0.91       103

Best params for RBF kernel: {'C': 100.0, 'gamma': 0.01} with score 0.88415
--- test results for RBF kernel:
             precision    recall  f1-score   support

       move       0.83      0.83      0.83        24
      still       0.95      0.95      0.95        79

avg / total       0.92      0.92      0.92       103



In [32]:
hp.runFlowByUser(df_move, labelName='move')

CLASSFIER FOR USER 0
136 train examples and 35 test examples




Best params for linear kernel: {'C': 1.0} with score 0.86607
--- test results for linear kernel:
             precision    recall  f1-score   support

       move       0.80      0.80      0.80        10
      still       0.92      0.92      0.92        25

avg / total       0.89      0.89      0.89        35

Best params for RBF kernel: {'C': 1.0, 'gamma': 0.10000000000000001} with score 0.90179
--- test results for RBF kernel:
             precision    recall  f1-score   support

       move       0.88      0.70      0.78        10
      still       0.89      0.96      0.92        25

avg / total       0.88      0.89      0.88        35

CLASSFIER FOR USER 1
57 train examples and 15 test examples
Best params for linear kernel: {'C': 1.0} with score 0.95833
--- test results for linear kernel:
             precision    recall  f1-score   support

       move       1.00      0.80      0.89         5
      still       0.91      1.00      0.95        10

avg / total       0.94      0.93  



Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.95833
--- test results for RBF kernel:
             precision    recall  f1-score   support

       move       1.00      0.80      0.89         5
      still       0.91      1.00      0.95        10

avg / total       0.94      0.93      0.93        15

CLASSFIER FOR USER 2
214 train examples and 54 test examples




Best params for linear kernel: {'C': 1.0} with score 0.89535
--- test results for linear kernel:
             precision    recall  f1-score   support

       move       0.91      0.62      0.74        16
      still       0.86      0.97      0.91        38

avg / total       0.87      0.87      0.86        54

Best params for RBF kernel: {'C': 100.0, 'gamma': 0.01} with score 0.89535
--- test results for RBF kernel:
             precision    recall  f1-score   support

       move       0.91      0.62      0.74        16
      still       0.86      0.97      0.91        38

avg / total       0.87      0.87      0.86        54



## Hierarchy

### 1 - Split data by movement intensity:
 * group 1 = movement, household chores, exercise
 

In [80]:
df_stillcls = hp.excludeActivities(df, includelist)
hp.countExamplesByActivity(df_stillcls)

activity
eat                53
focused-active     88
focused-passive    73
leisure-passive    89
sleep              75
Name: user, dtype: int64

In [81]:
hp.runFlow(df_stillcls)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


302 train examples and 76 test examples
Best params for linear kernel: {'C': 1.0} with score 0.40574
--- test results for linear kernel:
                 precision    recall  f1-score   support

            eat       0.50      0.40      0.44        10
 focused-active       0.38      0.53      0.44        19
focused-passive       0.33      0.46      0.39        13
leisure-passive       0.50      0.33      0.40        21
          sleep       0.80      0.62      0.70        13

    avg / total       0.49      0.46      0.47        76

Best params for RBF kernel: {'C': 1.0, 'gamma': 0.10000000000000001} with score 0.47951
--- test results for RBF kernel:
                 precision    recall  f1-score   support

            eat       0.80      0.40      0.53        10
 focused-active       0.40      0.63      0.49        19
focused-passive       0.27      0.31      0.29        13
leisure-passive       0.38      0.24      0.29        21
          sleep       0.62      0.62      0.62        

In [84]:
hp.runFlowByUser(df_stillcls)



CLASSFIER FOR USER 0
92 train examples and 24 test examples
Best params for linear kernel: {'C': 10.0} with score 0.61842
--- test results for linear kernel:
                 precision    recall  f1-score   support

            eat       0.00      0.00      0.00         2
 focused-active       0.25      0.25      0.25         4
focused-passive       0.38      0.60      0.46         5
leisure-passive       0.25      0.33      0.29         3
          sleep       0.86      0.60      0.71        10

    avg / total       0.51      0.46      0.47        24

Best params for RBF kernel: {'C': 10.0, 'gamma': 1.0} with score 0.61842
--- test results for RBF kernel:
                 precision    recall  f1-score   support

            eat       0.50      0.50      0.50         2
 focused-active       0.25      0.25      0.25         4
focused-passive       0.33      0.60      0.43         5
leisure-passive       0.50      0.67      0.57         3
          sleep       0.80      0.40      0.53  



Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.86364
--- test results for RBF kernel:
                 precision    recall  f1-score   support

            eat       1.00      0.50      0.67         2
 focused-active       0.60      0.43      0.50         7
leisure-passive       0.43      0.75      0.55         4

    avg / total       0.61      0.54      0.54        13

CLASSFIER FOR USER 2
158 train examples and 40 test examples




Best params for linear kernel: {'C': 0.01} with score 0.47656
--- test results for linear kernel:
                 precision    recall  f1-score   support

            eat       0.00      0.00      0.00         9
 focused-active       0.33      0.71      0.45         7
focused-passive       0.25      0.67      0.36         6
leisure-passive       0.00      0.00      0.00         8
          sleep       0.88      0.70      0.78        10

    avg / total       0.31      0.40      0.33        40



  'precision', 'predicted', average, warn_for)


Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.55469
--- test results for RBF kernel:
                 precision    recall  f1-score   support

            eat       0.80      0.44      0.57         9
 focused-active       0.36      0.57      0.44         7
focused-passive       0.33      0.50      0.40         6
leisure-passive       0.38      0.38      0.38         8
          sleep       1.00      0.70      0.82        10

    avg / total       0.62      0.53      0.55        40



In [85]:
df_movecls = hp.filterActivities(df, includelist)
hp.countExamplesByActivity(df_movecls)

activity
household-chores    68
movement            65
Name: user, dtype: int64

In [87]:
hp.runFlow(df_movecls)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


106 train examples and 27 test examples
Best params for linear kernel: {'C': 0.01} with score 0.60227
--- test results for linear kernel:
                  precision    recall  f1-score   support

household-chores       0.40      0.46      0.43        13
        movement       0.42      0.36      0.38        14

     avg / total       0.41      0.41      0.41        27

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.10000000000000001} with score 0.61364
--- test results for RBF kernel:
                  precision    recall  f1-score   support

household-chores       0.56      0.77      0.65        13
        movement       0.67      0.43      0.52        14

     avg / total       0.61      0.59      0.58        27



In [88]:
ju_move = hp.userRows(df_movecls, 0)
print(hp.countExamplesByActivity(ju_move))
hp.runFlow(ju_move)

activity
household-chores    28
movement            27
Name: user, dtype: int64
44 train examples and 11 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.75000
--- test results for linear kernel:
                  precision    recall  f1-score   support

household-chores       0.71      0.83      0.77         6
        movement       0.75      0.60      0.67         5

     avg / total       0.73      0.73      0.72        11





Best params for RBF kernel: {'C': 1.0, 'gamma': 0.10000000000000001} with score 0.83333
--- test results for RBF kernel:
                  precision    recall  f1-score   support

household-chores       0.67      1.00      0.80         6
        movement       1.00      0.40      0.57         5

     avg / total       0.82      0.73      0.70        11



In [89]:
edu_move = hp.userRows(df_movecls, 2)
print(hp.countExamplesByActivity(edu_move))
hp.runFlow(edu_move)

activity
household-chores    40
movement            30
Name: user, dtype: int64
56 train examples and 14 test examples
Best params for linear kernel: {'C': 100.0} with score 0.70833
--- test results for linear kernel:
                  precision    recall  f1-score   support

household-chores       0.67      0.40      0.50        10
        movement       0.25      0.50      0.33         4

     avg / total       0.55      0.43      0.45        14





Best params for RBF kernel: {'C': 1.0, 'gamma': 0.01} with score 0.68750
--- test results for RBF kernel:
                  precision    recall  f1-score   support

household-chores       0.80      0.40      0.53        10
        movement       0.33      0.75      0.46         4

     avg / total       0.67      0.50      0.51        14



In [97]:
for activ in df_stillcls.activity.unique():
    print ('----------------------------------------------------------------')
    dfa = df_stillcls
    df_onevsall = hp.addPartition(df=dfa, includelist=[activ], pname=activ, labelIn=activ, labelOut='others')
    print(df_onevsall.groupby(activ).count()['user'])
    hp.runFlow(df_onevsall, labelName=activ)

----------------------------------------------------------------
eat
eat        53
others    325
Name: user, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


302 train examples and 76 test examples
Best params for linear kernel: {'C': 0.01} with score 0.86885
--- test results for linear kernel:
             precision    recall  f1-score   support

        eat       0.00      0.00      0.00        14
     others       0.82      1.00      0.90        62

avg / total       0.67      0.82      0.73        76



  'precision', 'predicted', average, warn_for)


Best params for RBF kernel: {'C': 100.0, 'gamma': 0.01} with score 0.89344
--- test results for RBF kernel:
             precision    recall  f1-score   support

        eat       0.00      0.00      0.00        14
     others       0.81      0.97      0.88        62

avg / total       0.66      0.79      0.72        76

----------------------------------------------------------------
focused-active
focused-active     88
others            290
Name: user, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


302 train examples and 76 test examples
Best params for linear kernel: {'C': 0.01} with score 0.75410
--- test results for linear kernel:
                precision    recall  f1-score   support

focused-active       0.00      0.00      0.00        15
        others       0.80      1.00      0.89        61

   avg / total       0.64      0.80      0.71        76



  'precision', 'predicted', average, warn_for)


Best params for RBF kernel: {'C': 100.0, 'gamma': 1.0} with score 0.79918
--- test results for RBF kernel:
                precision    recall  f1-score   support

focused-active       0.33      0.20      0.25        15
        others       0.82      0.90      0.86        61

   avg / total       0.72      0.76      0.74        76

----------------------------------------------------------------
focused-passive
focused-passive     73
others             305
Name: user, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


302 train examples and 76 test examples
Best params for linear kernel: {'C': 0.01} with score 0.81967
--- test results for linear kernel:
                 precision    recall  f1-score   support

focused-passive       0.00      0.00      0.00        21
         others       0.72      1.00      0.84        55

    avg / total       0.52      0.72      0.61        76



  'precision', 'predicted', average, warn_for)


Best params for RBF kernel: {'C': 10.0, 'gamma': 1.0} with score 0.84016
--- test results for RBF kernel:
                 precision    recall  f1-score   support

focused-passive       0.64      0.33      0.44        21
         others       0.78      0.93      0.85        55

    avg / total       0.74      0.76      0.74        76

----------------------------------------------------------------
leisure-passive
leisure-passive     89
others             289
Name: user, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


302 train examples and 76 test examples
Best params for linear kernel: {'C': 0.01} with score 0.75410
--- test results for linear kernel:
                 precision    recall  f1-score   support

leisure-passive       0.00      0.00      0.00        15
         others       0.80      1.00      0.89        61

    avg / total       0.64      0.80      0.71        76



  'precision', 'predicted', average, warn_for)


Best params for RBF kernel: {'C': 1.0, 'gamma': 1.0} with score 0.77869
--- test results for RBF kernel:
                 precision    recall  f1-score   support

leisure-passive       0.60      0.20      0.30        15
         others       0.83      0.97      0.89        61

    avg / total       0.79      0.82      0.78        76

----------------------------------------------------------------
sleep
others    303
sleep      75
Name: user, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


302 train examples and 76 test examples
Best params for linear kernel: {'C': 1.0} with score 0.86066
--- test results for linear kernel:
             precision    recall  f1-score   support

     others       0.87      0.97      0.92        64
      sleep       0.60      0.25      0.35        12

avg / total       0.83      0.86      0.83        76

Best params for RBF kernel: {'C': 100.0, 'gamma': 0.01} with score 0.88934
--- test results for RBF kernel:
             precision    recall  f1-score   support

     others       0.90      0.94      0.92        64
      sleep       0.56      0.42      0.48        12

avg / total       0.84      0.86      0.85        76



### Split still activities by sleep vc others

In [105]:
hp.countExamplesByActivity(df_stillcls)
df_partsleep = hp.addPartition(df=df_stillcls, includelist=['sleep'], pname='sleep', labelIn='sleep', labelOut='wake')
print(df_partsleep.groupby('sleep').count()['user'])

sleep
sleep     75
wake     303
Name: user, dtype: int64


In [106]:
 hp.runFlow(df_partsleep, labelName='sleep')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


302 train examples and 76 test examples
Best params for linear kernel: {'C': 1.0} with score 0.85656
--- test results for linear kernel:
             precision    recall  f1-score   support

      sleep       0.73      0.38      0.50        21
       wake       0.80      0.95      0.87        55

avg / total       0.78      0.79      0.77        76

Best params for RBF kernel: {'C': 1.0, 'gamma': 0.10000000000000001} with score 0.88525
--- test results for RBF kernel:
             precision    recall  f1-score   support

      sleep       0.90      0.43      0.58        21
       wake       0.82      0.98      0.89        55

avg / total       0.84      0.83      0.81        76



In [109]:
ju_sleep = hp.userRows(df_partsleep, 0) 
hp.runFlow(ju_sleep, labelName='sleep')



92 train examples and 24 test examples
Best params for linear kernel: {'C': 1.0} with score 0.78947
--- test results for linear kernel:
             precision    recall  f1-score   support

      sleep       1.00      0.20      0.33         5
       wake       0.83      1.00      0.90        19

avg / total       0.86      0.83      0.79        24

Best params for RBF kernel: {'C': 10.0, 'gamma': 1.0} with score 0.89474
--- test results for RBF kernel:
             precision    recall  f1-score   support

      sleep       0.67      0.80      0.73         5
       wake       0.94      0.89      0.92        19

avg / total       0.89      0.88      0.88        24



In [108]:
edu_sleep = hp.userRows(df_partsleep, 2) 
hp.runFlow(edu_sleep, labelName='sleep')



158 train examples and 40 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.96875
--- test results for linear kernel:
             precision    recall  f1-score   support

      sleep       1.00      0.75      0.86         8
       wake       0.94      1.00      0.97        32

avg / total       0.95      0.95      0.95        40

Best params for RBF kernel: {'C': 1.0, 'gamma': 0.01} with score 0.98438
--- test results for RBF kernel:
             precision    recall  f1-score   support

      sleep       1.00      0.75      0.86         8
       wake       0.94      1.00      0.97        32

avg / total       0.95      0.95      0.95        40



In [111]:
df_wake = hp.excludeActivities(df_stillcls,['sleep'] )
hp.countExamplesByActivity(df_wake)

activity
eat                53
focused-active     88
focused-passive    73
leisure-passive    89
Name: user, dtype: int64

In [112]:
hp.runFlow(df_wake)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


242 train examples and 61 test examples
Best params for linear kernel: {'C': 100.0} with score 0.46939
--- test results for linear kernel:
                 precision    recall  f1-score   support

            eat       0.67      0.38      0.48        16
 focused-active       0.39      0.64      0.49        14
focused-passive       0.39      0.54      0.45        13
leisure-passive       0.45      0.28      0.34        18

    avg / total       0.48      0.44      0.44        61

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.46939
--- test results for RBF kernel:
                 precision    recall  f1-score   support

            eat       0.33      0.06      0.11        16
 focused-active       0.50      0.71      0.59        14
focused-passive       0.38      0.62      0.47        13
leisure-passive       0.47      0.44      0.46        18

    avg / total       0.42      0.44      0.40        61



In [113]:
hp.runFlowByUser(df_wake)

  'precision', 'predicted', average, warn_for)


CLASSFIER FOR USER 0
67 train examples and 17 test examples
Best params for linear kernel: {'C': 1.0} with score 0.53571
--- test results for linear kernel:
                 precision    recall  f1-score   support

            eat       0.00      0.00      0.00         2
 focused-active       0.33      0.50      0.40         2
focused-passive       0.67      1.00      0.80         8
leisure-passive       1.00      0.40      0.57         5

    avg / total       0.65      0.65      0.59        17

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.10000000000000001} with score 0.53571
--- test results for RBF kernel:
                 precision    recall  f1-score   support

            eat       1.00      1.00      1.00         2
 focused-active       0.00      0.00      0.00         2
focused-passive       0.67      1.00      0.80         8
leisure-passive       1.00      0.40      0.57         5

    avg / total       0.73      0.71      0.66        17

CLASSFIER FOR USER 1
51 train e

  'recall', 'true', average, warn_for)


Best params for RBF kernel: {'C': 1.0, 'gamma': 0.10000000000000001} with score 0.63636
--- test results for RBF kernel:
                 precision    recall  f1-score   support

            eat       0.00      0.00      0.00         0
 focused-active       0.71      1.00      0.83         5
leisure-passive       1.00      0.62      0.77         8

    avg / total       0.89      0.77      0.79        13

CLASSFIER FOR USER 2
124 train examples and 31 test examples


  'recall', 'true', average, warn_for)


Best params for linear kernel: {'C': 100.0} with score 0.53000
--- test results for linear kernel:
                 precision    recall  f1-score   support

            eat       0.44      0.50      0.47         8
 focused-active       0.44      0.57      0.50         7
focused-passive       0.80      0.40      0.53        10
leisure-passive       0.38      0.50      0.43         6

    avg / total       0.55      0.48      0.49        31

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.10000000000000001} with score 0.44000
--- test results for RBF kernel:
                 precision    recall  f1-score   support

            eat       0.40      0.50      0.44         8
 focused-active       0.56      0.71      0.63         7
focused-passive       0.67      0.40      0.50        10
leisure-passive       0.17      0.17      0.17         6

    avg / total       0.48      0.45      0.45        31



### Split by passive vs active sitting activities

In [124]:
df_actlvl = hp.addPartition(df_wake, includelist=['focused-passive', 'leisure-passive'], pname='actlvl', labelIn='passive', labelOut='active')
print(df_passive.groupby('actlvl').count()['user'])

actlvl
active     141
passive    162
Name: user, dtype: int64


In [125]:
hp.runFlow(df_actlvl, labelName='actlvl')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


242 train examples and 61 test examples
Best params for linear kernel: {'C': 10.0} with score 0.60714
--- test results for linear kernel:
             precision    recall  f1-score   support

     active       0.52      0.44      0.48        25
    passive       0.65      0.72      0.68        36

avg / total       0.60      0.61      0.60        61

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.10000000000000001} with score 0.64286
--- test results for RBF kernel:
             precision    recall  f1-score   support

     active       0.74      0.80      0.77        25
    passive       0.85      0.81      0.83        36

avg / total       0.81      0.80      0.80        61



In [126]:
hp.runFlowByUser(df_actlvl, labelName='actlvl')

  'precision', 'predicted', average, warn_for)


CLASSFIER FOR USER 0
67 train examples and 17 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.66071
--- test results for linear kernel:
             precision    recall  f1-score   support

     active       0.00      0.00      0.00         6
    passive       0.65      1.00      0.79        11

avg / total       0.42      0.65      0.51        17

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.69643
--- test results for RBF kernel:
             precision    recall  f1-score   support

     active       0.00      0.00      0.00         6
    passive       0.65      1.00      0.79        11

avg / total       0.42      0.65      0.51        17

CLASSFIER FOR USER 1
51 train examples and 13 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.72727
--- test results for linear kernel:
             precision    recall  f1-score   support

     active       0.60      0.43      0.50         7
    passiv

  'precision', 'predicted', average, warn_for)


Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.75000
--- test results for RBF kernel:
             precision    recall  f1-score   support

     active       0.71      0.71      0.71         7
    passive       0.67      0.67      0.67         6

avg / total       0.69      0.69      0.69        13

CLASSFIER FOR USER 2
124 train examples and 31 test examples




Best params for linear kernel: {'C': 0.10000000000000001} with score 0.70000
--- test results for linear kernel:
             precision    recall  f1-score   support

     active       0.58      0.79      0.67        14
    passive       0.75      0.53      0.62        17

avg / total       0.67      0.65      0.64        31

Best params for RBF kernel: {'C': 100.0, 'gamma': 0.10000000000000001} with score 0.73000
--- test results for RBF kernel:
             precision    recall  f1-score   support

     active       0.60      0.64      0.62        14
    passive       0.69      0.65      0.67        17

avg / total       0.65      0.65      0.65        31



In [129]:
df_passive = hp.filterActivities(df_actlvl, ['leisure-passive', 'focused-passive'])
hp.countExamplesByActivity(df_passive)

activity
focused-passive    73
leisure-passive    89
Name: user, dtype: int64

In [132]:
hp.runFlow(df_passive)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


129 train examples and 33 test examples
Best params for linear kernel: {'C': 10.0} with score 0.59615
--- test results for linear kernel:
                 precision    recall  f1-score   support

focused-passive       0.73      0.50      0.59        16
leisure-passive       0.64      0.82      0.72        17

    avg / total       0.68      0.67      0.66        33

Best params for RBF kernel: {'C': 10.0, 'gamma': 1.0} with score 0.66346
--- test results for RBF kernel:
                 precision    recall  f1-score   support

focused-passive       0.78      0.44      0.56        16
leisure-passive       0.62      0.88      0.73        17

    avg / total       0.70      0.67      0.65        33



In [138]:
hp.runFlow(hp.userRows(df_passive, 0))



44 train examples and 12 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.94444
--- test results for linear kernel:
                 precision    recall  f1-score   support

focused-passive       0.83      0.71      0.77         7
leisure-passive       0.67      0.80      0.73         5

    avg / total       0.76      0.75      0.75        12

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.83333
--- test results for RBF kernel:
                 precision    recall  f1-score   support

focused-passive       0.83      0.71      0.77         7
leisure-passive       0.67      0.80      0.73         5

    avg / total       0.76      0.75      0.75        12



In [144]:
hp.runFlow(hp.userRows(df_passive, 2))



63 train examples and 16 test examples
Best params for linear kernel: {'C': 10.0} with score 0.55769
--- test results for linear kernel:
                 precision    recall  f1-score   support

focused-passive       0.75      0.27      0.40        11
leisure-passive       0.33      0.80      0.47         5

    avg / total       0.62      0.44      0.42        16

Best params for RBF kernel: {'C': 1.0, 'gamma': 0.01} with score 0.63462
--- test results for RBF kernel:
                 precision    recall  f1-score   support

focused-passive       1.00      0.09      0.17        11
leisure-passive       0.33      1.00      0.50         5

    avg / total       0.79      0.38      0.27        16



In [131]:
df_active = hp.excludeActivities(df_actlvl, ['leisure-passive', 'focused-passive'])
hp.countExamplesByActivity(df_active)

activity
eat               53
focused-active    88
Name: user, dtype: int64

In [145]:
hp.runFlow(df_active)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


112 train examples and 29 test examples
Best params for linear kernel: {'C': 100.0} with score 0.89130
--- test results for linear kernel:
                precision    recall  f1-score   support

           eat       0.78      0.64      0.70        11
focused-active       0.80      0.89      0.84        18

   avg / total       0.79      0.79      0.79        29

Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.83696
--- test results for RBF kernel:
                precision    recall  f1-score   support

           eat       0.78      0.64      0.70        11
focused-active       0.80      0.89      0.84        18

   avg / total       0.79      0.79      0.79        29



In [149]:
hp.runFlowByUser(df_active)

  'precision', 'predicted', average, warn_for)


CLASSFIER FOR USER 0
22 train examples and 6 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.85000
--- test results for linear kernel:
                precision    recall  f1-score   support

           eat       0.00      0.00      0.00         1
focused-active       0.83      1.00      0.91         5

   avg / total       0.69      0.83      0.76         6

Best params for RBF kernel: {'C': 1.0, 'gamma': 0.10000000000000001} with score 0.90000
--- test results for RBF kernel:
                precision    recall  f1-score   support

           eat       0.00      0.00      0.00         1
focused-active       0.83      1.00      0.91         5

   avg / total       0.69      0.83      0.76         6

CLASSFIER FOR USER 1
29 train examples and 8 test examples
Best params for linear kernel: {'C': 0.10000000000000001} with score 0.87500
--- test results for linear kernel:
                precision    recall  f1-score   support

           eat       1.0

  'precision', 'predicted', average, warn_for)


Best params for RBF kernel: {'C': 1.0, 'gamma': 0.10000000000000001} with score 0.95833
--- test results for RBF kernel:
                precision    recall  f1-score   support

           eat       1.00      0.50      0.67         2
focused-active       0.86      1.00      0.92         6

   avg / total       0.89      0.88      0.86         8

CLASSFIER FOR USER 2
60 train examples and 16 test examples
Best params for linear kernel: {'C': 1.0} with score 0.72917
--- test results for linear kernel:
                precision    recall  f1-score   support

           eat       1.00      0.83      0.91         6
focused-active       0.91      1.00      0.95        10

   avg / total       0.94      0.94      0.94        16





Best params for RBF kernel: {'C': 10.0, 'gamma': 0.01} with score 0.79167
--- test results for RBF kernel:
                precision    recall  f1-score   support

           eat       0.75      0.50      0.60         6
focused-active       0.75      0.90      0.82        10

   avg / total       0.75      0.75      0.74        16

