In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline as pl
import plotly.graph_objs as go
pl.init_notebook_mode(connected=True)

from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import scale
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV, train_test_split
from sklearn.metrics import classification_report, precision_recall_fscore_support

import hervpd as hp
import classif
import dataviz as dv

In [2]:
import os

# Directory holding the pre-processed datasets. The default is the original
# local folder; set the HERV_DATA_DIR environment variable to point at another
# copy instead of editing this cell.
# Kept as a plain string because later cells build file names with
# `path + '\\...'`.
path = os.environ.get(
    "HERV_DATA_DIR",
    "C:\\Users\\ju\\GDrive\\Projects\\HeRV\\Data\\PreProcessed",
)

# Feature columns handed to the classification routine.
features = ['mrri', 'sdnn', 'rmssd', 'pnn50', 'hfnu', 'lfnu', 'lf_hf']
# Label names passed as the `onehotlabels` argument of the classification routine.
onehotlabels = ['sleep', 'exercise', 'move', 'stress1', 'stress2', 'stress3', 'relax2']
# Crop / duration values of the datasets to evaluate. The files loaded are named
# df_<duration>_<crop>_encoded.xlsx (units presumably seconds -- TODO confirm).
crops = [90]
durations = [600, 450, 300, 240, 180, 120, 60]

In [3]:
# Hyper-parameters of the candidate classifiers.
c1 = 10      # C (regularisation strength) of the RBF-kernel SVM
c2 = 10      # C of the linear-kernel SVM
gamma = 0.1  # kernel coefficient of the RBF SVM
seed = 42    # fixes the random forest's internal randomness so reruns match

clf_rbf = svm.SVC(kernel='rbf', cache_size=1000, C=c1, gamma=gamma)
# `gamma` is not passed here: it is a coefficient for the rbf/poly/sigmoid
# kernels and has no effect on a linear kernel.
clf_lin = svm.SVC(kernel='linear', cache_size=1000, C=c2)
# NOTE(review): only the forest itself is seeded here; any random splitting done
# inside classif.full_binary_classification is not controlled from this cell.
clf_rf = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=seed)

# Alternative classifier sets (slower); swap in to compare against the SVMs.
#classifiers = {'rf': clf_rf, 'rbf': clf_rbf, 'lin': clf_lin}
#classifiers = {'rf': clf_rf, 'rbf': clf_rbf}
classifiers = {'rf': clf_rf}

## Inter-user classification

In [4]:
# Run the classification over every configured classifier / duration / crop for
# the given features and labels. No `user` argument is passed in this call.
# `%time` reports the wall time of the whole run.
%time fullresult = classif.full_binary_classification(classifiers, durations, crops, features, onehotlabels, path=path)

C:\Users\ju\GDrive\Projects\HeRV\Data\PreProcessed\df_600_90_encoded.xlsx
1538 frags
1230 308



Data with input dtype int64 was converted to float64 by StandardScaler.



C:\Users\ju\GDrive\Projects\HeRV\Data\PreProcessed\df_450_90_encoded.xlsx
2087 frags
1669 418



Data with input dtype int64 was converted to float64 by StandardScaler.



C:\Users\ju\GDrive\Projects\HeRV\Data\PreProcessed\df_300_90_encoded.xlsx
3222 frags
2577 645



Data with input dtype int64 was converted to float64 by StandardScaler.



C:\Users\ju\GDrive\Projects\HeRV\Data\PreProcessed\df_240_90_encoded.xlsx
4108 frags
3286 822



Data with input dtype int64 was converted to float64 by StandardScaler.



C:\Users\ju\GDrive\Projects\HeRV\Data\PreProcessed\df_180_90_encoded.xlsx
5550 frags
4440 1110



Data with input dtype int64 was converted to float64 by StandardScaler.



C:\Users\ju\GDrive\Projects\HeRV\Data\PreProcessed\df_120_90_encoded.xlsx
8457 frags
6765 1692



Data with input dtype int64 was converted to float64 by StandardScaler.



C:\Users\ju\GDrive\Projects\HeRV\Data\PreProcessed\df_60_90_encoded.xlsx
17199 frags
13759 3440



Data with input dtype int64 was converted to float64 by StandardScaler.



Wall time: 47.2 s


In [5]:
# Tabulate the inter-user results and summarise the random-forest rows.
dfi = pd.DataFrame(fullresult)
rf_rows = dfi['classifier'] == 'rf'
dfi[rf_rows].describe()

Unnamed: 0,crop,duration,f1,fn,fp,precision,recall,score,tn,tp,user
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,90.0,278.571429,0.530736,134.510204,58.408163,0.65261,0.472852,0.847633,889.693878,122.387755,-1.0
std,0.0,178.009831,0.216007,150.582082,71.041337,0.149738,0.223015,0.075267,778.675938,169.269486,0.0
min,90.0,60.0,0.053097,1.0,1.0,0.333333,0.028846,0.740407,162.0,3.0,-1.0
25%,90.0,120.0,0.446259,37.0,13.0,0.563063,0.366834,0.781105,353.0,24.0,-1.0
50%,90.0,240.0,0.548936,87.0,35.0,0.628931,0.495833,0.845946,621.0,71.0,-1.0
75%,90.0,450.0,0.676768,164.0,67.0,0.8,0.615385,0.889294,1065.0,145.0,-1.0
max,90.0,600.0,0.885496,592.0,324.0,0.920635,0.852941,0.993506,3309.0,989.0,-1.0


In [None]:
# Save the inter-user results next to the notebook (current working directory),
# then display the full table.
dfi.to_excel('./resultsprelim.xlsx')
dfi

## Intra-user classification

In [None]:
# Repeat the full classification once per user id (0 and 1) and gather all
# returned result records into a single flat list.
usersresult = []

for user in (0, 1):
    per_user = classif.full_binary_classification(
        classifiers, durations, crops, features, onehotlabels,
        user=user, path=path,
    )
    usersresult += per_user
    

In [None]:
# Tabulate the per-user results and show summary statistics of the numeric columns.
dfu = pd.DataFrame(usersresult)
dfu.describe()

In [None]:
# Stack the inter-user (dfi) and per-user (dfu) results into one frame.
# Each input keeps its own row index (no ignore_index), so index labels can repeat.
dfull = pd.concat([dfi, dfu])

In [None]:
# Persist the combined results in the data directory.
dfull.to_excel(path + '\\results_encoded.xlsx')

## Examining classification

In [None]:
# Load a results workbook from the data directory.
# NOTE(review): this reads 'results.xlsx', not the 'results_encoded.xlsx' written
# by the cell above -- presumably an earlier, separately produced results file;
# confirm this is the intended input.
df = pd.read_excel(path + '\\results.xlsx')

### Examining one of the options (duration = 240, crop = 90, classifier = random forest)

In [None]:
# Narrow the inter-user results to a single configuration:
# random forest, crop 90, duration 240.
is_rf = dfi['classifier'] == 'rf'
is_crop_90 = dfi['crop'] == 90
is_dur_240 = dfi['duration'] == 240

dview = dfi.loc[is_rf & is_crop_90 & is_dur_240]
dview

In [None]:
# Heatmap of the score metrics (columns) for each label (rows) of the selected view.
metrics = ['precision', 'recall', 'f1']

score_grid = np.array(dview[metrics])
trace = go.Heatmap(
    x=metrics,
    y=dview['label'],
    z=score_grid,
    colorscale='Viridis',
)
data = [trace]
pl.iplot(data, filename='labelled-heatmap')

In [None]:
# Heatmap of the tp / fp / fn columns for each label (rows) of the selected view.
metrics = ['tp', 'fp', 'fn']

count_grid = np.array(dview[metrics])
trace = go.Heatmap(
    x=metrics,
    y=dview['label'],
    z=count_grid,
    colorscale='Viridis',
)
data = [trace]
pl.iplot(data, filename='labelled-heatmap')

### Model selection - for various durations, crops and classifiers, which is the best?

In [None]:
# From the loaded results workbook, keep the random-forest rows for four labels
# and display them. `dfn` is not read by any later cell visible in this notebook.
dfn = df.loc[(df['classifier'] == 'rf') & df['label'].isin(['exercise', 'move', 'sleep', 'stress2'])]
dfn

In [None]:
# Select the random-forest rows for the labels of interest into a NEW frame.
# `dfi` itself is left untouched so that cells reading the unfiltered inter-user
# results (describe, concat into dfull, dview) give the same output on re-run.
labels_of_interest = ['exercise', 'move', 'sleep', 'stress2', 'relax2']
dfi_rf_selected = dfi.loc[
    (dfi['classifier'] == 'rf') & dfi['label'].isin(labels_of_interest)
]
# Grouped bar chart of f1 for the selection (argument roles are defined by dataviz.bargroup).
dv.bargroup(dfi_rf_selected, 'crop', 'duration', 'label', 'f1')

In [None]:
# Grouped bar chart of f1 restricted to the rows whose `user` column equals 0
# (argument roles are defined by dataviz.bargroup).
dv.bargroup(dfull[dfull['user']==0], 'label', 'duration', 'classifier', 'f1')

### Examining per user results

In [None]:
# Same single-configuration view as before (random forest, crop 90, duration 240),
# this time taken from the combined results and limited to user 0.
# Note this rebinds `dview`.
wanted_clf = dfull['classifier'] == 'rf'
wanted_crop = dfull['crop'] == 90
wanted_duration = dfull['duration'] == 240
wanted_user = dfull['user'] == 0

dview = dfull.loc[wanted_clf & wanted_crop & wanted_duration & wanted_user]
dview

In [None]:
# Display every combined-result row whose duration is 600.
dfull[dfull['duration']==600]