In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline as pl
pl.init_notebook_mode(connected=True)
import plotly.graph_objs as go

from datetime import datetime, timedelta
import parseIntervalFiles as pif
import parseActivityFiles as paf
#pun intended :)
import consolidateFiles as cf
import datacleaning as cl
import dataviz as dv

In [2]:
# Notebook-wide settings. `verbose` is not read by any of the cells shown here.
verbose = True

# Root folder of the data (Windows path, trailing separator included) and its raw-data subfolder.
PATH = "C:\\Users\\ju\\GDrive\\Projects\\HeRV\\Data\\"
RAW = "".join((PATH, "Raw"))

In [3]:
# Read the preprocessed sessions workbook (located under PATH) into `df`.
file = "".join((PATH, "PreProcessed\\sessions.xlsx"))
df = pd.read_excel(io=file)
# Preview five randomly drawn rows; no seed is set, so the preview differs between runs.
df.sample(n=5)

Unnamed: 0,activity,beatscount,duration,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,...,posture,removed_artifacts,rmssd,sdnn,start,stop,total_power,user,vlf,sess_id
365,eat,1370,1020,290.759649,27.785084,755.699854,2.599053,72.214916,80.131622,753.950365,...,sit,0,26.71017,64.779114,2017-10-26 20:41:00,2017-10-26 20:58:00,1913.218777,2,866.759273,365
99,focused-active,147,364,555.705629,63.191638,323.691776,0.582488,36.808362,73.706616,816.401361,...,sit,0,43.001752,44.723758,2017-10-29 11:58:35,2017-10-29 12:04:39,1543.657049,0,664.259643,99
129,focused-active,1964,1365,206.852945,28.427889,520.787939,2.517672,71.572111,84.806782,711.248982,...,sit,0,25.072295,51.971333,2017-11-07 20:46:47,2017-11-07 21:09:32,1443.922259,0,716.281375,129
427,rest-passive,897,727,1071.803506,61.211322,679.185466,0.633685,38.788678,72.806069,828.516165,...,sit,0,65.660671,59.343338,2017-12-27 16:35:46,2017-12-27 16:47:53,2205.909947,5,454.920975,427
190,movement,2153,1184,10572.968418,62.464387,6353.425912,0.600912,37.535613,118.484194,539.089178,...,stand,71,166.900248,158.05209,2018-02-20 18:49:20,2018-02-20 19:09:04,18876.671487,0,1950.277157,190


In [4]:
# Number of rows in the sessions table.
df.shape[0]

450

## Plotting random examples of RR time series for each type of activity

In [7]:
# For every distinct activity label, draw one session at random and plot its RR series.
# No random seed is set, so a different session may be picked on each run.
for act in df['activity'].unique():
    # sample(1) yields a one-row frame; extract that single row once instead of
    # looping over range(1) and indexing it repeatedly.
    v = df[df['activity'] == act].sample(1)
    sess = v.iloc[0]
    print('---------------- ', act, ' ---------------')
    print('USER: ', sess['user'])
    dv.plot_sess_rr(sess, RAW, act)

----------------  focused-active  ---------------
USER:  0


----------------  leisure  ---------------
USER:  0


----------------  eat  ---------------
USER:  2


----------------  movement  ---------------
USER:  2


----------------  sleep  ---------------
USER:  0


----------------  rest-passive  ---------------
USER:  1


----------------  rest-active  ---------------
USER:  6


----------------  household-chores  ---------------
USER:  0


----------------  focused-passive  ---------------
USER:  2


----------------  exercise  ---------------
USER:  1


### Removing outliers from the frequency-domain features

In [6]:
# Keep only the sessions whose 'hf' value is below 7000.
dfc = df.loc[df['hf'].lt(7000)]
# Report how many rows were dropped, then how many remain.
print(df.shape[0] - dfc.shape[0])
print(dfc.shape[0])

40
410


In [7]:
# Apply the same 7000 cut-off to the 'lf' column of the already filtered frame.
dfc = dfc.loc[dfc['lf'].lt(7000)]
# Rows remaining after both filters.
print(dfc.shape[0])

409


## Plotting features summary for each type of session
---

In [8]:
# For users 0, 1 and 2: compare every feature in cl.features_all across activities.
for user in (0, 1, 2):
    # Rows of the outlier-filtered frame belonging to this user.
    dfu = dfc.loc[dfc['user'].eq(user)]
    print('---------------------------------------- ', user, ' ----------------------------------------')
    for feat in cl.features_all:
        dv.boxplot_compare(dfu, feat, min_examples=2, groupby='activity')

----------------------------------------  0  ----------------------------------------


----------------------------------------  1  ----------------------------------------


----------------------------------------  2  ----------------------------------------


## Plotting features summary for each user
---

In [23]:
# User ids and activity labels of interest for the per-user comparison.
# NOTE(review): `ulist` is defined but not applied in this cell; only the
# activity filter is used. Confirm whether a user filter was intended.
ulist = [0, 1, 2, 4, 5, 6]
alist = [
    'leisure',
    'focused-passive',
    'focused-active',
    'rest-passive',
    'rest-active',
]
# Restrict the outlier-filtered sessions to the listed activities.
dfv = dfc.loc[dfc['activity'].isin(alist)]
# One comparison plot per feature, grouped by user.
for feat in cl.features_all:
    dv.boxplot_compare(dfv, feat, min_examples=2, groupby='user')

## Assessing how features evolve within a session
---

### Load sessions split into 60-second fragments

In [8]:
# Read the fragment table from a path relative to the notebook's working directory.
fdf = pd.read_excel(io='../data/fragments/df_60_30.xlsx')

In [9]:
# Display pandas' summary statistics (count, mean, std, quartiles, min/max) for the fragment table.
fdf.describe()

Unnamed: 0,beatcount,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,total_power,user,vlf
count,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0,19233.0
mean,80.363334,1773.941653,49.715246,1531.153352,2.214625,50.284754,80.330579,793.353147,15.412052,90.554724,22.114158,61.525357,70.406003,213.207768,4763.583921,1.298393,1458.488916
std,20.802385,2950.548394,25.388744,2840.047921,3.537051,25.388744,19.158152,168.537135,13.509879,113.640431,20.560198,52.131322,45.708567,137.217487,6895.635161,1.902666,3183.156723
min,37.0,0.077152,0.749662,0.173046,0.005172,0.514513,43.512674,327.907104,0.0,0.0,0.0,1.424952,2.100095,0.0,0.980344,0.0,0.0
25%,68.0,225.169651,27.548188,217.729426,0.399618,28.551939,68.75127,694.717647,4.0,12.0,4.878049,26.782073,38.349954,97.0,962.270918,0.0,180.706275
50%,77.0,580.018943,49.822851,596.142299,1.007111,50.177149,77.08786,785.960526,12.0,36.0,16.438356,42.529168,56.484266,188.0,2101.509697,0.0,514.230086
75%,87.0,1604.705984,71.448061,1579.597222,2.630003,72.451812,87.477389,882.623188,24.0,132.0,34.375,79.151921,89.612703,349.0,5157.723253,2.0,1428.327446
max,186.0,14976.717492,99.485487,38797.808704,132.393456,99.250338,183.052548,1382.954545,100.0,571.0,94.444444,431.483256,328.385002,446.0,70185.639553,6.0,61682.58901


### Helper to choose an activity

In [10]:
# List the 'movement' sessions of user 1, longest first, to help pick one for inspection.
movement_user1 = (df['activity'] == 'movement') & (df['user'] == 1)
df.loc[movement_user1, ['user', 'activity', 'start', 'duration', 'beatscount']].sort_values(by='duration', ascending=False)

Unnamed: 0,user,activity,start,duration,beatscount
238,1,movement,2017-11-02 19:49:37,2178,3614
235,1,movement,2017-11-01 17:02:13,2072,3733
255,1,movement,2017-11-30 15:02:00,942,880
266,1,movement,2018-05-08 10:10:50,905,1961
257,1,movement,2017-11-30 15:50:40,679,994


### Exercise
At 20 min the exercise type switches (aerobic -> strength).

In [11]:
# Plotly layout that only sets the y-axis title (Portuguese for "t (minutes)").
# NOTE(review): a time label on the y-axis looks unusual; confirm it was not meant for the x-axis.
layout = go.Layout(yaxis={'title': 't (minutos)'})

In [12]:
# Fragments of session 207, sorted by their `order` column.
pp = fdf.loc[fdf['sess'] == 207].sort_values(by=['order'], ascending=True)
# Assumes trace_sess_fragments returns a list of plotly traces; TODO confirm.
traces = dv.trace_sess_fragments(pp, ['rmssd', 'mhr', 'sdnn', 'pnn50'], filter=3)
# iplot's second positional parameter is `show_link`, not a layout, so passing
# `layout` positionally never applied it. Attach the layout through a Figure instead.
pl.iplot(go.Figure(data=traces, layout=layout))

### Movement

In [13]:
# Fragments of session 198, sorted by their `order` column.
pp = fdf[fdf['sess'] == 198].sort_values('order')
# Build the traces for the four selected features, then render them inline.
fragment_traces = dv.trace_sess_fragments(pp, ['rmssd', 'mhr', 'sdnn', 'pnn50'], filter=3)
pl.iplot(fragment_traces)

### Sleeping

In [14]:
# Fragments of session 188, sorted by their `order` column.
pp = fdf[fdf['sess'] == 188].sort_values('order')
# Build the traces for the four selected features (filter=5 here), then render them inline.
fragment_traces = dv.trace_sess_fragments(pp, ['rmssd', 'mhr', 'sdnn', 'pnn50'], filter=5)
pl.iplot(fragment_traces)

### A long session of lectures

In [15]:
# Fragments of session 31, sorted by their `order` column.
pp = fdf[fdf['sess'] == 31].sort_values('order')
# Build the traces for the four selected features, then render them inline.
fragment_traces = dv.trace_sess_fragments(pp, ['rmssd', 'mhr', 'sdnn', 'pnn50'], filter=3)
pl.iplot(fragment_traces)

### A single lecture
Session 148 - loses focus after an hour <br>
Session 107 - keeps focus

In [16]:
# Fragments of session 107, sorted by their `order` column.
pp = fdf[fdf['sess'] == 107].sort_values('order')
# Build the traces for the four selected features, then render them inline.
fragment_traces = dv.trace_sess_fragments(pp, ['rmssd', 'mhr', 'sdnn', 'pnn50'], filter=3)
pl.iplot(fragment_traces)

### Work session
seems like a pomodoro break there :)

In [17]:
# Fragments of session 230, sorted by their `order` column.
pp = fdf[fdf['sess'] == 230].sort_values('order')
# Build the traces for the four selected features, then render them inline.
fragment_traces = dv.trace_sess_fragments(pp, ['rmssd', 'mhr', 'sdnn', 'pnn50'], filter=3)
pl.iplot(fragment_traces)

In [18]:
# Display pandas' summary statistics (count, mean, std, quartiles, min/max) for the sessions table.
df.describe()

Unnamed: 0,beatscount,duration,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,pnn50,removed_artifacts,rmssd,sdnn,total_power,user,vlf,sess_id
count,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0,450.0
mean,3925.582222,3126.822222,2067.095548,41.767744,1693.62125,1.95521,58.232256,87.261169,730.931761,932.093333,17.41498,24.2,65.040535,91.059682,5138.145294,1.28,1377.428496,224.5
std,6502.56499,5742.013922,3777.616059,17.165852,1691.491027,1.608758,17.165852,19.028288,142.556068,2628.950661,15.433927,63.972711,50.889449,43.606087,6162.382931,1.592521,1240.472581,130.048068
min,105.0,88.0,13.635122,8.241489,36.995074,0.172079,14.681505,54.190501,369.893825,0.0,0.0,0.0,5.402758,21.083352,150.076646,0.0,52.162541,0.0
25%,1112.0,900.0,370.393323,28.610812,673.84568,0.818736,45.016759,74.684912,642.513222,82.0,5.880139,0.0,32.713546,61.912978,1745.794584,0.0,570.73961,112.25
50%,1789.0,1428.0,719.166158,39.474339,1216.947907,1.533292,60.525661,83.176087,733.546638,242.5,13.670047,1.0,48.330213,78.514148,3205.454764,1.0,1022.983694,224.5
75%,3771.75,2458.0,1606.190331,54.983241,1974.461539,2.495209,71.389188,95.142772,817.104119,530.5,24.888292,14.0,76.322143,108.261929,5268.507993,2.0,1713.661655,336.75
max,46856.0,34360.0,22414.537885,85.318495,12243.363436,11.13373,91.758511,164.397688,1144.581425,20895.0,81.780538,591.0,290.138931,253.78725,36726.950587,6.0,7350.357547,449.0


In [19]:
# Look up sessions labelled 'leisure-active' (the saved output shows no matching rows).
df[df['activity'].eq('leisure-active')]

Unnamed: 0,activity,beatscount,duration,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,...,posture,removed_artifacts,rmssd,sdnn,start,stop,total_power,user,vlf,sess_id


In [20]:
# Fragments of session 440, sorted by their `order` column.
pp = fdf[fdf['sess'] == 440].sort_values('order')
# Build the traces for the four selected features, then render them inline.
fragment_traces = dv.trace_sess_fragments(pp, ['rmssd', 'mhr', 'sdnn', 'pnn50'], filter=3)
pl.iplot(fragment_traces)