In [9]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
import parseIntervalFiles as pif
import parseActivityFiles as paf
#pun intended :)
import consolidateFiles as cf
import hervpd as hp

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 0 - Pipeline configuration 
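* Set the input/output directories, the verbose level and the fragmenting parameters (crop, duration, threshold); user ids are iterated in section 1.1 rather than configured here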

In [10]:
    # --- Locations -------------------------------------------------------
    # Raw recordings are read from RAW_PATH; pre-processed results are
    # written under PRE_PATH.
    RAW_PATH = "/home/ju/GDrive/Projects/HeRV/Data/Raw"
    PRE_PATH = "/home/ju/GDrive/Projects/HeRV/Data/PreProcessed"

    # --- Fragmenting parameters (all expressed in seconds) ---------------
    # Time cropped from the beginning of each session, to account for
    # stabilization and user adjustment.
    crop = 90
    # Length of each fragment sent to analysis.
    duration = 300
    # A fragment with more than `threshold` consecutive seconds without
    # beats is meant to be discarded.
    threshold = 3

    # --- Verbosity -------------------------------------------------------
    verbose = True
    

## 1 - Extract sessions from raw data (intervals + annotations files)

### 1.1 - List activities with session start/stop datetime
Parses the activity files and prints every error found (activities without a start or stop, invalid dates, etc.).

In [11]:
# Collect the sessions of every user id in range(7) into a single flat list.
sessions = []
for user in range(7):
    # In-place concatenation: appends each user's sessions to the shared list.
    sessions += paf.get_user_sessions(user, dirname=RAW_PATH)

reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171014.csv ... 
orphan stop in: ['2017-10-14 2:14:21', 'stop', '', '', '']
orphan start in: ['2017-10-14 11:58:31', 'start', 'focused-active', 'lying down', '']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171102.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171016.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171006.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171104.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171029.csv ... 
orphan stop in: ['2017-10-29 15:27:59', 'stop', '', '']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171028.csv ... 
orphan start in: ['2017-10-28 11:48:07', 'start', 'sleep', 'posture']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171013.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171003.csv ... 
orphan start in: ['2017-10-03 22:56:19', 'start', 'eat', 'sitting']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/

## 2 - Generate fragments from sessions

### 2.1 - Break each session's duration into fragments
Configurations:
* duration of each fragment in seconds;
* number of seconds to be discarded at the beginning of the session, accounting for user's stabilization and adjustment to posture and activity 

### 2.2 - extracts the intervals for each fragment
Retrieves from the heartbeat files all the intervals contained in each session's duration and adds them to the fragment objects (in memory).


In [14]:
# Split the collected sessions into fragments, passing the configured fragment
# length (`duration`) and the initial crop (`crop`), both in seconds.
# NOTE(review): the helper also reports how many sessions are long enough to
# yield at least one full fragment (see the cell output) -- confirm in
# consolidateFiles.fragment_sessions.
frags = cf.fragment_sessions(sessions, duration, crop)

329 valid sessions out of 393 total (at least one full fragment of 300 seconds after discarding first 90 seconds)


## 3 - Quality assurance

### To be implemented


In [18]:
# Basic quality filter: keep a fragment only when the number of beats found
# for it exceeds 0.8 * duration. The `threshold` setting is not applied here.
valid_frags = list(filter(
    lambda frag: len(cf.beats_in_fragment(frag, RAW_PATH)) > 0.8 * duration,
    frags,
))

## 4 - Extract features

In [17]:
# Spot check on a single fragment (index 100, picked arbitrarily): fetch its
# beats, convert them with cf.beatlist and print the metrics computed from them.
beats = cf.beatlist(cf.beats_in_fragment(frags[100], RAW_PATH))
print(cf.calc_metrics(beats))

{'rmssd': 45.477477553512408, 'sdnn': 68.814471910244038, 'nn50': 16, 'pnn50': 2.572347266881029, 'mrri': 490.03049759229532, 'mhr': 124.57569788680118, 'total_power': 2102.5544791818802, 'vlf': 379.11538427722235, 'lf': 865.19516073512818, 'hf': 858.24393416952989, 'lf_hf': 1.0080993599708044, 'lfnu': 50.201667311196253, 'hfnu': 49.798332688803761}


In [21]:
# Report how many fragments passed the quality filter ...
print(len(valid_frags))
# ... then aggregate the data of each of them. NOTE: despite its name, `df` is
# a plain list holding one aggregated record per fragment, not a DataFrame.
df = list(map(lambda frag: cf.aggregate_data(frag, RAW_PATH), valid_frags))

2619


In [22]:
# Turn the list of per-fragment records into a DataFrame (one row per fragment)
# and preview the first rows.
pdf = pd.DataFrame(df)
pdf.head()

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,start,stop,total_power,user,vlf
0,focused-active,1516.951144,60.039661,1009.63065,0.665566,39.960339,77.562161,786.159383,106,0,27.319588,64.259882,98.63235,0,2017-10-14 02:15:51,2017-10-14 02:20:51,4622.061918,0,2095.480124
1,focused-active,443.40188,47.028536,499.433927,1.126369,52.971464,76.262208,791.421189,60,1,15.544041,38.154071,59.187774,0,2017-10-14 02:20:51,2017-10-14 02:25:51,2122.588509,0,1179.752702
2,focused-active,556.500976,47.711706,609.881487,1.095922,52.288294,75.781084,795.275325,50,2,13.020833,38.068024,53.117372,0,2017-10-14 02:25:51,2017-10-14 02:30:51,1818.107217,0,651.724754
3,focused-active,2556.565157,60.438465,1673.464748,0.654575,39.561535,73.561917,828.932249,136,3,36.956522,86.832813,109.005825,0,2017-10-14 02:30:51,2017-10-14 02:35:51,6779.012728,0,2548.982823
4,focused-active,764.052485,41.364174,1083.083368,1.417551,58.635826,76.085082,794.917313,93,4,24.093264,47.554632,71.367134,0,2017-10-14 02:35:51,2017-10-14 02:40:51,3062.008804,0,1214.872951


In [23]:
# Summary statistics of the numeric columns.
pdf.describe()
# TODO: the next pipeline steps are currently disabled:
#   r = hp.runFlow(pdf)
#   hp.scaleFeatures(pdf)

Unnamed: 0,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,total_power,user,vlf
count,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0
mean,23725.05,45.385168,430984.5,2.519038,54.614832,80.316212,796.870991,86.230241,20.298205,24.345589,108.754669,111.438615,156.24475,1508606.0,1.623139,1053896.0
std,184009.4,23.096016,9142996.0,4.950977,23.096016,17.318303,159.679709,73.326207,26.599498,23.01523,161.561323,115.954166,112.334309,46556770.0,1.996276,37617670.0
min,0.5751146,1.419545,2.567078,0.027289,2.656368,47.596407,334.157205,0.0,0.0,0.0,3.369205,13.625541,0.0,33.90064,0.0,25.59104
25%,326.4626,26.230275,540.0063,0.578918,36.66548,69.039105,698.960529,25.0,2.0,5.745437,30.572893,52.117072,59.0,1581.253,0.0,477.2655
50%,845.5907,43.023321,1209.224,1.324321,56.976679,78.70597,776.071066,66.0,7.0,16.745283,51.264944,74.884477,126.0,3291.579,1.0,1019.906
75%,4492.113,63.33452,2840.573,2.812395,73.769725,87.632704,890.307246,135.0,30.0,37.317784,131.050687,134.858549,289.5,11565.31,3.0,2341.619
max,6139868.0,97.343632,426383800.0,69.445109,98.580455,180.176325,1269.037344,354.0,113.0,91.726619,2239.26324,1776.464653,328.0,2338062000.0,6.0,1905538000.0


In [29]:
# NOTE(review): repeats the summary statistics already displayed earlier in
# this notebook; `pdf` is not modified in between by any visible cell.
pdf.describe()

Unnamed: 0,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,total_power,user,vlf
count,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0,2619.0
mean,23725.05,45.385168,430984.5,2.519038,54.614832,80.316212,796.870991,86.230241,20.298205,24.345589,108.754669,111.438615,156.24475,1508606.0,1.623139,1053896.0
std,184009.4,23.096016,9142996.0,4.950977,23.096016,17.318303,159.679709,73.326207,26.599498,23.01523,161.561323,115.954166,112.334309,46556770.0,1.996276,37617670.0
min,0.5751146,1.419545,2.567078,0.027289,2.656368,47.596407,334.157205,0.0,0.0,0.0,3.369205,13.625541,0.0,33.90064,0.0,25.59104
25%,326.4626,26.230275,540.0063,0.578918,36.66548,69.039105,698.960529,25.0,2.0,5.745437,30.572893,52.117072,59.0,1581.253,0.0,477.2655
50%,845.5907,43.023321,1209.224,1.324321,56.976679,78.70597,776.071066,66.0,7.0,16.745283,51.264944,74.884477,126.0,3291.579,1.0,1019.906
75%,4492.113,63.33452,2840.573,2.812395,73.769725,87.632704,890.307246,135.0,30.0,37.317784,131.050687,134.858549,289.5,11565.31,3.0,2341.619
max,6139868.0,97.343632,426383800.0,69.445109,98.580455,180.176325,1269.037344,354.0,113.0,91.726619,2239.26324,1776.464653,328.0,2338062000.0,6.0,1905538000.0


In [36]:
# Save the feature table as df.xlsx inside the pre-processed data folder.
pdf.to_excel('/'.join((PRE_PATH, 'df.xlsx')))

In [28]:
# Inspect extreme outliers: fragments whose HF power exceeds 1,000,000 and that
# do not belong to user 0. Both conditions are combined into one boolean mask
# built on `pdf` itself, instead of chaining a second `.loc` that applied a
# full-frame mask to an already filtered frame (which only worked through
# implicit index alignment). The selected rows are the same.
pdf.loc[(pdf.hf > 1000000) & (pdf.user != 0)]

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,start,stop,total_power,user,vlf
2296,rest-passive,6139868.0,1.419545,426383800.0,69.445109,98.580455,77.14313,1001.333333,118,4,47.580645,2090.737771,1512.166049,311,2017-12-29 14:01:01,2017-12-29 14:06:01,2338062000.0,5,1905538000.0
2300,rest-active,2674476.0,23.469649,8720989.0,3.260822,76.530351,100.843465,1095.157143,202,1,72.401434,1711.040725,1207.022049,312,2017-12-26 19:53:12,2017-12-26 19:58:12,14753900.0,5,3358434.0
2609,focused-active,2544236.0,7.600287,30931290.0,12.157399,92.399713,85.562386,1202.257692,125,8,48.262548,2239.26324,1776.464653,327,2017-12-31 13:35:34,2017-12-31 13:40:34,55659530.0,6,22184000.0
