In [65]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
import parseIntervalFiles as pif
import parseActivityFiles as paf
#pun intended :)
import consolidateFiles as cf
import hervpd as hp

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 0 - Pipeline configuration 
* set the input/output directories, user id and verbose level

In [52]:
    # Pipeline configuration: everything a reader may want to tune lives in this cell.
    # NOTE(review): `verbose` is not referenced by the cells visible in this notebook section.
    verbose = True    
    # numeric user id; it selects the per-user sub-directory of the data paths below
    user = 0
    
    # NOTE(review): absolute, machine-specific locations -- adjust before running elsewhere.
    RAW_DATA_PATH = "/home/ju/GDrive/Projects/HeRV/Data/Raw/%d"%user
    PRE_DATA_PATH = "/home/ju/GDrive/Projects/HeRV/Data/PreProcessed/%d"%user
    
    # duration (in seconds) to be cropped from the beginning of each session to account for stabilization and user adjustment
    crop = 90
    
    # duration (in seconds) of each fragment to be sent to analysis
    duration = 300
    
    # if any fragment has more than 'threshold' consecutive seconds with no beats, it will be discarded
    # NOTE(review): `threshold` is not passed to any call in the cells visible here -- confirm where it is applied.
    threshold = 3

## 1 - Extract sessions from raw data (intervals + annotations files)

### 1.1 - List activities with session start/stop datetime
Parses the activity files and prints all errors found (activities without a start or stop, invalid dates, etc.).

In [53]:
# Parse the activity files found under RAW_DATA_PATH into `sessions`.
# The parser prints each file it reads and reports orphan start/stop entries as it goes.
sessions = paf.parseActivityFiles(dirname=RAW_DATA_PATH)

reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171014.csv ... 
orphan stop in: ['2017-10-14 2:14:21', 'stop', '', '', '']
orphan start in: ['2017-10-14 11:58:31', 'start', 'focused-active', 'lying down', '']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171102.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171016.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171006.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171104.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171029.csv ... 
orphan stop in: ['2017-10-29 15:27:59', 'stop', '', '']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171028.csv ... 
orphan start in: ['2017-10-28 11:48:07', 'start', 'sleep', 'posture']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171013.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171003.csv ... 
orphan start in: ['2017-10-03 22:56:19', 'start', 'eat', 'sitting']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/

## 2 - Generate fragments from sessions

### 2.1 - Break each session's duration into fragments
Configurations:
* duration of each fragment in seconds;
* number of seconds to be discarded at the beginning of the session, accounting for user's stabilization and adjustment to posture and activity 

### 2.2 - extracts the intervals for each fragment
Retrieves from the heartbeat files all the intervals that fall within each session's duration and adds them to the fragment objects (in memory).


In [54]:
# Split the parsed sessions into fragments, using the `duration` (fragment length, seconds)
# and `crop` (seconds discarded at the start of a session) values from the configuration cell.
frags = cf.fragment_sessions(sessions, duration, crop)

122 valid sessions out of 157 total (at least one full fragment of 300 seconds after discarding first 90 seconds)


## 3 - Quality assurance

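### To be implemented
For now the only check applied below is a minimum beat count: a fragment is kept only if it holds more than 0.8 × `duration` beats. The `threshold` gap rule from the configuration is not applied in this cell.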


In [55]:
# Minimum number of entries a fragment must exceed to be kept:
# 80% of the fragment length in seconds (i.e. more than 0.8 entries per second on average).
min_beats = 0.8 * duration

# Keep only the fragments for which cf.beats_in_fragment returns more than `min_beats` entries.
valid_frags = [
    frag
    for frag in frags
    if len(cf.beats_in_fragment(frag, RAW_DATA_PATH)) > min_beats
]

## 4 - Extract features

In [56]:
# Spot-check the metric computation on one arbitrary fragment.
# NOTE(review): index 100 is hard-coded and taken from `frags`, not `valid_frags`,
# so this fragment has not necessarily passed the beat-count filter.
sample_frag = frags[100]
sample_rows = cf.beats_in_fragment(sample_frag, RAW_DATA_PATH)
beats = cf.beatlist(sample_rows)

sample_metrics = cf.calc_metrics(beats)
print(sample_metrics)

{'rmssd': 45.477477553512408, 'sdnn': 68.814471910244038, 'nn50': 16, 'pnn50': 2.572347266881029, 'mrri': 490.03049759229532, 'mhr': 124.57569788680118, 'total_power': 2102.5544791818802, 'vlf': 379.11538427722235, 'lf': 865.19516073512818, 'hf': 858.24393416952989, 'lf_hf': 1.0080993599708044, 'lfnu': 50.201667311196253, 'hfnu': 49.798332688803761}


In [60]:
# Report how many fragments passed the filter, then aggregate each of them.
n_valid = len(valid_frags)
print(n_valid)

# NOTE: despite its name, `df` is a plain list with one cf.aggregate_data result per
# fragment; it is turned into a DataFrame in the next cell.
df = [
    cf.aggregate_data(frag, RAW_DATA_PATH)
    for frag in valid_frags
]

1286


In [77]:
# Build the feature table: one row per aggregated fragment.
pdf = pd.DataFrame(data=df)

# Preview the first five rows.
pdf.head(n=5)

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,start,stop,total_power,vlf
0,focused-active,1516.951144,60.039661,1009.63065,0.665566,39.960339,77.562161,786.159383,106,0,27.319588,64.259882,98.63235,0,2017-10-14 02:15:51,2017-10-14 02:20:51,4622.061918,2095.480124
1,focused-active,443.40188,47.028536,499.433927,1.126369,52.971464,76.262208,791.421189,60,1,15.544041,38.154071,59.187774,0,2017-10-14 02:20:51,2017-10-14 02:25:51,2122.588509,1179.752702
2,focused-active,556.500976,47.711706,609.881487,1.095922,52.288294,75.781084,795.275325,50,2,13.020833,38.068024,53.117372,0,2017-10-14 02:25:51,2017-10-14 02:30:51,1818.107217,651.724754
3,focused-active,2556.565157,60.438465,1673.464748,0.654575,39.561535,73.561917,828.932249,136,3,36.956522,86.832813,109.005825,0,2017-10-14 02:30:51,2017-10-14 02:35:51,6779.012728,2548.982823
4,focused-active,764.052485,41.364174,1083.083368,1.417551,58.635826,76.085082,794.917313,93,4,24.093264,47.554632,71.367134,0,2017-10-14 02:35:51,2017-10-14 02:40:51,3062.008804,1214.872951


In [75]:
# Leftover experiments, kept commented out:
#pdf.describe()
#r = hp.runFlow(pdf)
# NOTE(review): no output is shown for this call, so it presumably returns None and scales
# `pdf` in place -- TODO confirm against hervpd.scaleFeatures.
# NOTE(review): the saved execution counts show this cell (In [75]) ran before `pdf` was
# rebuilt (In [77]), so the statistics displayed further down appear to describe unscaled
# data; a top-to-bottom run may therefore produce different numbers.
hp.scaleFeatures(pdf)



In [78]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns of `pdf`.
pdf.describe()

Unnamed: 0,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,total_power,vlf
count,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0,1286.0
mean,25258.54,55.341049,288617.8,1.754342,44.658951,82.38012,789.116547,117.09098,24.825816,32.470789,132.776426,125.71908,56.260498,582177.2,268300.9
std,153995.9,23.850139,4889699.0,4.007776,23.850139,19.229308,171.501216,82.508843,27.996067,26.415424,140.785066,102.525861,35.567131,10239720.0,5261225.0
min,2.147458,1.456776,17.39058,0.027289,2.656368,49.448981,354.596065,0.0,0.0,0.0,3.369205,13.625541,0.0,110.8732,37.04196
25%,526.7165,37.248277,610.0922,0.332417,24.94843,67.332358,665.72362,40.0,3.0,8.747651,39.129113,57.077112,23.0,1842.039,519.0481
50%,2206.278,58.587815,1500.708,0.70684,41.412185,80.087278,764.9225,105.0,12.0,24.159042,89.425239,99.076177,58.0,5267.396,1204.442
75%,13583.43,75.05157,4183.391,1.684689,62.751723,92.346774,920.116568,199.0,40.0,55.49628,194.099499,171.846782,78.0,23302.35,2996.18
max,3481862.0,97.343632,165713700.0,67.64471,98.543224,169.7966,1240.951417,354.0,113.0,91.726619,1667.724397,1226.359214,121.0,349167000.0,179971400.0


In [80]:
# Inspect the fragments whose `hf` value is strictly above one million.
hf_outlier_cutoff = 1000000
extreme_hf = pdf["hf"] > hf_outlier_cutoff
pdf.loc[extreme_hf]

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,start,stop,total_power,vlf
113,household-chores,1106908.0,22.782275,3751727.0,3.389377,77.217725,103.029756,655.965739,75,6,16.094421,711.716355,545.558334,9,2017-10-14 13:48:28,2017-10-14 13:53:28,6859219.0,2000584.0
537,sleep,2474411.0,7.423667,30856970.0,12.470431,92.576333,79.669903,852.571031,168,49,46.927374,460.809232,397.942859,42,2017-11-09 04:30:32,2017-11-09 04:35:32,46600440.0,13269070.0
555,sleep,1122949.0,3.42505,31663390.0,28.196653,96.57495,77.857616,953.015528,247,67,76.94704,1097.452557,767.879751,42,2017-11-09 06:00:32,2017-11-09 06:05:32,71959090.0,39172750.0
1085,eat,1216702.0,19.404664,5053449.0,4.1534,80.595336,101.162743,965.132911,185,6,58.730159,1479.885082,1048.500217,106,2017-10-04 13:10:04,2017-10-04 13:15:04,7482753.0,1212602.0
1086,eat,1176622.0,23.417895,3847834.0,3.270239,76.582105,117.007072,811.032172,220,7,59.139785,1199.36879,872.067106,106,2017-10-04 13:15:04,2017-10-04 13:20:04,6758502.0,1734047.0
1087,eat,1149476.0,35.16126,2119679.0,1.844039,64.83874,107.797894,743.728155,184,8,44.768856,785.166743,625.05849,106,2017-10-04 13:20:04,2017-10-04 13:25:04,3772668.0,503513.2
1098,eat,1619905.0,7.482271,20030000.0,12.364926,92.517729,116.145955,642.578616,158,19,33.193277,1024.735296,724.396072,106,2017-10-04 14:15:04,2017-10-04 14:20:04,29988870.0,8338966.0
1150,focused-passive,1067715.0,26.58397,2948672.0,2.761665,73.41603,101.180399,843.151934,209,39,57.894737,972.770573,788.434939,107,2017-10-05 12:51:13,2017-10-05 12:56:13,5482440.0,1466053.0
1220,leisure-passive,3481862.0,2.057892,165713700.0,47.593423,97.942108,114.527201,755.661728,196,9,48.514851,1667.724397,1226.359214,120,2017-09-29 20:05:28,2017-09-29 20:10:28,349167000.0,179971400.0
