In [3]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
import parseIntervalFiles as pif
import parseActivityFiles as paf
#pun intended :)
import consolidateFiles as cf
import hervpd as hp

## Note: the activity-file and interval-file parsers (`parseActivityFiles`, `parseIntervalFiles`) are to be replaced with the corresponding database queries as soon as those are available

## 0 - Pipeline configuration 
* Set the input/output directories, the verbosity level, and the fragment parameters (crop, duration, and no-beat threshold)

In [4]:
    # Pipeline configuration: constants read by name in the cells below.
    # NOTE(review): `verbose` is not referenced in the visible cells -- confirm where it is consumed.
    verbose = True
    
    # Root of the raw input data; activity files are read from per-user subfolders (e.g. Raw/0/act171014.csv).
    # NOTE(review): both paths are absolute and machine-specific -- adjust before running elsewhere.
    RAW_PATH = "/home/ju/GDrive/Projects/HeRV/Data/Raw"
    # Output directory; the aggregated table is exported here as an .xlsx file.
    PRE_PATH = "/home/ju/GDrive/Projects/HeRV/Data/PreProcessed"
    
    # duration (in seconds) to be cropped from the beginning of each session to account for stabilization and user adjustment
    crop = 90
    
    # duration (in seconds) of each fragment to be sent to analysis
    duration = 240
    
    # if any fragment has more than 'threshold' consecutive seconds with no beats, it will be discarded
    # NOTE(review): `threshold` is not passed to any call in the visible cells -- this check looks not yet wired in; confirm.
    threshold = 3
    

## 1 - Extract sessions from raw data (intervals + annotations files)

### 1.1 - List activities with session start/stop datetime
Parses the activity files and prints all errors found (activities without a start or a stop, invalid dates, etc.).

In [5]:
# Collect the annotated sessions of every user id (0 through 6) into a single flat list.
user_ids = range(7)
sessions = []
for user in user_ids:
    user_sessions = paf.get_user_sessions(user, dirname=RAW_PATH)
    # In-place concatenation: same effect as sessions.extend(user_sessions).
    sessions += user_sessions

reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171014.csv ... 
orphan stop in: ['2017-10-14 2:14:21', 'stop', '', '', '']
orphan start in: ['2017-10-14 11:58:31', 'start', 'focused-active', 'lying down', '']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171102.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171016.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171006.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171104.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171029.csv ... 
orphan stop in: ['2017-10-29 15:27:59', 'stop', '', '']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171028.csv ... 
orphan start in: ['2017-10-28 11:48:07', 'start', 'sleep', 'posture']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171013.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171003.csv ... 
orphan start in: ['2017-10-03 22:56:19', 'start', 'eat', 'sitting']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/

## 2 - Generate fragments from sessions

### 2.1 - Break each session's duration into fragments
Configurations:
* duration of each fragment in seconds;
* number of seconds to be discarded at the beginning of the session, accounting for user's stabilization and adjustment to posture and activity 

### 2.2 - Extract the intervals for each fragment
Retrieves from the heartbeat files all the intervals contained in each session's duration and adds them to the fragment objects (in memory)


In [6]:
# Split the sessions into fragments, passing the configured fragment duration and initial crop (both in seconds).
# The call prints how many sessions are long enough to yield at least one full fragment.
frags = cf.fragment_sessions(sessions, duration, crop)

359 valid sessions out of 403 total (at least one full fragment of 240 seconds after discarding first 90 seconds)


## 3 - Quality assurance

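### To be implemented

For now, the cell below only applies a coarse filter: it keeps the fragments for which more than `0.8 * duration` beats are found (i.e. more than 0.8 beats per second on average). The `threshold` setting from the configuration is not used in this notebook yet.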


In [7]:
# Coarse quality filter: keep a fragment only when cf.beats_in_fragment returns
# more than 0.8 * duration entries for it (presumably beats, i.e. > 0.8 beats per second on average).
min_beats = 0.8*duration
valid_frags = [
    fragment
    for fragment in frags
    if len(cf.beats_in_fragment(fragment, RAW_PATH)) > min_beats
]

## 4 - Extract features

In [8]:
# Spot-check on a single fragment (index 100): load its beats and print the computed metrics.
sample_fragment = frags[100]
sample_intervals = cf.beats_in_fragment(sample_fragment, RAW_PATH)
beats = cf.beatlist(sample_intervals)
print(cf.calc_metrics(beats))

{'rmssd': 246.14484218307996, 'sdnn': 206.23492014950645, 'nn50': 153, 'pnn50': 63.749999999999993, 'mrri': 1017.4066390041494, 'mhr': 61.104411490800594, 'total_power': 35554.907413156609, 'vlf': 4296.2253136113522, 'lf': 7408.2864799298786, 'hf': 23850.395619615378, 'lf_hf': 0.31061482576989419, 'lfnu': 23.699932250303196, 'hfnu': 76.300067749696794}


In [9]:
# Report how many fragments passed the filter, then aggregate the data of each one.
print(len(valid_frags))
records = []
for fragment in valid_frags:
    records.append(cf.aggregate_data(fragment, RAW_PATH))
# NOTE: despite its name, `df` is a plain list (one aggregated entry per fragment), not a DataFrame.
df = records

3458


In [10]:
# Tabulate the aggregated entries (one row per fragment) and preview the first five rows.
pdf = pd.DataFrame(data=df)
pdf.head(n=5)

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,posture,rmssd,sdnn,sess,start,stop,total_power,user,vlf
0,focused-active,1823.334077,59.335074,1249.610728,0.685344,40.664926,74.326961,814.723333,100,0,33.444816,sitting,71.430042,81.293515,0,2017-10-14 02:15:51,2017-10-14 02:19:51,4837.108747,0,1764.163943
1,focused-active,406.541792,35.592981,735.65472,1.809543,64.407019,80.461096,756.130031,50,1,15.52795,sitting,37.668586,85.910298,0,2017-10-14 02:19:51,2017-10-14 02:23:51,4094.987842,0,2952.791329
2,focused-active,411.097626,54.835149,338.599659,0.823648,45.164851,75.682842,794.876623,28,2,9.120521,sitting,32.848284,41.129149,0,2017-10-14 02:23:51,2017-10-14 02:27:51,962.12778,0,212.430494
3,focused-active,759.792939,54.374329,637.544659,0.839103,45.625671,74.279914,811.800664,60,3,20.0,sitting,43.69447,56.942077,0,2017-10-14 02:27:51,2017-10-14 02:31:51,2418.14088,0,1020.803282
4,focused-active,2873.56406,60.600346,1868.263744,0.650156,39.399654,73.89837,827.745763,114,4,38.77551,sitting,94.191584,119.360027,0,2017-10-14 02:31:51,2017-10-14 02:35:51,7640.073305,0,2898.245501


In [11]:
# Summary statistics of the numeric columns; the maxima of hf, lf, vlf and total_power
# are several orders of magnitude above their 75th percentiles, which points to outlier fragments.
pdf.describe()

Unnamed: 0,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,total_power,user,vlf
count,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0,3458.0
mean,21941.03,46.028661,250484.7,2.426225,53.971339,78.695623,818.718038,71.373916,23.625795,25.645399,108.536266,111.67653,167.431463,609262.7,1.616252,336837.1
std,142329.7,23.350799,3178385.0,4.956553,23.350799,18.303238,174.06476,57.659829,30.903771,22.827238,166.26326,121.070635,119.20397,8755028.0,1.993899,5781785.0
min,0.9162266,0.735464,2.168192,0.020093,1.969717,47.660561,332.600543,0.0,0.0,0.0,3.585467,5.939201,0.0,9.926065,0.0,2.088845
25%,354.9671,26.249097,545.9688,0.547914,35.396942,65.681984,703.113393,23.0,2.0,7.007826,32.47775,52.154415,66.0,1671.809,0.0,463.6185
50%,868.4488,43.784665,1249.734,1.283905,56.215335,77.051017,791.828479,58.0,8.0,18.782946,52.661276,75.209879,141.0,3443.742,1.0,1032.988
75%,3792.908,64.603058,2808.493,2.809655,73.750903,87.10764,933.909561,109.0,35.0,39.05944,121.260763,132.338163,297.0,10780.53,2.0,2386.448
max,3900249.0,98.030283,135327700.0,134.968592,99.264536,180.987972,1266.731959,306.0,141.0,90.666667,2205.670373,1826.309221,358.0,330819300.0,6.0,203423600.0


In [12]:
# Export the table to PRE_PATH; the file name records the crop and duration settings used.
output_name = 'df_{}_{}.xlsx'.format(crop, duration)
pdf.to_excel(PRE_PATH + '/' + output_name)

In [14]:
# Inspect the fragments whose hf value exceeds 1,000,000 (the extreme values seen in the summary).
hf_outliers = pdf.hf > 1000000
pdf.loc[hf_outliers]

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,posture,rmssd,sdnn,sess,start,stop,total_power,user,vlf
686,sleep,3900249.0,7.500135,48102130.0,12.333093,92.499865,83.008439,818.715719,120,61,40.268456,lie,492.320941,421.35598,44,2017-11-09 04:29:32,2017-11-09 04:33:32,71373460.0,0,19371080.0
707,sleep,1667204.0,3.479682,46245330.0,27.738262,96.520318,81.977972,935.591603,199,84,76.245211,lie,1212.083406,846.274598,44,2017-11-09 06:01:32,2017-11-09 06:05:32,102063600.0,0,54151030.0
1375,eat,1469753.0,20.680239,5637286.0,3.835534,79.319761,107.597365,960.885827,160,8,63.241107,sitting,1504.242866,1074.29061,111,2017-10-04 13:12:04,2017-10-04 13:16:04,8263263.0,0,1156224.0
1376,eat,1112292.0,20.145217,4409076.0,3.963957,79.854783,117.409066,774.337621,167,9,53.870968,sitting,1091.163104,806.968152,111,2017-10-04 13:16:04,2017-10-04 13:20:04,7679258.0,0,2157890.0
1377,eat,1515957.0,35.04789,2809430.0,1.853239,64.95211,106.59497,785.496795,157,10,50.482315,sitting,888.607522,698.81259,111,2017-10-04 13:20:04,2017-10-04 13:24:04,4989375.0,0,663989.1
1456,focused-passive,1304437.0,29.769773,3077313.0,2.359112,70.230227,99.270507,908.841328,165,49,61.111111,sitting,1106.735287,888.700979,112,2017-10-05 12:52:13,2017-10-05 12:56:13,5840989.0,0,1459239.0
1551,leisure-passive,2664805.0,1.931123,135327700.0,50.783344,98.068877,118.899548,678.966006,150,11,42.613636,sitting,1584.140941,1145.269948,126,2017-09-29 20:04:28,2017-09-29 20:08:28,330819300.0,0,192826800.0
2287,not-recorded,1200922.0,4.482329,25591440.0,21.309828,95.517671,91.212019,828.928571,45,17,15.358362,sit,1036.693009,868.774555,235,2017-10-17 19:20:30,2017-10-17 19:24:30,41449710.0,2,14657340.0
2708,leisure-passive,1028684.0,3.044196,32762970.0,31.849392,96.955804,107.127956,651.116105,21,8,7.894737,lie,1155.731585,799.987282,316,2017-12-20 21:22:51,2017-12-20 21:26:51,59691070.0,4,25899420.0
2851,sleep,1233466.0,1.677667,72289180.0,58.60657,98.322333,94.480653,702.555556,12,79,3.934426,lie,1309.452236,938.945924,320,2017-12-21 05:45:49,2017-12-21 05:49:49,276946300.0,4,203423600.0
