In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
import parseIntervalFiles as pif
import parseActivityFiles as paf
#pun intended :)
import consolidateFiles as cf
import hervpd as hp

## ! The activity-file and interval-file parsers (`parseActivityFiles`, `parseIntervalFiles`) are to be replaced with the corresponding database queries as soon as they are available

## 0 - Pipeline configuration 
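* set the input/output directories, the verbose level and the fragmenting parameters (crop, fragment duration, no-beat threshold); user ids are iterated in section 1.1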

In [12]:
    # Pipeline configuration: every tunable value read by the cells below lives here.

    # verbosity flag
    # NOTE(review): not referenced by any cell visible in this notebook - confirm it is still needed
    verbose = True
    
    # directory holding the raw data; passed to the paf/cf helpers that read the files
    # NOTE(review): absolute, machine-specific paths - adjust before running elsewhere
    RAW_PATH = "/home/ju/GDrive/Projects/HeRV/Data/Raw"
    # directory where the pre-processed feature table is exported
    PRE_PATH = "/home/ju/GDrive/Projects/HeRV/Data/PreProcessed"
    
    # duration (in seconds) to be cropped from the beginning of each session to account for stabilization and user adjustment
    crop = 60
    
    # duration (in seconds) of each fragment to be sent to analysis
    duration = 180
    
    # if any fragment has more than 'threshold' consecutive seconds with no beats, it will be discarded
    # NOTE(review): no visible cell reads 'threshold'; the quality filter below uses a beat count instead - confirm
    threshold = 3
    

## 1 - Extract sessions from raw data (intervals + annotations files)

### 1.1 - List activities with session start/stop datetime
Parses the activity files and prints all errors found (activities without a start or stop, invalid dates, etc.)

In [13]:
# Gather the sessions of users 0 through 6 into one flat list.
# The parser prints the problems it finds (orphan start/stop rows) while reading.
sessions = []
for user in range(7):
    sessions += paf.get_user_sessions(user, dirname=RAW_PATH)

reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171014.csv ... 
orphan stop in: ['2017-10-14 2:14:21', 'stop', '', '', '']
orphan start in: ['2017-10-14 11:58:31', 'start', 'focused-active', 'lying down', '']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171102.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171016.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171006.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171104.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171029.csv ... 
orphan stop in: ['2017-10-29 15:27:59', 'stop', '', '']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171028.csv ... 
orphan start in: ['2017-10-28 11:48:07', 'start', 'sleep', 'posture']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171013.csv ... 
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/act171003.csv ... 
orphan start in: ['2017-10-03 22:56:19', 'start', 'eat', 'sitting']
reading /home/ju/GDrive/Projects/HeRV/Data/Raw/0/

## 2 - Generate fragments from sessions

### 2.1 - Breaks each session's duration into fragments
Configurations:
* duration of each fragment in seconds;
* number of seconds to be discarded at the beginning of the session, accounting for user's stabilization and adjustment to posture and activity 

### 2.2 - extracts the intervals for each fragment
Retrieves from the heartbeat files all the intervals contained in each session's duration and adds them to the fragment objects (in memory)


In [14]:
# Split the sessions into fragments of `duration` seconds, after discarding
# the first `crop` seconds of each session. The helper prints how many
# sessions are long enough to yield at least one full fragment.
frags = cf.fragment_sessions(sessions, duration, crop)

375 valid sessions out of 403 total (at least one full fragment of 180 seconds after discarding first 60 seconds)


## 3 - Quality assurance

### To be implemented (for now, fragments are only kept if they contain more than 0.8 × `duration` beats)


In [15]:
# Keep only the fragments whose number of recorded beats exceeds
# 80% of the fragment duration (in seconds).
min_beats = 0.8 * duration
valid_frags = [
    frag
    for frag in frags
    if len(cf.beats_in_fragment(frag, RAW_PATH)) > min_beats
]

## 4 - Extract features

In [16]:
# Spot check: compute the metrics for one arbitrary fragment (index 100)
# and print the resulting dictionary.
sample_fragment = frags[100]
sample_fragment_beats = cf.beats_in_fragment(sample_fragment, RAW_PATH)
beats = cf.beatlist(sample_fragment_beats)
sample_metrics = cf.calc_metrics(beats)
print(sample_metrics)

{'rmssd': 218.92893623474976, 'sdnn': 203.47467560795687, 'nn50': 119, 'pnn50': 65.384615384615387, 'mrri': 1000.4918032786885, 'mhr': 62.262355354429609, 'total_power': 30110.018916381548, 'vlf': 5879.4857674571085, 'lf': 6267.3302742616261, 'hf': 17963.202874662813, 'lf_hf': 0.34889826263119961, 'lfnu': 25.86542456883506, 'hfnu': 74.134575431164933}


In [17]:
# Report how many fragments passed the filter, then build one aggregated
# record per valid fragment.
# NOTE: despite its name, `df` is a plain Python list, not a DataFrame.
n_valid = len(valid_frags)
print(n_valid)
df = [
    cf.aggregate_data(frag, RAW_PATH)
    for frag in valid_frags
]

4784


  .format(nperseg, input_length))


In [18]:
# Turn the list of per-fragment records into a DataFrame and preview
# its first five rows.
pdf = pd.DataFrame(data=df)
pdf.head(n=5)

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,posture,rmssd,sdnn,sess,start,stop,total_power,user,vlf
0,focused-active,1668.530439,54.652258,1384.464089,0.829751,45.347742,74.124143,818.094595,79,0,35.746606,sitting,77.156185,87.58874,0,2017-10-14 02:15:21,2017-10-14 02:18:21,4930.699049,0,1877.704521
1,focused-active,506.995433,40.216776,753.661142,1.486525,59.783224,79.776808,764.1,45,1,18.828452,sitting,43.067962,92.206554,0,2017-10-14 02:18:21,2017-10-14 02:21:21,3881.831402,0,2621.174827
2,focused-active,456.661298,40.710702,665.061682,1.456357,59.289298,77.567047,779.217021,36,2,15.384615,sitting,36.738915,65.221252,0,2017-10-14 02:21:21,2017-10-14 02:24:21,3066.360825,0,1944.637846
3,focused-active,510.815863,54.442902,427.443934,0.836787,45.557098,75.717676,794.393939,22,3,9.565217,sitting,33.176144,40.07228,0,2017-10-14 02:24:21,2017-10-14 02:27:21,1129.321442,0,191.061645
4,focused-active,454.165233,43.726237,584.49087,1.286956,56.273763,75.170311,802.371179,37,4,16.22807,sitting,42.244121,57.753615,0,2017-10-14 02:27:21,2017-10-14 02:30:21,2032.259134,0,993.603031


In [19]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric
# columns; useful to spot extreme values in the frequency-domain features.
pdf.describe()

Unnamed: 0,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,total_power,user,vlf
count,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0,4784.0
mean,19624.5,46.210656,224073.6,2.385178,53.789344,78.697601,818.354119,53.129599,31.345945,25.573598,106.240345,109.256308,178.389005,525125.9,1.626672,281427.8
std,137610.5,23.765067,2958263.0,4.805299,23.765067,18.302269,174.490506,43.747335,41.102185,23.038812,174.168116,129.073517,124.435724,8200052.0,1.990529,5421810.0
min,0.2485728,0.593058,1.196438,0.021476,2.102402,47.576344,332.088768,0.0,0.0,0.0,3.185629,6.925825,0.0,5.233558,0.0,0.9208101
25%,323.8285,25.907185,491.1111,0.527126,34.517505,65.925611,702.127395,17.0,3.0,6.709977,31.234275,49.962362,72.0,1549.992,0.0,408.181
50%,819.5273,44.032494,1173.229,1.27105,55.967506,77.110855,792.275862,42.5,11.0,18.702111,51.266067,72.427279,157.0,3289.597,1.0,926.8682
75%,3261.397,65.482495,2639.616,2.859933,74.092815,87.236184,933.249491,80.25,46.0,38.668508,112.563936,126.339773,314.0,9432.173,2.0,2172.812
max,4600261.0,97.897598,127831700.0,167.61756,99.406942,180.950681,1434.911565,256.0,189.0,92.261905,3427.741729,3073.438204,374.0,426165600.0,6.0,296223300.0


In [20]:
# Export the feature table to PRE_PATH; the file name records the crop and
# fragment duration used, e.g. df_60_180.xlsx.
excel_path = '{}/df_{}_{}.xlsx'.format(PRE_PATH, crop, duration)
pdf.to_excel(excel_path)

In [21]:
# Show the fragments whose `hf` value exceeds 1,000,000 for every user
# except user 0.
# Both conditions are combined into a single boolean mask so one .loc call
# selects the rows. The previous chained form applied a mask computed on
# the full frame to an already filtered frame, which only works through
# implicit index alignment.
pdf.loc[(pdf.hf > 1000000) & (pdf.user != 0)]

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,posture,rmssd,sdnn,sess,start,stop,total_power,user,vlf
2574,sleep,1071578.0,4.138129,24823650.0,23.16551,95.861871,64.966828,1434.911565,71,39,48.630137,lie,3427.741729,3073.438204,175,2018-01-23 06:03:26,2018-01-23 06:06:26,59268680.0,1,33373460.0
3147,not-recorded,1867944.0,5.083076,34880350.0,18.673127,94.916924,94.734091,792.324561,37,23,16.299559,sit,1055.997881,819.472958,249,2017-10-17 19:21:00,2017-10-17 19:24:00,57377180.0,2,20628880.0
3747,leisure-passive,1607111.0,4.321611,35580660.0,22.139518,95.678389,109.16332,668.0,20,11,9.90099,lie,1326.200263,917.19428,332,2017-12-20 21:23:21,2017-12-20 21:26:21,71969620.0,4,34781850.0
3946,sleep,2110642.0,1.624291,127831700.0,60.565325,98.375709,103.012358,676.0,12,106,5.825243,lie,1593.311423,1143.990707,336,2017-12-21 05:47:19,2017-12-21 05:50:19,426165600.0,4,296223300.0
4114,rest-active,2287168.0,50.931584,2203500.0,0.963418,49.068416,104.768137,966.239362,124,2,66.31016,sit,1391.783919,968.000868,349,2017-12-26 19:53:42,2017-12-26 19:56:42,6514790.0,5,2024122.0
4115,rest-active,1080433.0,9.999296,9724657.0,9.000704,90.000704,98.155053,1057.296512,112,3,65.497076,sit,1693.951132,1291.378637,349,2017-12-26 19:56:42,2017-12-26 19:59:42,20350720.0,5,9545632.0
4122,rest-active,1240884.0,29.378319,2982925.0,2.403871,70.621681,100.454756,858.925234,126,11,59.15493,sit,929.631708,693.612185,349,2017-12-26 20:20:42,2017-12-26 20:23:42,4519560.0,5,295751.9
4123,rest-active,1049548.0,21.742789,3777561.0,3.599226,78.257211,95.857693,833.468182,129,12,58.90411,sit,727.486064,541.333373,349,2017-12-26 20:23:42,2017-12-26 20:26:42,5904974.0,5,1077865.0
4129,rest-active,2531306.0,16.820748,12517410.0,4.945039,83.179252,94.98053,953.71875,140,18,73.298429,sit,1419.239653,963.398405,349,2017-12-26 20:41:42,2017-12-26 20:44:42,21617780.0,5,6569068.0
4381,sleep,1785929.0,23.962839,5666982.0,3.173128,76.037161,89.753629,999.994536,118,118,64.835165,lie,1404.774803,977.344459,355,2017-12-27 05:55:01,2017-12-27 05:58:01,11268440.0,5,3815532.0
