In [3]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
import parseIntervalFiles as pif
import parseActivityFiles as paf
#pun intended :)
import consolidateFiles as cf


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## ! The activity-file and interval-file parsers are to be replaced with the corresponding database queries as soon as those are available

## 0 - Pipeline configuration 
* set the input/output directories, user id and verbose level

In [4]:
    # Verbose flag for the pipeline.
    # NOTE(review): not referenced by any cell visible in this part of the notebook — confirm it is still consumed somewhere.
    verbose = True
    
    # Root folder of the data set (Windows layout). The trailing separator matters:
    # the sub-folder names below are appended by plain string concatenation.
    DATA_PATH = "C:\\Users\\ju\\GDrive\\Projects\\HeRV\\Data\\" 
    # Alternative root for a Linux checkout of the same data:
    # DATA_PATH = "/home/ju/GDrive/Projects/HeRV/Data/"
    
    # Input folder passed to the activity parser and the beat readers, and the
    # output folder that receives the exported feature table.
    RAW_PATH = DATA_PATH + "Raw"
    PRE_PATH = DATA_PATH + "PreProcessed"
    
    # duration (in seconds) to be cropped from the beginning of each session to account for stabilization and user adjustment
    crop = 90
    
    # duration (in seconds) of each fragment to be sent to analysis
    duration = 300
    
    # if any fragment has more than 'threshold' consecutive seconds with no beats, it will be discarded
    # NOTE(review): no cell visible here reads 'threshold'; the quality filter currently
    # uses a beat-count ratio instead — confirm where this rule is (or will be) applied.
    threshold = 3
    

## 1 - Extract sessions from raw data (intervals + annotations files)

### 1.1 - List activities with session start/stop datetime
Parses the activity files and prints every error found (activities without a start or stop, invalid dates, etc.)

In [5]:
# Gather the annotated sessions of every user id in 0..6 into one flat list.
# While reading, the parser prints each file name and any malformed annotation
# it finds (e.g. orphan start/stop rows).
sessions = []
for user in range(7):
    user_sessions = paf.get_user_sessions(user, dirname=RAW_PATH)
    sessions += user_sessions

reading act170929.csv ... 
orphan stop in: ['2017-09-29 00:19:00', 'stop', '', '', '', '', '']
reading act170930.csv ... 
reading act171001.csv ... 
reading act171003.csv ... 
orphan start in: ['2017-10-03 22:56:19', 'start', 'eat', 'sitting']
reading act171004.csv ... 
orphan start in: ['2017-10-04 13:31:16', 'start', 'movement', 'standing']
orphan start in: ['2017-10-04 14:59:24', 'start', 'rest-passive', 'sitting']
orphan start in: ['2017-10-04 15:14:23', 'start', 'movement', 'standing']
orphan start in: ['2017-10-04 17:05:36', 'start', 'rest-passive', 'sitting']
orphan start in: ['2017-10-04 23:30:33', 'start', 'sleep', 'lying down']
reading act171005.csv ... 
orphan start in: ['2017-10-05 11:08:11', 'start', 'focused-passive', 'sitting', '']
orphan start in: ['2017-10-05 12:54:00', 'start', 'eat', 'sitting', '']
orphan start in: ['2017-10-05 13:48:39', 'start', 'rest-active', 'sitting', '']
orphan start in: ['2017-10-05 13:57:37', 'start', 'movement', 'standing', '']
orphan start 

## 2 - Generate fragments from sessions

### 2.1 - Break each session's duration into fragments
Configurations:
* duration of each fragment in seconds;
* number of seconds to be discarded at the beginning of the session, accounting for user's stabilization and adjustment to posture and activity 

### 2.2 - extracts the intervals for each fragment
Retrieves from the heartbeat files all the intervals contained in each session's duration and adds them to the fragment objects (in memory)


In [6]:
# Split each session into fragments of `duration` seconds after dropping the
# first `crop` seconds. The call also prints how many sessions are long enough
# to yield at least one full fragment.
frags = cf.fragment_sessions(sessions, duration, crop)

351 valid sessions out of 403 total (at least one full fragment of 300 seconds after discarding first 90 seconds)


## 3 - Quality assurance

### To be implemented


In [7]:
# Keep only fragments holding more beats than 0.8 per second of fragment length
# (i.e. more than 240 beats for a 300 s fragment). The cut-off does not depend
# on the fragment, so it is computed once up front.
min_beats = 0.8*duration
valid_frags = [
    fragment
    for fragment in frags
    if len(cf.beats_in_fragment(fragment, RAW_PATH)) > min_beats
]

## 4 - Extract features

In [8]:
# Spot-check the metric computation on one fragment (index 100 of the filtered
# list): load its beats, convert them to a beat list, then print the metrics.
sample_beats = cf.beats_in_fragment(valid_frags[100], RAW_PATH)
beats = cf.beatlist(sample_beats)
metrics = cf.calc_metrics(beats)
print(metrics)

{'rmssd': 209.48207704122029, 'sdnn': 229.30832319814425, 'nn50': 181, 'pnn50': 68.045112781954884, 'mrri': 1146.2471910112361, 'mhr': 55.228795387239735, 'total_power': 26056.679011398453, 'vlf': 9182.1482395971598, 'lf': 3262.1247887490317, 'hf': 13612.40598305226, 'lf_hf': 0.23964351289628355, 'lfnu': 19.33164739727345, 'hfnu': 80.668352602726557}


In [9]:
# Report how many fragments passed the quality filter.
n_valid = len(valid_frags)
print(n_valid)

# Build one aggregated record per valid fragment. Note that `df` is a plain
# Python list at this point; it only becomes a DataFrame in the next cell.
df = [cf.aggregate_data(fragment, RAW_PATH) for fragment in valid_frags]

2694


In [10]:
# Turn the list of per-fragment records into a table (one row per fragment)
# and preview its first five rows.
pdf = pd.DataFrame(data=df)
pdf.head(5)

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,posture,rmssd,sdnn,sess,start,stop,total_power,user,vlf
0,focused-active,21238.893957,68.228214,9890.301159,0.465669,31.771786,77.231098,906.097633,229,0,67.952522,sitting,281.708095,306.165341,7,2017-09-29 12:26:58,2017-09-29 12:31:58,42825.808388,0,11696.613272
1,focused-active,37365.176678,71.525847,14874.92739,0.398096,28.474153,65.507411,1051.443299,222,1,76.551724,sitting,320.769608,309.37609,7,2017-09-29 12:31:58,2017-09-29 12:36:58,61754.10481,0,9514.000742
2,focused-active,22629.894321,71.555939,8995.565115,0.397508,28.444061,58.785343,1102.341727,227,2,81.949458,sitting,274.684736,255.319991,7,2017-09-29 12:36:58,2017-09-29 12:41:58,36541.161262,0,4915.701827
3,focused-active,11737.322775,72.035316,4556.522381,0.388208,27.964684,52.56337,1169.790076,218,3,83.524904,sitting,215.258332,174.052679,7,2017-09-29 12:41:58,2017-09-29 12:46:58,18007.710325,0,1713.865169
4,focused-active,15936.758688,65.094341,8545.797731,0.536232,34.905659,56.381556,1112.518116,231,4,84.0,sitting,241.468936,200.321641,7,2017-09-29 12:46:58,2017-09-29 12:51:58,27156.273794,0,2673.717375


In [11]:
# Summary statistics of the numeric columns, used to eyeball value ranges and
# spot outliers before the table is exported.
pdf.describe()

Unnamed: 0,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,rmssd,sdnn,sess,total_power,user,vlf
count,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0,2694.0
mean,23873.0,45.815472,412769.8,2.46594,54.184528,78.661621,819.019148,89.473645,18.99072,25.773847,112.382566,114.926574,164.193022,1475527.0,1.624722,1038884.0
std,182123.8,23.079335,8977828.0,4.822733,23.079335,18.109035,173.770072,71.387172,24.864286,22.689257,169.919067,122.765916,117.312307,45856380.0,2.009594,37079270.0
min,0.5751146,1.419545,2.567078,0.027289,2.656368,47.596407,334.157205,0.0,0.0,0.0,3.369205,13.625541,7.0,33.90064,0.0,25.59104
25%,372.7382,26.437869,586.396,0.563744,36.05093,66.002474,703.884813,30.0,2.0,7.290387,33.034351,53.88783,52.0,1761.79,0.0,505.9374
50%,917.2297,43.726407,1280.138,1.286951,56.273593,77.199486,793.543971,72.0,7.0,18.870437,53.744197,77.456919,135.0,3576.149,1.0,1088.38
75%,4668.043,63.94907,3023.524,2.782454,73.562131,87.023533,933.362043,136.0,29.0,39.789854,131.090808,138.225573,294.0,11902.74,2.0,2524.048
max,6139868.0,97.343632,426383800.0,69.445109,98.580455,180.176325,1269.037344,354.0,113.0,91.726619,2239.26324,1776.464653,350.0,2338062000.0,6.0,1905538000.0


In [12]:
# Export the feature table to the pre-processed folder. The file name records
# the crop and fragment duration (both in seconds) that produced it.
out_name = 'df_{}_{}.xlsx'.format(crop, duration)
pdf.to_excel(PRE_PATH + '/' + out_name)

In [13]:
# List the fragments whose high-frequency power exceeds 1e6, far above the
# median reported by describe(). Presumably these are artefacts rather than
# physiological values — to be confirmed.
hf_outliers = pdf['hf'] > 1000000
pdf.loc[hf_outliers]

Unnamed: 0,activity,hf,hfnu,lf,lf_hf,lfnu,mhr,mrri,nn50,order,pnn50,posture,rmssd,sdnn,sess,start,stop,total_power,user,vlf
18,leisure-passive,3481862.0,2.057892,165713700.0,47.593423,97.942108,114.527201,755.661728,196,9,48.514851,sitting,1667.724397,1226.359214,9,2017-09-29 20:05:28,2017-09-29 20:10:28,349167000.0,0,179971400.0
160,eat,1216702.0,19.404664,5053449.0,4.1534,80.595336,101.162743,965.132911,185,6,58.730159,sitting,1479.885082,1048.500217,24,2017-10-04 13:10:04,2017-10-04 13:15:04,7482753.0,0,1212602.0
161,eat,1176622.0,23.417895,3847834.0,3.270239,76.582105,117.007072,811.032172,220,7,59.139785,sitting,1199.36879,872.067106,24,2017-10-04 13:15:04,2017-10-04 13:20:04,6758502.0,0,1734047.0
162,eat,1149476.0,35.16126,2119679.0,1.844039,64.83874,107.797894,743.728155,184,8,44.768856,sitting,785.166743,625.05849,24,2017-10-04 13:20:04,2017-10-04 13:25:04,3772668.0,0,503513.2
173,eat,1619905.0,7.482271,20030000.0,12.364926,92.517729,116.145955,642.578616,158,19,33.193277,sitting,1024.735296,724.396072,24,2017-10-04 14:15:04,2017-10-04 14:20:04,29988870.0,0,8338966.0
225,focused-passive,1067715.0,26.58397,2948672.0,2.761665,73.41603,101.180399,843.151934,209,39,57.894737,sitting,972.770573,788.434939,25,2017-10-05 12:51:13,2017-10-05 12:56:13,5482440.0,0,1466053.0
835,household-chores,1106908.0,22.782275,3751727.0,3.389377,77.217725,103.029756,655.965739,75,6,16.094421,standing,711.716355,545.558334,67,2017-10-14 13:48:28,2017-10-14 13:53:28,6859219.0,0,2000584.0
1228,sleep,2474411.0,7.423667,30856970.0,12.470431,92.576333,79.669903,852.571031,168,49,46.927374,lie,460.809232,397.942859,116,2017-11-09 04:30:32,2017-11-09 04:35:32,46600440.0,0,13269070.0
1246,sleep,1122949.0,3.42505,31663390.0,28.196653,96.57495,77.857616,953.015528,247,67,76.94704,lie,1097.452557,767.879751,116,2017-11-09 06:00:32,2017-11-09 06:05:32,71959090.0,0,39172750.0
2287,rest-active,2674476.0,23.469649,8720989.0,3.260822,76.530351,100.843465,1095.157143,202,1,72.401434,sit,1711.040725,1207.022049,322,2017-12-26 19:53:12,2017-12-26 19:58:12,14753900.0,5,3358434.0
