In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
import parseIntervalFiles as pif
import parseActivityFiles as paf
#pun intended :)
import consolidateFiles as cf


## ! Parse activity files and parse interval files are to be replaced with the corresponding database queries as soon as they are available 

## 0 - Pipeline configuration 
* set the input/output directories, user id and verbose level

In [2]:
    verbose = True
    
    DATA_PATH = "C:\\Users\\ju\\GDrive\\Projects\\HeRV\\Data\\" 
    # DATA_PATH = "/home/ju/GDrive/Projects/HeRV/Data/"
    
    RAW_PATH = DATA_PATH + "Raw"
    PRE_PATH = DATA_PATH + "PreProcessed"
    
    # duration (in seconds) to be cropped from the beginning of each second to account for stabilization and user adjustment
    crop = 90
    
    # duration (in seconds) of each fragment to be sent to analysis
    duration = 300
    
    # if any fragment has more than 'threshold' consecutive seconds with no beats, it will be discarded
    threshold = 3
    

## 1 - Extract sessions from raw data (intervals + annotations files)

### 1.1 - List activities with session start/stop datetime
Parses activities files and prints all errors found (activities without start or stop, invalid dates, etc)

In [3]:
sessions = []
for user in range(7):
    print ("------- parsing activity files for user ", user, " -------")
    user_sess = paf.get_user_sessions(user, dirname=RAW_PATH)
    print ("------- parsing interval files for user ", user, " -------")
    user_sess = cf.sessions_add_beats(sessions=user_sess, dirname=RAW_PATH+"\\"+str(user))
    sessions.extend(user_sess)

------- parsing activity files for user  0  -------
reading act170929.csv ... 
orphan stop in: ['2017-09-29 00:19:00', 'stop', '', '', '', '', '']
reading act170930.csv ... 
reading act171001.csv ... 
reading act171003.csv ... 
orphan start in: ['2017-10-03 22:56:19', 'start', 'eat', 'sitting']
reading act171004.csv ... 
reading act171005.csv ... 
orphan start in: ['2017-10-05 11:08:11', 'start', 'focused-passive', 'sitting', '']
orphan start in: ['2017-10-05 12:54:00', 'start', 'eat', 'sitting', '']
orphan start in: ['2017-10-05 13:48:39', 'start', 'rest-active', 'sitting', '']
orphan start in: ['2017-10-05 13:57:37', 'start', 'movement', 'standing', '']
orphan start in: ['2017-10-05 17:55:44', 'start', 'focused-active', 'sitting', '']
orphan start in: ['2017-10-05 21:53:45', 'start', 'rest-active', 'sitting', '']
orphan start in: ['2017-10-05 23:47:45', 'start', 'focused-active', 'sitting', 'sitting on bed, installing and using pandas']
reading act171006.csv ... 
reading act171010.cs

2017-11-07 16:15:48	25 min	2171 beats
2017-11-07 16:47:35	24 min	2170 beats
2017-11-07 17:28:35	4 min	457 beats
2017-11-07 19:04:00	40 min	3427 beats
2017-11-07 20:46:47	22 min	1964 beats
2017-11-07 21:11:50	3 min	303 beats
2017-11-07 21:31:08	37 min	3702 beats
2017-11-09 00:24:02	456 min	34105 beats
2017-11-09 09:40:49	25 min	2714 beats
2017-11-09 12:15:35	4 min	370 beats
2017-11-09 12:43:56	19 min	1592 beats
2017-11-09 13:18:38	2 min	274 beats
2017-11-09 13:23:55	2 min	254 beats
2017-11-09 13:40:41	26 min	2342 beats
2017-11-09 16:28:26	22 min	2594 beats
2017-11-09 17:23:16	57 min	0 beats
2018-01-23 17:44:09	16 min	0 beats
------- parsing activity files for user  1  -------
reading act170926.csv ... 
reading act171029.csv ... 
reading act171030.csv ... 
reading act171031.csv ... 
reading act171101.csv ... 
reading act171102.csv ... 
reading act171103.csv ... 
reading act171104.csv ... 
reading act171130.csv ... 
reading act180121.csv ... 
reading act180123.csv ... 
53 sessions extract

2017-10-31 08:58:00	23 min	1735 beats
2017-10-31 14:35:00	17 min	1466 beats
2017-10-31 15:00:00	3 min	289 beats
2017-11-02 12:38:00	15 min	1228 beats
2017-11-02 12:54:00	17 min	1414 beats
2017-11-02 13:12:00	28 min	2522 beats
2017-11-12 15:16:06	0 min	19 beats
2017-11-12 15:16:20	50 min	4024 beats
2017-11-12 16:06:45	0 min	65 beats
2017-11-12 16:08:06	12 min	647 beats
2017-11-12 16:20:41	12 min	89 beats
------- parsing activity files for user  3  -------
reading act171031.csv ... 
reading act171101.csv ... 
orphan stop in: ['2017-11-01 06:44:22', 'stop', '', '']
reading act171102.csv ... 
reading act171103.csv ... 
orphan stop in: ['2017-11-03 07:27:42', 'stop', '', '']
reading act171104.csv ... 
reading act171105.csv ... 
orphan stop in: ['2017-11-05 19:38:18', 'stop', '', '']
13 sessions extracted and 3 errors found
------- parsing interval files for user  3  -------
2017-10-31 06:12:08	1 min	75 beats
2017-10-31 06:36:43	0 min	1 beats
2017-10-31 06:37:06	30 min	4733 beats
2017-11-01 

In [4]:
df = pd.DataFrame(sessions)
df.sample(10)

Unnamed: 0,activity,duration,notes,posture,rr,start,stop,user
87,leisure-active,2242,,sitting,"[{'date': 2017-10-14 19:21:58, 'interval': 765...",2017-10-14 19:21:57,2017-10-14 19:59:19,0
153,household-chores,1516,,stand,"[{'date': 2017-11-09 09:40:50, 'interval': 572...",2017-11-09 09:40:49,2017-11-09 10:06:05,0
171,focused-active,1279,,sit,"[{'date': 2017-10-30 00:00:02, 'interval': 802...",2017-10-30 00:00:01,2017-10-30 00:21:20,1
147,focused-active,297,,sit,"[{'date': 2017-11-07 17:28:36, 'interval': 689...",2017-11-07 17:28:35,2017-11-07 17:33:32,0
234,sleep,1620,,lie,"[{'date': 2017-09-30 09:32:01, 'interval': 102...",2017-09-30 09:32:00,2017-09-30 09:59:00,2
298,leisure-passive,900,,lie,"[{'date': 2017-10-11 13:32:01, 'interval': 907...",2017-10-11 13:32:00,2017-10-11 13:47:00,2
247,focused-passive,900,,sit,"[{'date': 2017-10-06 10:00:01, 'interval': 792...",2017-10-06 10:00:00,2017-10-06 10:15:00,2
70,rest-active,2101,online shopping,sitting,"[{'date': 2017-10-13 12:50:57, 'interval': 653...",2017-10-13 12:50:56,2017-10-13 13:25:57,0
296,eat,240,,sit,"[{'date': 2017-10-11 12:45:01, 'interval': 744...",2017-10-11 12:45:00,2017-10-11 12:49:00,2
326,focused-active,900,,sit,"[{'date': 2017-10-30 13:35:01, 'interval': 739...",2017-10-30 13:35:00,2017-10-30 13:50:00,2


In [5]:
df.groupby('activity').count()['duration']

activity
eat                  43
exercise-high         5
exercise-low          1
focused-active      102
focused-passive      36
focused-passive       1
household-chores     28
leisure-active       10
leisure-passive      47
movement             40
not-recorded          4
rest-active          27
rest-passive         16
sleep                47
Name: duration, dtype: int64

## 4 - Extract features

In [None]:
beats = cf.beatlist(cf.beats_in_fragment(valid_frags[100], RAW_PATH))
print(cf.calc_metrics(beats))

In [None]:
print(len(valid_frags))
df = [cf.aggregate_data(f, RAW_PATH) for f in valid_frags]

In [None]:
pdf = pd.DataFrame(df)
pdf.head()

In [None]:
pdf.describe()

In [None]:
pdf.to_excel(PRE_PATH+'/'+'df_'+str(crop)+'_'+str(duration)+'.xlsx')

In [None]:
pdf.loc[pdf.hf > 1000000]