In [None]:
import pandas as pd
from tasrif.processing_pipeline.custom import FlagEpochActivityLessThanOperator
from tasrif.processing_pipeline.custom import FlagDayIfValidEpochsSmallerThanOperator
from tasrif.processing_pipeline.custom import FlagDayIfValidEpochsLargerThanOperator
from tasrif.processing_pipeline.custom import FlagEpochNullColsOperator
from tasrif.processing_pipeline.custom import FlagDayIfNotEnoughConsecutiveDaysOperator
from tasrif.processing_pipeline.custom import ValidationReportOperator
from tasrif.processing_pipeline.custom import RemoveFlaggedDaysOperator

from tasrif.data_readers.my_heart_counts import HealthKitDataDataset
from tasrif.processing_pipeline.pandas import ConvertToDatetimeOperator
from tasrif.processing_pipeline.custom import IterateCsvOperator
from tasrif.processing_pipeline.custom import CreateFeatureOperator
from tasrif.processing_pipeline import ProcessingPipeline

# Pipeline of flagging operators. Both column arguments point at the `value`
# column of the per-record frame that is loaded further down in this cell.
validator_pipeline = ProcessingPipeline([
    FlagEpochActivityLessThanOperator(activity_col='value', min_activity_threshold=5),
    FlagDayIfValidEpochsSmallerThanOperator(valid_minutes_per_day=5),
    FlagDayIfValidEpochsLargerThanOperator(max_invalid_minutes_per_day=5),
    FlagEpochNullColsOperator(col_list=['value']),
])


# Root folder of the local My Heart Counts export. It is passed to the dataset
# reader and is also the parent of the per-record CSV folder, so it is defined once.
MHC_FOLDER = '/mnt/c/Development/projects/siha'
CSV_FOLDER = MHC_FOLDER + '/HealthKitData_timeseries'

# Pipeline handed to IterateCsvOperator for each CSV: convert the two timestamp
# columns to datetimes with utc=True.
CSV_PIPELINE = ProcessingPipeline([
    ConvertToDatetimeOperator(feature_names=["startTime", "endTime"], utc=True),
])

# First add a `file_name` feature built from `recordId` + '.csv', then point
# IterateCsvOperator at CSV_FOLDER using that field.
PIPELINE = ProcessingPipeline([
    CreateFeatureOperator(
        feature_name='file_name',
        # Parameter is deliberately not called `df` so it cannot be confused
        # with the module-level `df` assigned below.
        feature_creator=lambda records: records['recordId'] + '.csv',
    ),
    IterateCsvOperator(folder_path=CSV_FOLDER, field='file_name', pipeline=CSV_PIPELINE),
])

hkd = HealthKitDataDataset(mhc_folder=MHC_FOLDER, processing_pipeline=PIPELINE)
# processed_df is consumed as an iterator of (record, frame) pairs; keep the first pair.
record, df = next(hkd.processed_df)

In [3]:
# Inspect the dataset's raw_df attribute (one row per recordId; the rendering below is truncated).
hkd.raw_df

Unnamed: 0,recordId,appVersion,phoneInfo,healthCode,createdOn,data.csv
0,99708dc7-e9b0-40f6-9433-82ddf568cfb4,"version 1.0.9, build 9",iPhone 6,629cba51-e751-45ad-b90b-e4d5e9319cac,1435635459000,30467194
1,3dee3619-a1fa-4fac-97b9-bac1bc0ca35a,"version 1.0.9, build 9",iPhone 6,12a38046-1512-409a-b3a1-6046e97e650e,1435637669000,30467238
2,4de74d06-bd1a-4e9a-99c8-b67cbcdf39b7,"version 1.0.9, build 9",iPhone 6 Plus,3c5d16b9-61c1-4149-9763-0f19e588ef9f,1435633317000,30467268
3,db52fe7d-835b-4ed5-b436-35f61bf3ca42,"version 1.0.9, build 9",iPhone 6,5aa79529-ef84-45e1-a4f3-804aa871fc9a,1435631417000,30467300
4,fe88a233-33b7-4736-b2de-c1b80dab0a0e,"version 1.0.9, build 9",iPhone 6 Plus,f20fd5dd-53e5-4cc8-89b1-3ede61c4b20c,1435639771000,30469709
...,...,...,...,...,...,...
116946,4b1836ed-3b00-41d9-8d81-f37a26f57cc3,"version 1.0.10, build 1",iPhone 6 Plus,0a6eb7f8-c05d-4119-bfe0-f0e233d9f747,1445960386000,32533021
116947,77fb969a-95b0-4c8c-886c-9b686574c689,"version 1.0.10, build 1",iPhone 5s (GSM),e4b83268-f547-4f8c-bde9-b8701c835851,1445963271000,32533022
116948,d123df8e-61d4-4597-b665-108ad8307100,"version 1.0.10, build 1",iPhone 5s (GSM),1c33a77b-8da7-4a98-a6c6-cc4f91d031c5,1445954693000,32533024
116949,d9a74bd2-935a-49e4-aff7-c7e6a10dd6b0,"version 1.0.10, build 1",iPhone 5 (GSM),ffe3d273-9da3-4cfa-b8a2-7c8e7c5e39c0,1445987018000,32533027


In [4]:
# First record yielded by hkd.processed_df; it carries the `file_name` feature added by PIPELINE.
record

Pandas(Index=0, recordId='99708dc7-e9b0-40f6-9433-82ddf568cfb4', appVersion='version 1.0.9, build 9', phoneInfo='iPhone 6', healthCode='629cba51-e751-45ad-b90b-e4d5e9319cac', createdOn=1435635459000, _6=30467194, file_name='99708dc7-e9b0-40f6-9433-82ddf568cfb4.csv')

In [5]:
# Frame that was yielded together with `record`; startTime/endTime are shown as UTC datetimes.
df

Unnamed: 0,startTime,endTime,type,value,unit,source,sourceIdentifier
0,2015-06-21 00:49:36+00:00,2015-06-21 00:54:41+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,84.45,m,phone,com.apple.health
1,2015-06-21 00:54:41+00:00,2015-06-21 00:56:11+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,14.22,m,phone,com.apple.health
2,2015-06-21 01:56:52+00:00,2015-06-21 02:02:52+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,6.72,m,phone,com.apple.health
3,2015-06-21 02:02:52+00:00,2015-06-21 02:04:40+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,24.14,m,phone,com.apple.health
4,2015-06-21 14:23:46+00:00,2015-06-21 14:29:46+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,2.94,m,phone,com.apple.health
...,...,...,...,...,...,...,...
297,2015-06-26 22:13:11+00:00,2015-06-26 22:18:15+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,14.16,m,phone,com.apple.health
298,2015-06-26 22:18:15+00:00,2015-06-26 22:23:03+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,127.15,m,phone,com.apple.health
299,2015-06-26 22:36:16+00:00,2015-06-26 22:42:16+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,1.40,m,phone,com.apple.health
300,2015-06-26 22:42:16+00:00,2015-06-26 22:45:26+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,80.40,m,phone,com.apple.health


In [6]:
# Run the flagging operators on the record frame and show the first element of the result.
# The displayed frame has two extra columns, invalid_code and exp_day.
# NOTE(review): the returned frame is only displayed, not bound to a name.
validator_pipeline.process(df)[0]

Unnamed: 0,startTime,endTime,type,value,unit,source,sourceIdentifier,invalid_code,exp_day
0,2015-06-21 00:49:36+00:00,2015-06-21 00:54:41+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,84.45,m,phone,com.apple.health,512,0
1,2015-06-21 00:54:41+00:00,2015-06-21 00:56:11+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,14.22,m,phone,com.apple.health,512,0
2,2015-06-21 01:56:52+00:00,2015-06-21 02:02:52+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,6.72,m,phone,com.apple.health,512,0
3,2015-06-21 02:02:52+00:00,2015-06-21 02:04:40+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,24.14,m,phone,com.apple.health,512,0
4,2015-06-21 14:23:46+00:00,2015-06-21 14:29:46+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,2.94,m,phone,com.apple.health,513,0
...,...,...,...,...,...,...,...,...,...
297,2015-06-26 22:13:11+00:00,2015-06-26 22:18:15+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,14.16,m,phone,com.apple.health,512,5
298,2015-06-26 22:18:15+00:00,2015-06-26 22:23:03+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,127.15,m,phone,com.apple.health,512,5
299,2015-06-26 22:36:16+00:00,2015-06-26 22:42:16+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,1.40,m,phone,com.apple.health,513,5
300,2015-06-26 22:42:16+00:00,2015-06-26 22:45:26+00:00,HKQuantityTypeIdentifierDistanceWalkingRunning,80.40,m,phone,com.apple.health,512,5


In [7]:
# NOTE(review): this is given `df`, not the frame returned by validator_pipeline.process(df)
# in the previous cell. The output below still carries invalid_code/exp_day, so `df` was
# presumably flagged in place — confirm. Also confirm that an empty result (every row
# removed) is the expected outcome for the thresholds configured in validator_pipeline.
RemoveFlaggedDaysOperator().process(df)[0]

Unnamed: 0,startTime,endTime,type,value,unit,source,sourceIdentifier,invalid_code,exp_day
