# Apple Health POC

[End to End ETL Pipeline](https://medium.com/@ericfflynn/a-cloud-based-etl-pipeline-apple-health-data-to-mysql-48391576ce8e)

[Health Auto Export Docs](https://github.com/Lybron/health-auto-export)

[Streamlit Visual Example](https://github.com/ericfflynn/health-app/blob/main/Home.py)

[Elasticsearch Visual Example](https://github.com/markwk/qs_ledger/tree/master/apple_health)

[Python Apple Health Package](https://github.com/fedecalendino/apple-health)

This PoC will outline collecting data from Apple Health and preprocessing it for visualization. The current process will handle processing the `export.xml` file from Apple Health (retrieved from IPhone). Once we can confirm that the data is extracted, transformed and loaded again properly we will move on to the next step. I want to do some analysis into the Apple Health Kit to see all of the potential XML tags that can be extracted. I want to build my own data pipeline to process the data.


1. Config

2. Extract data

3. Transform/Preprocess Data

4. Load data (as parquet)

5. Build weekly calendar workout

## 1. Config

In [1]:
ROOT_DIR = '../../data/workout/'
import datetime
import pandas as pd
from health import HealthData
from consts import *
import xmltodict
import re

pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

def _parse_float(value, default=None):
    if value is None:
        return default
    
    if value in HK_CONSTANTS:
        return HK_CONSTANTS[value]

    try:
        return float(value)
    except (ValueError, TypeError):
        return default
    
def _parse_date(value):
    if value is None:
        return None
    if type(value) == datetime.datetime:
        return value
    return pd.Timestamp(value).to_pydatetime()

def _clean_string(s):
    if isinstance(s, str):
        return re.sub("[\W_]+",'',s).upper().replace('Â','')
    else:
        return s
    
def _parse_source_id(source_name):
    source_id = _clean_string(source_name)
    return source_id

def _parse_device_string(device_string):
    # Split on commas
    components = device_string.split(',')

    # Initialize the dictionary to store key-value pairs
    device_info = {}
    
    # Initialize variables to hold the key and value
    current_key = None
    current_value = []

    # Process each component
    for component in components:
        # Split on colon to separate key and value
        if ':' in component:
            if current_key is not None:
                # Join the current value parts and store in the dictionary
                device_info[current_key.strip()] = '.'.join(current_value).strip()
            # Split into key and value parts
            key, value = component.split(':', 1)
            current_key = key.replace('&lt;','').replace('>','').replace('<','')
            current_value = [value]
        else:
            # Append the current component to the value list
            current_value.append(component)
    
    # Store the last key-value pair
    if current_key is not None:
        device_info[current_key.strip()] = ':'.join(current_value).strip()

    return device_info

TYPE = "@type"
SOURCE_NAME = "@sourceName"
SOURCE_VERSION = "@sourceVersion"
DEVICE = "@device"
CREATION_DATE = "@creationDate"
START_DATE = "@startDate"
END_DATE = "@endDate"
DATE = '@date'
UNIT = "@unit"
BPM = "@bpm"
TIME = "@time"

KEY = "@key"
VALUE = "@value"
MIN='@minimum'
MAX='@maximum'
AVG='@average'
SUM='@sum'



## 2. Extract data

**Record**


**Activity Summary**


**Workout**


**Config**

In [2]:
file = ROOT_DIR + 'export.xml'

with open(file) as file:
    xml = xmltodict.parse(file.read())
    data = xml["HealthData"]

In [11]:
file = ROOT_DIR + 'export.xml'

class HealthKitData:
    '''
    XML HealthKit data. All data is sorted by date low-to-high
    '''
    def __init__(self, file):
        if type(file) == dict:
            self.data = file
        else:
            self.file = file
            self.config = {}
            self.data = None
            with open(file) as file:
                xml = xmltodict.parse(file.read())
                self.data = xml["HealthData"]
        self.config = self._get_config()
        self.activity_summaries = self._get_activity_summaries()
        self.workouts = self._get_workouts()
        self.records = self._get_records()

    def get_workouts(self, workout_type=None, source_type=None, start_date=None, end_date=None):
        start_date = _parse_date(start_date)
        end_date = _parse_date(end_date)
        for w in self.workouts:
            if workout_type is not None and w['workout_type'] != workout_type:
                continue
            if source_type is not None and self.config['sources'][w['source_id']] != source_type:
                continue
            if start_date is not None and _parse_date(w['start_date_str']) < start_date:
                continue
            if end_date and _parse_date(w['start_date_str']) > end_date:
                continue
            yield w

    def get_records(self, record_type=None, source_type=None, start_date=None, end_date=None):
        for w in self.records:
            if record_type is not None and w['workout_type'] != record_type:
                continue
            if source_type is not None and self.config['sources'][w['source_id']] != source_type:
                continue
            if start_date is not None and _parse_date(w['start_date_str']) < start_date:
                continue
            if end_date and _parse_date(w['start_date_str']) > end_date:
                continue
            yield w

    def set_sources(self, sources):
        self.config['sources'] = sources

    def _get_records(self):
        """
        Parse Records from Payload
        Return a dict
        """
        sources = {}

        records = self.data['Record']#self.data.pop('Records', {})
        out = []
        for r in records:
            rec = Record(**r)
            sources.update(self._parse_source(r))
            out.append(rec.__dict__)

        self.set_sources(sources)
        return out
    
    def _parse_source(self, data):
        """
        Parse Sources from Payload
        Return a dict
        """
        source_name = data.get(SOURCE_NAME)

        source_type = "UNKNOWN"
        if 'WATCH' in source_name.upper():
            source_type = 'WATCH'
        elif 'RENPHO' in source_name.upper():
            source_type = 'SCALE'
        elif 'PHONE' in source_name.upper():
            source_type = 'PHONE'
        elif 'MYFITNESSPAL' in source_name.upper():
            source_type = 'APP'
        elif 'HEALTH' in source_name.upper():
            source_type = 'APP' # manually entered value into health app
        elif 'STRONG' in source_name.upper():
            source_type = 'APP'
        elif 'SLEEP' in source_name.upper():
            source_type = 'APP'
            
        source_version = data.get(SOURCE_VERSION, None)
        source_id = _parse_source_id(source_name)
        device = data.get(DEVICE,None)
        source = {
            "source_name":source_name,
            "source_version": source_version,
            "source_type": source_type,
        }
        if device is not None:
            source = {
                **source,
                **_parse_device_string(device)
            }
            
        return {source_id: source}
        
        
    def _get_config(self):
        """
        Parse Config from Payload from Me, ExportDate and Unique Devices found in Records
        Return a dict
        """
        me = self.data['Me']#self.data.pop('Me', {})
        export_date = self.data['ExportDate']#self.data.pop('ExportDate', {})
        dob = datetime.datetime.strptime(me.get(HK_ME_DATE_OF_BIRTH), "%Y-%m-%d")
        
        return {
            'date_of_birth': dob,
            'age': (datetime.datetime.now() - dob).days // 365,
            'biological_sex': me.get(HK_ME_BIOLOGICAL_SEX),
            'blood_type': me.get(HK_ME_BLOOD_TYPE),
            'skin_type': me.get(HK_ME_SKIN_TYPE),
            'wheelchair_use': me.get(HK_ME_WHEELCHAIR_USE),
            'sources': {},
            'last_updated': pd.Timestamp(export_date['@value']).to_pydatetime(), 
        }
    
    def _get_activity_summaries(self):
        """
        Parse ActivitySummaries from Payload
        Return a dict of {date: {key: value}}
        """
        activity_summaries = self.data['ActivitySummary']#self.data.pop('ActivitySummary', {})
        parsed = {}
        for activity_summary in activity_summaries:
            date = activity_summary.get(DATE_COMPONENTS)
            parsed[date] = {
                'active_energy_burned': _parse_float(activity_summary.get(ACTIVE_ENERGY_BURNED, None)),
                'active_energy_burned_goal': _parse_float(activity_summary.get(ACTIVE_ENERGY_BURNED_GOAL, None)),
                'active_energy_burned_unit': activity_summary.get(ACTIVE_ENERGY_BURNED_UNIT, "Cal"),
                'exercise_time': _parse_float(activity_summary.get(APPLE_EXERCISE_TIME, None)),
                'exercise_time_goal': _parse_float(activity_summary.get(APPLE_EXERCISE_TIME_GOAL, None)),
                'stand_hours': _parse_float(activity_summary.get(APPLE_STAND_HOURS, None)),
                'stand_hours_goal': _parse_float(activity_summary.get(APPLE_STAND_HOURS_GOAL, None)),
            }
        return parsed
    
    def _get_workouts(self):
        workouts = self.data['Workout']#self.data.pop('Workout', {})
        return [Workout(**w).__dict__ for w in workouts]

class Record:
    NAME_KEY = TYPE
    def __init__(self, **data):
        """
        Parse a Record from the Payload, handle metadata creation and source_id
        """
        self.name: str = data[self.NAME_KEY]
        self.source_id = _parse_source_id(data.get(SOURCE_NAME))
        self.created_at: datetime = _parse_date(data.get(CREATION_DATE, None))
        self.start: datetime = _parse_date(data.get(START_DATE))
        self.start_date_str: str = self.start.strftime("%Y-%m-%d")
        self.end: datetime = _parse_date(data.get(END_DATE))
        self.unit: str = data.get(UNIT, None)
        self.value: float = _parse_float(data.get(VALUE, None))
        self.heartrate_variability = []

        metadata = data.get("MetadataEntry", None)
        if metadata is None:
            self.metadata = []
        elif isinstance(metadata, dict):
            self.metadata = [self._parse_metadata(metadata)]
        elif isinstance(metadata, list):
            self.metadata = list(map(lambda m: self._parse_metadata(m), metadata))
        
        heartrate_variability = data.get("HeartRateVariabilityMetadataList", None)
        if heartrate_variability is not None:
            heartrate_variability = heartrate_variability.get("InstantaneousBeatsPerMinute", None)
            self.heartrate_variability = list(map(lambda m: self._parse_heartrate_variability(m), heartrate_variability))

    def _parse_metadata(self, data):
        return {'key':data.get(KEY), 'value': data.get(VALUE)}
    
    def _parse_heartrate_variability(self, data):
        return {'bpm':data.get(BPM), 'time': _parse_date(data.get(TIME))}

class Workout(Record):
    NAME_KEY = WORKOUT_ACTIVITY_TYPE
    def __init__(self, **data):
        super().__init__(**data)
        self.duration: float = _parse_float(data.get(DURATION))
        self.duration_unit: str = data.get(DURATION_UNIT)

        self.distance: float = _parse_float(data.get(TOTAL_DISTANCE, None))
        self.distance_unit: str = data.get(TOTAL_DISTANCE_UNIT, 'mi')

        self.energy_burned: float = _parse_float(data.get(TOTAL_ENERGY_BURNED, None))
        self.energy_burned_unit: str = data.get(TOTAL_ENERGY_BURNED_UNIT, 'Cal')

        self.flights_climbed: float = _parse_float(data.get(TOTAL_FLIGHTS_CLIMBED))
        self.swimming_strokes: float = _parse_float(data.get(TOTAL_SWIMMING_STROKE_COUNT))
        self.events=[]
        self.statistics=[]
        self.route=None

        workout_events = data.get("WorkoutEvent", None)
        if workout_events is not None:
            if isinstance(workout_events, dict):
                self.events = [self._parse_workout_event(workout_events)]
            else:
                self.events = list(map(lambda m: self._parse_workout_event(m), workout_events))

        workout_statistics = data.get("WorkoutStatistics", None)
        if workout_statistics is not None:
            if isinstance(workout_statistics, dict):
                self.statistics = [self._parse_workout_statistic(workout_statistics)]
            else:
                self.statistics = list(map(lambda m: self._parse_workout_statistic(m), workout_statistics))
    
        workout_route = data.get("WorkoutRoute", None)
        if workout_route is not None:
            self.route = self._parse_workout_route(workout_route)

        if self.distance is None or self.distance_unit is None:
            statistic = self.get_statistic(HK_RECORD_DISTANCE_WALKING_RUNNING)
            if statistic is not None:
                self.distance = statistic['sum'] if statistic['sum']  is not None else None
                self.distance_unit = statistic['unit'] if statistic['unit'] is not None else 'mi'

        if self.energy_burned is None or self.energy_burned_unit is None:
            statistic = self.get_statistic(HK_RECORD_ACTIVE_ENERGY_BURNED)
            if statistic is not None:
                self.energy_burned = statistic['sum'] if statistic['sum'] is not None else None
                self.energy_burned_unit = statistic['unit'] if statistic['unit'] is not None else 'Cal'
            
    def get_statistic(self, statistic_type):
        for s in self.statistics:
            if s['name'] == statistic_type:
                return s
        return None
    
    def get_event(self, event_type):
        for s in self.events:
            if s['name'] == event_type:
                return s
        return None

    def _parse_workout_event(self, data):
        return {
            'name': data.get(TYPE),
            'date': _parse_date(data.get(DATE)),
            'duration': _parse_float(data.get(DURATION)),
            'duration_unit': data.get(DURATION_UNIT),
        }
    
    def _parse_workout_statistic(self, data):
        statistic_type = "SUM" if data.get(SUM,None) is not None else "AVG" 
        return {
            'name': data.get(TYPE),
            'type': statistic_type,
            'start_date': _parse_date(data.get(START_DATE, None)),
            'end_date': _parse_date(data.get(END_DATE, None)),
            'sum': _parse_float(data.get(SUM, None)),
            'average': _parse_float(data.get(AVG, None)),
            'min': _parse_float(data.get(MIN, None)),
            'max': _parse_float(data.get(MAX, None)),
            'unit': _parse_float(data.get(UNIT, None)),
        }
    
    def _parse_workout_route(self, data):
        file_reference = data.get("FileReference", None)
        file_path = file_reference.get("@path", None)
        metadata = data.get("MetadataEntry", None)
        if metadata is None:
            metadata = []
        elif isinstance(metadata, dict):
            metadata = [self._parse_metadata(metadata)]
        elif isinstance(metadata, list):
            metadata = list(map(lambda m: self._parse_metadata(m), metadata))

        return {
            'file_path': file_path,
            'file_name': file_path.split("/")[-1],
            'metadata': metadata,
        }
    
    def save():
        

    
hkd = HealthKitData(data)
#hkd.workouts[-6]

In [12]:
hkd.config['sources']

{'RENPHO': {'source_name': 'Renpho',
  'source_version': '1',
  'source_type': 'SCALE'},
 'RENPHOHEALTH': {'source_name': 'Renpho Health',
  'source_version': '32',
  'source_type': 'SCALE'},
 'HEALTH': {'source_name': 'Health',
  'source_version': '16.2',
  'source_type': 'APP'},
 'MYFITNESSPAL': {'source_name': 'MyFitnessPal',
  'source_version': '46391',
  'source_type': 'APP'},
 'CHARLIESAPPLEWATCH': {'source_name': 'Charlieâ€™s AppleÂ\xa0Watch',
  'source_version': '9.6.3',
  'source_type': 'WATCH',
  'HKDevice': '0x28063def0>',
  'name': 'Apple Watch',
  'manufacturer': 'Apple Inc.',
  'model': 'Watch',
  'hardware': 'Watch6.13',
  'software': '9.6.3>'},
 'CHARLIESIPHONE': {'source_name': 'Charlieâ€™s iPhone',
  'source_version': '16.7.2',
  'source_type': 'PHONE'},
 'STRONG': {'source_name': 'Strong',
  'source_version': '7269',
  'source_type': 'APP'},
 'SLEEP': {'source_name': 'Sleep',
  'source_version': '9.0',
  'source_type': 'APP'}}

In [14]:
a = list(hkd.get_workouts(start_date='2024-07-01', end_date='2024-07-30'))
a

[{'name': 'HKWorkoutActivityTypeTraditionalStrengthTraining',
  'source_id': 'CHARLIESAPPLEWATCH',
  'created_at': datetime.datetime(2024, 7, 1, 13, 27, 51, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),
  'start': datetime.datetime(2024, 7, 1, 11, 59, 59, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),
  'start_date_str': '2024-07-01',
  'end': datetime.datetime(2024, 7, 1, 13, 27, 46, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),
  'unit': None,
  'value': None,
  'heartrate_variability': [],
  'metadata': [{'key': 'HKIndoorWorkout', 'value': '0'},
   {'key': 'HKAverageMETs', 'value': '6.49148 kcal/hrÂ·kg'},
   {'key': 'HKWeatherTemperature', 'value': '85.082 degF'},
   {'key': 'HKWeatherHumidity', 'value': '7500 %'},
   {'key': 'HKTimeZone', 'value': 'America/New_York'},
   {'key': 'HKIndoorWorkout', 'value': '0'},
   {'key': 'HKAverageMETs', 'value': '6.49148 kcal/hrÂ·kg'},
   {'key': 'HKWeatherTemperature', 'v

In [31]:
w = Workout(**data['Workout'][-6])
w.get_statistic('HKQuantityTypeIdentifierActiveEnergyBurned')
w.__dict__

{'name': 'HKQuantityTypeIdentifierDistanceWalkingRunning', 'type': 'SUM', 'start_date': datetime.datetime(2024, 7, 17, 12, 37, 53, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 'end_date': datetime.datetime(2024, 7, 17, 13, 30, 59, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 'sum': 3.4201, 'average': None, 'min': None, 'max': None, 'unit': None}
{'name': 'HKQuantityTypeIdentifierActiveEnergyBurned', 'type': 'SUM', 'start_date': datetime.datetime(2024, 7, 17, 12, 37, 53, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 'end_date': datetime.datetime(2024, 7, 17, 13, 30, 59, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 'sum': 436.867, 'average': None, 'min': None, 'max': None, 'unit': None}
{'name': 'HKQuantityTypeIdentifierActiveEnergyBurned', 'type': 'SUM', 'start_date': datetime.datetime(2024, 7, 17, 12, 37, 53, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 'end_

{'name': 'HKWorkoutActivityTypeRunning',
 'source_id': 'CHARLIESAPPLEWATCH9_6_3',
 'created_at': datetime.datetime(2024, 7, 17, 13, 31, 7, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),
 'start': datetime.datetime(2024, 7, 17, 12, 37, 53, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),
 'start_date_str': '2024-07-17',
 'end': datetime.datetime(2024, 7, 17, 13, 30, 59, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),
 'unit': None,
 'value': None,
 'heartrate_variability': [],
 'metadata': [{'key': 'HKIndoorWorkout', 'value': '0'},
  {'key': 'HKElevationAscended', 'value': '207 cm'},
  {'key': 'HKWeatherHumidity', 'value': '6600 %'},
  {'key': 'HKTimeZone', 'value': 'America/New_York'},
  {'key': 'HKWeatherTemperature', 'value': '89.456 degF'},
  {'key': 'HKAverageMETs', 'value': '6.99379 kcal/hrÂ·kg'},
  {'key': 'HKIndoorWorkout', 'value': '0'},
  {'key': 'HKElevationAscended', 'value': '207 cm'},
  {'key': 'HKWeather

In [12]:
file = ROOT_DIR + 'export.xml'

data = HealthData.read(
    file,
    include_me=True,
    include_activity_summaries=True,
    include_correlations=False,
    include_records=True,
    include_workouts=True,
)

print(data.me.biological_sex)
print(f"{len(data.activity_summaries)} activity records")
print(f"{len(data.correlations)} correlations")
print(f"{len(data.records)} records")
print(f"{len(data.workouts)} workouts")

HKBiologicalSexMale
539 activity records
0 correlations
1732268 records
545 workouts


In [64]:
day = datetime.datetime.date(datetime.datetime(2024,7,17))

config = data.me.__dict__
day_activity_summary = [summary for summary in data.activity_summaries if summary.date.date() == day]
day_workouts = [workout for workout in data.workouts if workout.start.date() == day]
day_records = [record for record in data.records if record.start.date() == day]

In [65]:
config

{'date_of_birth': datetime.datetime(1998, 4, 11, 0, 0),
 'biological_sex': 'HKBiologicalSexMale',
 'blood_type': 'HKBloodTypeNotSet',
 'skin_type': 'HKFitzpatrickSkinTypeNotSet',
 'wheelchair_use': None}

In [58]:
day_activity_summary[0].__dict__

{'date': datetime.datetime(2024, 7, 17, 0, 0),
 'active_energy_burned': 3227.75,
 'active_energy_burned_goal': 500.0,
 'active_energy_burned_unit': 'Cal',
 'exercise_time': 240.0,
 'exercise_time_goal': 45.0,
 'stand_hours': 13.0,
 'stand_hours_goal': 8.0}

In [62]:
workout = day_workouts[1].__dict__

workout_start_time = workout['start']
workout_end_time = workout['end']
print(workout)
workout_records = [record for record in data.records if record.start >= workout_start_time and record.end <= workout_end_time]
workout_records_df = pd.DataFrame([record.__dict__ for record in workout_records])
workout_records_df

{'name': 'HKWorkoutActivityTypeRunning', 'source': 'Charlieâ€™s AppleÂ\xa0Watch', 'created_at': datetime.datetime(2024, 7, 17, 13, 31, 7, tzinfo=tzoffset(None, -14400)), 'start': datetime.datetime(2024, 7, 17, 12, 37, 53, tzinfo=tzoffset(None, -14400)), 'end': datetime.datetime(2024, 7, 17, 13, 30, 59, tzinfo=tzoffset(None, -14400)), 'metadata': [HKIndoorWorkout: 0, HKElevationAscended: 207 cm, HKWeatherHumidity: 6600 %, HKTimeZone: America/New_York, HKWeatherTemperature: 89.456 degF, HKAverageMETs: 6.99379 kcal/hrÂ·kg, HKIndoorWorkout: 0, HKElevationAscended: 207 cm, HKWeatherHumidity: 6600 %, HKTimeZone: America/New_York, HKWeatherTemperature: 89.456 degF, HKAverageMETs: 6.99379 kcal/hrÂ·kg], 'duration': 53.09000549912453, 'duration_unit': 'min', 'distance': 0.0, 'distance_unit': None, 'energy_burned': 0.0, 'energy_burned_unit': None, 'flights_climbed': 0.0, 'swimming_strokes': 0.0}


Unnamed: 0,name,source,created_at,start,end,metadata,unit,value,heart_rate
0,HKQuantityTypeIdentifierHeartRate,Charlieâ€™s AppleÂ Watch,2024-07-17 12:37:55-04:00,2024-07-17 12:37:53-04:00,2024-07-17 12:37:53-04:00,[HKMetadataKeyHeartRateMotionContext: 2],count/min,132.00000,[]
1,HKQuantityTypeIdentifierHeartRate,Charlieâ€™s AppleÂ Watch,2024-07-17 12:38:00-04:00,2024-07-17 12:37:55-04:00,2024-07-17 12:37:55-04:00,[HKMetadataKeyHeartRateMotionContext: 2],count/min,132.00000,[]
2,HKQuantityTypeIdentifierHeartRate,Charlieâ€™s AppleÂ Watch,2024-07-17 12:38:05-04:00,2024-07-17 12:38:01-04:00,2024-07-17 12:38:01-04:00,[HKMetadataKeyHeartRateMotionContext: 2],count/min,131.00000,[]
3,HKQuantityTypeIdentifierHeartRate,Charlieâ€™s AppleÂ Watch,2024-07-17 12:38:10-04:00,2024-07-17 12:38:04-04:00,2024-07-17 12:38:04-04:00,[HKMetadataKeyHeartRateMotionContext: 2],count/min,129.00000,[]
4,HKQuantityTypeIdentifierHeartRate,Charlieâ€™s AppleÂ Watch,2024-07-17 12:38:15-04:00,2024-07-17 12:38:10-04:00,2024-07-17 12:38:10-04:00,[HKMetadataKeyHeartRateMotionContext: 2],count/min,122.00000,[]
...,...,...,...,...,...,...,...,...,...
9052,HKQuantityTypeIdentifierRunningSpeed,Charlieâ€™s AppleÂ Watch,2024-07-17 13:30:48-04:00,2024-07-17 13:30:45-04:00,2024-07-17 13:30:45-04:00,[],mi/hr,6.87578,[]
9053,HKQuantityTypeIdentifierRunningSpeed,Charlieâ€™s AppleÂ Watch,2024-07-17 13:30:51-04:00,2024-07-17 13:30:48-04:00,2024-07-17 13:30:48-04:00,[],mi/hr,6.91889,[]
9054,HKQuantityTypeIdentifierRunningSpeed,Charlieâ€™s AppleÂ Watch,2024-07-17 13:30:52-04:00,2024-07-17 13:30:50-04:00,2024-07-17 13:30:50-04:00,[],mi/hr,7.05495,[]
9055,HKQuantityTypeIdentifierRunningSpeed,Charlieâ€™s AppleÂ Watch,2024-07-17 13:30:56-04:00,2024-07-17 13:30:53-04:00,2024-07-17 13:30:53-04:00,[],mi/hr,7.12622,[]


In [63]:
workout_records_df.name.value_counts()

name
HKQuantityTypeIdentifierBasalEnergyBurned                 1249
HKQuantityTypeIdentifierActiveEnergyBurned                1249
HKQuantityTypeIdentifierDistanceWalkingRunning            1234
HKQuantityTypeIdentifierStepCount                         1233
HKQuantityTypeIdentifierRunningSpeed                      1229
HKQuantityTypeIdentifierRunningPower                      1227
HKQuantityTypeIdentifierHeartRate                          637
HKQuantityTypeIdentifierRunningStrideLength                310
HKQuantityTypeIdentifierRunningVerticalOscillation         305
HKQuantityTypeIdentifierRunningGroundContactTime           295
HKQuantityTypeIdentifierAppleExerciseTime                   52
HKQuantityTypeIdentifierAppleStandTime                      10
HKQuantityTypeIdentifierWalkingSpeed                        10
HKQuantityTypeIdentifierWalkingStepLength                   10
HKQuantityTypeIdentifierWalkingDoubleSupportPercentage       5
HKQuantityTypeIdentifierEnvironmentalAudioExposure

## 3. Transform/Preprocess Data


In [32]:
import datetime 

## I want to create a config file and a data file that is either a parquet or a csv of all of the data that I have.

class UserConfig:
    def __init__(self, config):
        self.date_of_birth = config['date_of_birth']
        self.biological_sex = config['biological_sex']
        self.blood_type = config['blood_type']
        self.skin_type = config['skin_type']
        self.wheelchair_use = config['wheelchair_use']

    def __repr__(self):
        return f"UserConfig(date_of_birth={self.date_of_birth}, biological_sex={self.biological_sex}, blood_type={self.blood_type}, skin_type={self.skin_type}, wheelchair_use={self.wheelchair_use})"
    


In [68]:
day_records[-20].__dict__

{'name': 'HKCategoryTypeIdentifierAppleStandHour',
 'source': 'Charlieâ€™s AppleÂ\xa0Watch',
 'created_at': datetime.datetime(2024, 7, 17, 13, 0, 32, tzinfo=tzoffset(None, -14400)),
 'start': datetime.datetime(2024, 7, 17, 13, 0, tzinfo=tzoffset(None, -14400)),
 'end': datetime.datetime(2024, 7, 17, 14, 0, tzinfo=tzoffset(None, -14400)),
 'metadata': [],
 'unit': None,
 'value': 0,
 'heart_rate': []}

## 4. Load data (as parquet)


## 5. Build weekly calendar workout