In [7]:
import requests
import os 
import glob
import pandas as pd
from ics import Calendar, Event
from dataclasses import dataclass
import numpy as np

In [3]:
import logging
logging.basicConfig()
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)


In [8]:
base_folder = os.getcwd()
source_folder = base_folder + '/healthlake/'
apple_health_files = glob.glob(source_folder + '*.json')

names = [
    'apple_exercise_time',
    'active_energy',
    'calories_burnt',
    'calories_consumed',
    'carbohydrates',
    'dietary_caffeine',
    'dietary_energy',
    'dietary_sugar',
    'fiber',
    'protein',
    'sleep_analysis',
    'total_fat',
    'weight_body_mass'
]

cols = [
    'qty',
    'dates',
    'name',
    'units'
]
df_raw = pd.DataFrame()

for json_file in apple_health_files:
    json_raw = pd.read_json(json_file, lines = True)
    df_raw = pd.concat([df_raw, json_raw])

In [23]:
@dataclass
class Time: 
    "A basic time object"
    time: float     
        
    @property 
    def hours(self) -> float: 
        hours = int(self.time)
        return hours 
    
    @property
    def minutes(self) -> str:
        minutes = int((self.time - self.hours) * 60)
        return minutes
    
    @property 
    def title(self) -> str: 
        title = f"{self.hours}h {self.minutes}m"
        return title


@dataclass 
class Food: 
    carbs: float 
    protein: float 
    fat: float 
    fiber: float 
    
    @property 
    def calories(self) -> float: 
        calories = (self.carbs + self.protein) * 4 + (self.fat) * 9
        return calories
    
    @property 
    def title(self) -> str: 
        title = f"🔥 {self.calories:.0f} cals ({self.carbs:.0f}C, {self.protein:.0f}P, {self.fat:.0f}F)"
        return title 
    
    @property 
    def description(self) -> str: 
        description = f"""
        🔥 {self.calories:.0f} kcal
        🥞 {self.title} 
        🍇 {self.fiber:.0f}
        """
        return description

@dataclass
class Sleep:
    time_asleep: Time 
    time_in_bed: Time 
    in_bed_time: str
    
    @property 
    def efficiency(self) -> float: 
        efficiency = self.time_asleep.time / self.time_in_bed.time * 100
        return efficiency
    
    @property
    def efficiency_title(self) -> str: 
        efficient = f"{self.efficiency:.0f}%"
        efficiency_title = f"🛏️ {efficient}"
        return efficiency_title
    
    @property 
    def title(self) -> str: 
        title = f"💤 {self.time_asleep.title} ({self.in_bed_time})"
        return title 
    
    @property
    def description(self) -> str: 
        description = f"""
        💤 Time asleep: {self.time_asleep.title}
        🛏️ Time in bed: {self.time_in_bed.title}
        🧮 Efficiency: {self.efficiency_title}
        """
        return description
    
    
@dataclass
class Activity: 
    activity_mins: Time 
    mindful_mins: Time 
    
    @property 
    def activity_title(self) -> str: 
        title = f"{self.activity_mins.title} active" 
        return title
    
    @property 
    def mindful_title(self) -> str:
        title = f"{self.mindful_mins.minutes} mins mindful"
        return title
    
    @property 
    def title(self) -> str: 
        title = f"🧠 {self.mindful_title}"
        
    @property 
    def description(self) -> str: 
        description = f"""
        🚴‍♂️ Activity: {self.activity_title}
        🧘 Mindful: {self.mindful_title}
        """
        return description

In [24]:
Sleep(time_asleep = Time(7.5), time_in_bed = Time(8.5), in_bed_time="11:48 pm").efficiency_title

'🛏️ 88%'

In [20]:
@dataclass 
class AppleHealthEvent(Event):
    """
    An event derived from Apple Health data
    For usage within .ics format 
    """
    date: str
    description: str 
    name: str
    
    @property 
    def event(self): 
        if self._event is None: 
            self._event = self._create_event()
        return self._event
            
    @staticmethod 
    def _create_event(self):
        all_day_date = f"{self.date} 00:00:00"
        e = Event()
        e.name = self.name
        e.description = self.description
        e.begin = all_day_date
        e.end = all_day_date
        e.make_all_day()

        return e

In [4]:
def convert_kj_to_cal(row, new_name):
    """Converts kj to calories"""
    row_dict = row.to_dict()
    calorie_value = int(row['qty']/4)
    row_dict['qty'] = calorie_value
    row_dict['name'] = new_name
    row_dict['units'] = 'kcal'
    
    return pd.DataFrame(row_dict, index=[0])


In [69]:
# ETL transformations 

df_ahc = df_raw.copy() 

# define transformations to go from df_raw to df_ahc (apple-health-calendar)
# cleaning values 
df_ahc['dates'] = pd.to_datetime(df_ahc['date']).dt.date
df_ahc['qty'] = df_ahc['qty'].fillna(df_ahc['asleep'])

# # create calories 
active_energy_rows = df_ahc[df_ahc['name'] == 'active_energy'][cols]
dietary_energy_rows = df_ahc[df_ahc['name'] == 'dietary_energy'][cols]

for _, row in active_energy_rows.iterrows():
    df_row = convert_kj_to_cal(row, 'calories_burnt')    
    df_ahc = pd.concat([df_ahc, df_row])
    
for _, row in dietary_energy_rows.iterrows(): 
    df_row = convert_kj_to_cal(row, 'calories_consumed')
    df_ahc = pd.concat([df_ahc, df_row])
    
    
# calculate sleep efficiency 
df_sleep = df_raw[df_raw['name'] == 'sleep_analysis']

for _, row in df_sleep.iterrows():
    eff = row['asleep'] / row['inBed'] * 100
    df_row 
    df_ahc = pd.concat([])
    
# filter out values 
df_ahc = df_ahc[df_ahc['name'].isin(names)][cols].reset_index(drop = True)

# round values 
df_ahc['qty'] = df_ahc['qty'].round(2)


In [70]:
df_ahc[df_ahc['name'].isin(['calories_burnt', 'calories_consumed'])]

Unnamed: 0,qty,dates,name,units
67,759.0,2022-09-10,calories_burnt,kcal
68,888.0,2022-09-11,calories_burnt,kcal
69,906.0,2022-09-12,calories_burnt,kcal
70,937.0,2022-09-13,calories_burnt,kcal
71,1066.0,2022-09-14,calories_burnt,kcal
72,992.0,2022-09-15,calories_burnt,kcal
73,495.0,2022-09-16,calories_burnt,kcal
74,2061.0,2022-09-10,calories_consumed,kcal
75,2216.0,2022-09-11,calories_consumed,kcal
76,2012.0,2022-09-12,calories_consumed,kcal


In [23]:
#  Calendar functions
def _collect_event_emoticon(self.type):
    """
    Creates an event name with an emoticon
    """
    emoticons = {
        'sleep'     : "💤",
        'activity'  : "🔥",
        'food'      : "🥞",
        'mindful'   : "🧘",
        'exercise'  : "🏃",
        'weight'    : "🎚️",
        'average'   : "📈"
    }

    return emoticons.get(self.type)

# %%
def create_event(date, event_name, description: None):
    """
    Create an all day event for the given date and type
    :param date: date as type datetime.date
    :param event_name: name of event as string
    """
    all_day_date = f"{date} 00:00:00"
    e = Event()
    e.name = event_name
    e.description = description
    e.begin = all_day_date
    e.end = all_day_date
    e.make_all_day()

    return e



def generate_calendar(df):
    """
    Generates a CSV and ICS from the dataframe
    :param df: cleansed dataframe from `create_description_cols`
    :param outputs: as type string - a combination of both the local and public storage
    """

    # output_csv_path = f"{output_path}/{file_name}.csv"
    # calendar_file_name = f'{file_name}.ics'
    file_name = 'apple_health'

    csv_file_name = f"{file_name}.csv"
    ics_file_name  = f"{file_name}.ics"


    LOGGER.info("Generating calendar (as .ICS)")
    c = Calendar()
    for _, row in df.iterrows():
        e = create_event(row['date'], row['name'], row['dsc'])
        c.events.add(e)

    df.to_csv(csv_file_name, index=False)

    with open(ics_file_name, 'w') as f:
        f.write(str(c))
        f.close()

    LOGGER.info("Outputing CSV and ICS to: %s", csv_file_name)
    return

In [8]:
df_ahc['name'].unique()

array(['apple_exercise_time', 'carbohydrates', 'dietary_caffeine',
       'dietary_energy', 'dietary_sugar', 'fiber', 'protein',
       'sleep_analysis', 'total_fat', 'weight_body_mass',
       'calories_burnt', 'calories_consumed'], dtype=object)

In [38]:
df_raw['name'].unique()

array(['active_energy', 'apple_exercise_time', 'apple_stand_hour',
       'apple_stand_time', 'basal_energy_burned',
       'blood_oxygen_saturation', 'calcium', 'carbohydrates', 'copper',
       'dietary_caffeine', 'dietary_cholesterol', 'dietary_energy',
       'dietary_sugar', 'dietary_water', 'environmental_audio_exposure',
       'fiber', 'flights_climbed', 'folate', 'handwashing',
       'headphone_audio_exposure', 'heart_rate', 'heart_rate_variability',
       'iron', 'magnesium', 'manganese', 'monounsaturated_fat', 'niacin',
       'pantothenic_acid', 'polyunsaturated_fat', 'potassium', 'protein',
       'respiratory_rate', 'resting_heart_rate', 'riboflavin',
       'saturated_fat', 'selenium', 'six_minute_walking_test_distance',
       'sleep_analysis', 'sodium', 'stair_speed_down', 'stair_speed_up',
       'step_count', 'thiamin', 'total_fat', 'vo2_max', 'vitamin_a',
       'vitamin_b12', 'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e',
       'vitamin_k', 'walking_runnin

98%
96%
90%
85%
100%
70%
85%


In [39]:
df_raw[df_raw['name'].isin(['sleep_analysis'])]

Unnamed: 0,qty,date,name,units,Avg,Min,Max,sleepStart,sleepEnd,inBedEnd,sleepSource,asleep,inBed,inBedStart,inBedSource
234,,2022-09-10 16:05:00+09:30,sleep_analysis,hr,,,,2022-09-09 21:16:00 +0930,2022-09-10 16:57:00 +0930,2022-09-10 16:57:00 +0930,AutoSleep,9.483333,9.633333,2022-09-09 21:16:00 +0930,AutoSleep
235,,2022-09-11 02:12:00+09:30,sleep_analysis,hr,,,,2022-09-10 22:23:00 +0930,2022-09-11 05:56:00 +0930,2022-09-11 05:56:00 +0930,AutoSleep,7.283333,7.55,2022-09-10 22:23:00 +0930,AutoSleep
236,,2022-09-12 03:45:00+09:30,sleep_analysis,hr,,,,2022-09-11 21:15:00 +0930,2022-09-12 05:59:00 +0930,2022-09-12 05:30:24 +0930,AutoSleep,8.516667,9.475888,2022-09-11 20:01:51 +0930,iPhone
237,,2022-09-13 05:35:08+09:30,sleep_analysis,hr,,,,2022-09-12 20:46:38 +0930,2022-09-13 05:38:38 +0930,2022-09-13 05:30:02 +0930,Nam’s Apple Watch,7.6,8.896404,2022-09-12 20:36:15 +0930,iPhone
238,,2022-09-13 21:15:00+09:30,sleep_analysis,hr,,,,2022-09-13 21:15:00 +0930,2022-09-14 06:17:00 +0930,2022-09-14 06:17:00 +0930,AutoSleep,9.033333,9.033333,2022-09-13 21:15:00 +0930,AutoSleep
239,,2022-09-15 06:15:00+09:30,sleep_analysis,hr,,,,2022-09-14 21:31:00 +0930,2022-09-15 06:44:00 +0930,2022-09-15 06:44:00 +0930,AutoSleep,6.466667,9.216667,2022-09-14 21:31:00 +0930,AutoSleep
240,,2022-09-16 04:43:03+09:30,sleep_analysis,hr,,,,2022-09-15 22:26:33 +0930,2022-09-16 06:08:03 +0930,2022-09-16 06:00:01 +0930,Nam’s Apple Watch,7.583333,8.9635,2022-09-15 21:02:12 +0930,iPhone


In [40]:
# config driven 
sleep_cols = ['sleep_analysis']
food_cols = ['carbohydrates', 'protein', 'total_fat', 'fiber']
activity_cols = ['calories_burnt', 'calories_consumed']

sleeps = df_ahc[df_ahc['name'].isin(sleep_cols)]
foods = df_ahc[df_ahc['name'].isin(food_cols)]
activities = df_ahc[df_ahc['name'].isin(activity_cols)]

# pivot this 
daily_sleep = sleeps.copy() 
daily_food = foods.pivot(index = 'dates', columns = 'name')['qty'].reset_index()
daily_activity = activities.pivot(index = 'dates', columns = 'name')['qty'].reset_index()

# add event column and functionise description 
daily_sleep['name'] = 'sleep'
daily_food['name'] = 'food'
daily_activity['name'] = 'activity' 


daily_events = [daily_sleep, daily_food, daily_activity]
        

In [41]:
daily_events

[     qty       dates   name units
 43  9.48  2022-09-10  sleep    hr
 44  7.28  2022-09-11  sleep    hr
 45  8.52  2022-09-12  sleep    hr
 46  7.60  2022-09-13  sleep    hr
 47  9.03  2022-09-13  sleep    hr
 48  6.47  2022-09-15  sleep    hr
 49  7.58  2022-09-16  sleep    hr,
 name       dates  calories_consumed  carbohydrates  fiber  protein  total_fat  \
 0     2022-09-10             2061.0         184.15  26.60   184.59      51.16   
 1     2022-09-11             2216.0         195.81  28.47   187.67      64.32   
 2     2022-09-12             2012.0         224.11  65.83   179.61      79.28   
 3     2022-09-13             2057.0         213.88  46.46   167.18      46.54   
 4     2022-09-14             2726.0         373.25  55.58   174.08      59.25   
 5     2022-09-15             2665.0         280.21  19.51   166.18      87.76   
 6     2022-09-16             2348.0         265.30  39.15   127.83      71.30   
 
 name  name  
 0     food  
 1     food  
 2     food  
 3   

In [49]:
def make_event_details(row, self.type): 
    """Create a description field for the event"""

    if self.type in ('sleep'): 
        time = row['qty']
        hours = int(time)
        minutes = int((time - hours) * 60)
        value = f"{hours} hours {minutes} mins"
        summary = f"{hours}h {minutes}m"
    
    elif self.type in ('food'): 
        carbs, protein, fats, fiber = row['carbohydrates'], row['protein'], row['total_fat'], row['fiber']
        summary = f"{protein:.0f} P / {carbs:.0f} C / {fats:.0f} F"
        value = f"""
        Macros: {summary}
        Fiber: {fiber:.0f}
        Calories: {row['calories_consumed']:.0f} kcal
        """
        
    
    elif self.type in ('activity'): 
        burned, consumed = row['calories_burnt'], row['calories_consumed']
        value = f"{burned:.0f} kcal / {consumed:.0f} kcal"
        summary = f"{burned:.0f} kcal"
        # summary = f"{row['mindful_minutes']:.0f} mins"
    
    title = f"{_collect_event_emoticon(self.type)} {summary}"
    description = f"{_collect_event_emoticon(self.type)} {value}"
    
    return title, description
LOGGER.info("Generating calendar (as .ICS)")
c = Calendar()
for event_df in daily_events: 
    for _, event_row in event_df.iterrows(): 
        self.type =  event_df['name'].iloc[0]
        summary, description = make_event_details(event_row, self.type)
        # print(description)
        date = event_row['dates']
        e = create_event(date, summary, description)
        c.events.add(e)

INFO:__main__:Generating calendar (as .ICS)


In [50]:
with open('apple_health.ics', 'w') as f:
    f.write(str(c))
    f.close()