In [1]:
import os 
import glob
import pandas as pd
from ics import Calendar, Event
from dataclasses import dataclass, field 
from collections import defaultdict
from typing import List, Dict
import numpy as np
from datetime import datetime

In [2]:
import logging
logging.basicConfig()
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)


In [4]:
import json
with open("config/user_config.json", "r") as f:
    user_config = json.load(f)

analysis_cols = ["qty", "dates", "name", "units"]
raw_data_columns = user_config.get("raw_data_columns")
EVENT_TYPES = user_config.get("event_types")


In [279]:
df_ahc[df_ahc["name"].isin(raw_data_columns)][analysis_cols].reset_index(
        drop=True
    )

Unnamed: 0,qty,dates,name,units
0,3039.918672,2022-09-10,active_energy,kJ
1,3553.969096,2022-09-11,active_energy,kJ
2,3627.758120,2022-09-12,active_energy,kJ
3,3750.788640,2022-09-13,active_energy,kJ
4,4264.282592,2022-09-14,active_energy,kJ
...,...,...,...,...
76,2012.000000,2022-09-12,calories_consumed,kcal
77,2057.000000,2022-09-13,calories_consumed,kcal
78,2726.000000,2022-09-14,calories_consumed,kcal
79,2665.000000,2022-09-15,calories_consumed,kcal


In [6]:
base_folder = os.getcwd()
source_folder = base_folder + '/healthlake/'
apple_health_files = glob.glob(source_folder + '*.json')


df_raw = pd.DataFrame()
# read configs
with open("config/user_config.json", "r") as f:
    user_config = json.load(f)

analysis_cols = ["qty", "dates", "name", "units"]
raw_data_columns = user_config.get("raw_data_columns")
EVENT_TYPES = user_config.get("event_types")


# read in raw data 
for json_file in apple_health_files:
    json_raw = pd.read_json(json_file, lines = True)
    df_raw = pd.concat([df_raw, json_raw])

In [7]:
def collect_event_stats(stats_df: pd.DataFrame, column_names: List[str]) -> pd.DataFrame: 
    """
    Extract health data from stats dataframe in the format 
    [['name', 'qty']] where: 
    - name: name of the health data
    - qty: value (in respective unit) of the health data
    
    Output: 
        - DataFrame of health data in the format [['name', 'qty']]
    """
    filtered_stats = stats_df[stats_df['name'].isin(column_names)]
    event_type_stats  = filtered_stats[['name', 'qty']]
    
    return event_type_stats

```mermaid
df_raw -> df_ahc -> df_stats -> output into CSV -> use df_stats to generate obj_args 
```

In [8]:
def create_day_events(stats: pd.DataFrame, event_date: str) -> List[Event]: 
    """
    Iterate through different event types (food / activity / sleep)
    and generate events to add to the daily calendar only if event exists
    """
    day_events = []
    
    for types, col_names in EVENT_TYPES.items(): 
        # collect object name and arguments
        # dynamically create event type objects 
        dataclass_name = types
        dataclass_obj = globals()[dataclass_name]
        
        # collect object arguments to initialise objects from stats
        dataclass_obj_stats = collect_event_stats(stats_df=stats, column_names=col_names)
        
        obj_args = dict(dataclass_obj_stats.values)
        
        if obj_args: 
            obj = dataclass_obj(**obj_args)
            e = AppleHealthEvent(
                date = event_date,
                title = obj.title, 
                description = obj.description
            ).event
            day_events.append(e)
            
    return day_events


In [9]:
@dataclass 
class AppleHealthEvent(Event):
    """
    An event derived from Apple Health data
    For usage within .ics format 
    """
    date: datetime.date
    description: str 
    title: str
            
    @property 
    def event(self):
        all_day_date = f"{self.date} 00:00:00"
        e = Event()
        e.name = self.title
        e.description = self.description
        e.begin = all_day_date
        e.end = all_day_date
        e.make_all_day()

        return e

In [10]:
from typing import Optional
@dataclass
class Time: 
    "A basic time object in hours"
    time: Optional[float] = field(default=0)
    timeInMinutes: Optional[float] = field(default=0)
        
    def __post_init__(self): 
        if self.timeInMinutes:
            self.time = self.timeInMinutes / 60
            
        
    @property 
    def hours(self) -> float: 
        hours = int(self.time)
        return hours 
    
    @property
    def minutes(self) -> str:
        minutes = int((self.time - self.hours) * 60)
        return minutes
    
    @property
    def title(self) -> str:
        title = f"{self.hours}h {self.minutes}m"
        return title

@dataclass 
class Food: 
    "A basic food object"
    carbohydrates: float 
    protein: float 
    total_fat: float 
    fiber: float 
    calories_burnt: float

    def __post_init__(self): 
        # rename objects for easier usage 
        self.carb = self.carbohydrates
        self.fat = self.total_fat
    
    @property 
    def calories_ate(self) -> float: 
        calories = (self.carb + self.protein) * 4 + (self.fat) * 9
        return calories
    
    @property 
    def macros(self) -> str: 
        return f"{self.carb:.0f}C, {self.protein:.0f}P, {self.fat:.0f}F"

    @property 
    def title(self) -> str: 
        title = f"🔥 {self.calories_ate:.0f} cals ({self.macros})" 
        return title 
    
    @property 
    def description(self) -> str: 
        description = f"""
        🔥 {self.calories_burnt:.0f} kcal
        🍽️ {self.calories_ate:.0f} kcal
        🥞 {self.macros} 
        🍇 {self.fiber:.0f} g
        """
        return description

@dataclass
class Activity: 
    "A basic activity for activity and mindfulness"
    apple_exercise_time: Time
    mindful_minutes: Time = None
    
    def __post_init__(self): 
        # rename objects for easier usage
        self.apple_exercise_time = Time(timeInMinutes=self.apple_exercise_time)
        self.mindful_minutes = Time(timeInMinutes=self.mindful_minutes)
    
    @property 
    def activity_description(self) -> str: 
        a_description = f"🚴‍♂️ Activity: {self.apple_exercise_time.title} active" 
        return a_description

    @property 
    def mindful_description(self) -> str:
        m_description = f"🧘 Mindful: {self.mindful_minutes.title} mindful"
        return m_description
     
    @property 
    def description(self) -> str: 
        description = f"""
        {self.activity_description}
        {self.mindful_description}
        """
        return description
    
    @property 
    def mindful_title(self) -> str: 
        title = f"🧠 {self.mindful_minutes.minutes} mins "
        return title
    
    @property 
    def activity_title(self) -> str: 
        "Create blocks of 1 hour increments of activity minutes"
        block = str(self.apple_exercise_time.hours)
        title = f"🚴‍♂️ {block}"
        return title 
  
    @property 
    def title(self) -> str: 
        title = f"{self.mindful_title} | {self.activity_title}"
        return title 
   


@dataclass
class Sleep:
    "A basic sleep object"
    asleep: Time
    inBed: Time
    inBedStartTime: str

    def __post_init__(self): 
        # rename objects for easier usage
        self.time_asleep = Time(time=self.asleep)
        self.time_in_bed = Time(self.inBed)
        self.in_bed_time = self.inBedStartTime
    
    @property 
    def efficiency(self) -> float: 
        efficiency = self.time_asleep.time / self.time_in_bed.time * 100
        return efficiency
    
    @property
    def efficiency_title(self) -> str: 
        efficient = f"{self.efficiency:.0f}%"
        efficiency_title = f"🛏️ {efficient}"
        return efficiency_title
    
    @property 
    def title(self) -> str: 
        title = f"💤 {self.time_asleep.title} ({self.in_bed_time})"
        print(title)
        return title 
    
    @property
    def description(self) -> str: 
        s_description = f"""
        💤 Time asleep: {self.time_asleep.title}
        🛏️ Time in bed: {self.time_in_bed.title}
        🧮 Efficiency: {self.efficiency_title}
        """
        return s_description


In [11]:
print(globals())

{'__name__': '__main__', '__doc__': 'Automatically created module for IPython interactive environment', '__package__': None, '__loader__': None, '__spec__': None, '__builtin__': <module 'builtins' (built-in)>, '__builtins__': <module 'builtins' (built-in)>, '_ih': ['', 'import os \nimport glob\nimport pandas as pd\nfrom ics import Calendar, Event\nfrom dataclasses import dataclass, field \nfrom collections import defaultdict\nfrom typing import List, Dict\nimport numpy as np\nfrom datetime import datetime', 'import logging\nlogging.basicConfig()\nLOGGER = logging.getLogger(__name__)\nLOGGER.setLevel(logging.INFO)', 'with open("config/user_config.json", "r") as f:\n    user_config = json.load(f)\n\nanalysis_cols = ["qty", "dates", "name", "units"]\nraw_data_columns = user_config.get("raw_data_columns")\nEVENT_TYPES = user_config.get("event_types")', 'import json\nwith open("config/user_config.json", "r") as f:\n    user_config = json.load(f)\n\nanalysis_cols = ["qty", "dates", "name", "

In [269]:
c = Calendar()
available_dates = ahc['dates'].unique()

for date in available_dates:
    daily_stats = ahc[ahc['dates'] == date]    
    daily_calendar = create_day_events(
        stats=daily_stats,
        event_date=date
    )
    for event in daily_calendar:
        c.events.add(event)

🧠 0 | 🚴‍♂️ 1
💤 9h 29m (9:16 PM)
🧠 0 | 🚴‍♂️ 2
💤 7h 16m (10:23 PM)
🧠 0 | 🚴‍♂️ 1
💤 8h 30m (8:01 PM)
🧠 0 | 🚴‍♂️ 1
💤 9h 1m (9:15 PM)
🧠 0 | 🚴‍♂️ 2
🧠 0 | 🚴‍♂️ 1
💤 6h 28m (9:31 PM)
🧠 0 | 🚴‍♂️ 0
💤 7h 35m (9:02 PM)


In [270]:
with open('outputs/apple_health.ics', 'w') as my_file:
    my_file.writelines(c)

In [122]:
daily_stats[daily_stats['name']=='sleep_analysis']

Unnamed: 0,qty,date,name,units,Avg,Min,Max,sleepStart,sleepEnd,inBedEnd,sleepSource,asleep,inBed,inBedStart,inBedSource,dates,type
240,,2022-09-16 04:43:03+09:30,sleep_analysis,hr,,,,2022-09-15 22:26:33 +0930,2022-09-16 06:08:03 +0930,2022-09-16 06:00:01 +0930,Nam’s Apple Watch,7.583333,8.9635,2022-09-15 21:02:12 +0930,iPhone,2022-09-16,


Unnamed: 0,qty,date,name,units,Avg,Min,Max,sleepStart,sleepEnd,inBedEnd,sleepSource,asleep,inBed,inBedStart,inBedSource,dates,type
6,1981.70976,2022-09-16 00:00:00+09:30,active_energy,kJ,,,,,,,,,,,,2022-09-16,
13,58.0,2022-09-16 00:00:00+09:30,apple_exercise_time,min,,,,,,,,,,,,2022-09-16,
20,8.0,2022-09-16 00:00:00+09:30,apple_stand_hour,count,,,,,,,,,,,,2022-09-16,
27,120.0,2022-09-16 00:00:00+09:30,apple_stand_time,min,,,,,,,,,,,,2022-09-16,
34,4484.942169,2022-09-16 00:00:00+09:30,basal_energy_burned,kJ,,,,,,,,,,,,2022-09-16,
41,96.916667,2022-09-16 00:00:00+09:30,blood_oxygen_saturation,%,,,,,,,,,,,,2022-09-16,
48,792.363333,2022-09-16 00:00:00+09:30,calcium,mg,,,,,,,,,,,,2022-09-16,
55,265.301367,2022-09-16 00:00:00+09:30,carbohydrates,g,,,,,,,,,,,,2022-09-16,
61,0.49335,2022-09-16 00:00:00+09:30,copper,mg,,,,,,,,,,,,2022-09-16,
75,9393.770293,2022-09-16 00:00:00+09:30,dietary_energy,kJ,,,,,,,,,,,,2022-09-16,


In [281]:
print(globals())

{'__name__': '__main__', '__doc__': 'Automatically created module for IPython interactive environment', '__package__': None, '__loader__': None, '__spec__': None, '__builtin__': <module 'builtins' (built-in)>, '__builtins__': <module 'builtins' (built-in)>, '_ih': ['', 'import os \nimport glob\nimport pandas as pd\nfrom ics import Calendar, Event\nfrom dataclasses import dataclass, field \nfrom collections import defaultdict\nfrom typing import List, Dict\nimport numpy as np\nfrom datetime import datetime', 'import logging\nlogging.basicConfig()\nLOGGER = logging.getLogger(__name__)\nLOGGER.setLevel(logging.INFO)', "base_folder = os.getcwd()\nsource_folder = base_folder + '/healthlake/'\napple_health_files = glob.glob(source_folder + '*.json')\n\n\ndf_raw = pd.DataFrame()\n\nimport json\nwith open('config/event_types.json', 'r') as f:\n    EVENT_TYPES = json.load(f)\n    \nwith open('config/units.json', 'r') as f: \n    UNITS = json.load(f)\n\nCOLUMN_EVENT_TYPES = dict((col_name, types

In [205]:
df_sleep = df_ahc[df_ahc["name"].isin(["sleep_analysis"])]
sleep_data = df_sleep[["asleep", "inBed", "inBedStart", "dates"]].reset_index(
    drop=True
)

# convert inBedStart to 12 hour time
sleep_data["inBedStartTime"] = sleep_data.apply(
    lambda row: convert_to_12_hr(row["inBedStart"]), axis=1
)

## Lambda functions (apply functions by rows)
df_sleep_data = pd.melt(
    sleep_data, id_vars=["dates"], value_vars=["asleep", "inBed", "inBedStartTime"]
).rename(columns={"variable": "name", "value": "qty"})

# merge back to original data
df_ahc = pd.concat([df_ahc, df_sleep_data])

In [207]:
ahc[['dates', 'name', 'qty']]

Unnamed: 0,dates,name,qty
0,2022-09-10,active_energy,3039.918672
1,2022-09-11,active_energy,3553.969096
2,2022-09-12,active_energy,3627.75812
3,2022-09-13,active_energy,3750.78864
4,2022-09-14,active_energy,4264.282592
...,...,...,...
16,2022-09-12,inBedStartTime,08:01 PM
17,2022-09-13,inBedStartTime,08:36 PM
18,2022-09-13,inBedStartTime,09:15 PM
19,2022-09-15,inBedStartTime,09:31 PM


In [55]:
ics_file_name = 'outputs/apple-health.ics'
with open(ics_file_name, 'w') as f:
    f.write(str(c))
    f.close()



In [102]:
def convert_kj_to_cal(row, new_name):
    """Converts kj to calories"""
    row_dict = row.to_dict()
    calorie_value = int(row['qty']/4)
    row_dict['qty'] = calorie_value
    row_dict['name'] = new_name
    row_dict['units'] = 'kcal'
    
    return pd.DataFrame(row_dict, index=[0])


In [280]:
df_ahc = df_raw.copy()

# define transformations to go from df_raw to df_ahc (apple-health-calendar)
# cleaning values
df_ahc["dates"] = pd.to_datetime(df_ahc["date"]).dt.date
df_ahc["qty"] = df_ahc["qty"].fillna(df_ahc["asleep"])

# create calories
logging.info((df_ahc[df_ahc["name"] == "active_energy"]))
active_energy_rows = df_ahc[df_ahc["name"] == "active_energy"][analysis_cols]

for _, row in active_energy_rows.iterrows():
    df_row = convert_kj_to_cal(row, "calories_burnt")
    df_ahc = pd.concat([df_ahc, df_row])

# unpivot sleep columns into its own
df_sleep = df_ahc[df_ahc["name"].isin(["sleep_analysis"])]
sleep_data = df_sleep[["asleep", "inBed", "inBedStart", "dates"]].reset_index(
    drop=True
)

# convert inBedStart to 12 hour time into new column inBedStartTime
sleep_data["inBedStartTime"] = sleep_data.apply(
    lambda row: convert_to_12_hr(row["inBedStart"]), axis=1
)

# Lambda functions (apply functions by rows)
df_sleep_data = pd.melt(
    sleep_data, id_vars=["dates"], value_vars=["asleep", "inBed", "inBedStartTime"]
).rename(columns={"variable": "name", "value": "qty"})

# merge back to original data
df_ahc = pd.concat([df_ahc, df_sleep_data])

# filter out values
df_ahc = df_ahc[df_ahc["name"].isin(raw_data_columns)][analysis_cols].reset_index(
    drop=True
)

# CHECKPOINT
df_ahc.to_csv("outputs/transformations/ahc.csv", index=False)

c = Calendar()
available_dates = df_ahc['dates'].unique()

weekly_events = []
for date in available_dates:
    LOGGER.info("Generating events for %s", date)
    daily_stats = df_ahc[df_ahc["dates"] == date]
    daily_events = create_day_events(stats=daily_stats, event_date=date)
    weekly_events.append(daily_events)

🧠 0 | 🚴‍♂️ 1
🧠 0 | 🚴‍♂️ 2
🧠 0 | 🚴‍♂️ 1
🧠 0 | 🚴‍♂️ 1
🧠 0 | 🚴‍♂️ 2
🧠 0 | 🚴‍♂️ 1
🧠 0 | 🚴‍♂️ 0


NameError: name 'df_ahc' is not defined

In [117]:
with open('apple_health.ics', 'w') as f:
    f.write(str(c))
    f.close()