usage:
1. export your health data:
   1. open the Health app on your iPhone
   2. tap your picture or initials in the top right of the Summary page
   3. tap Export All Health Data
   4. share the zip to your computer somehow
2. extract the `export.xml` file from the zip into `src/scripts` in this repo
3. run the notebook

In [None]:
from xml.etree.ElementTree import iterparse
import pandas as pd
from xml.etree.ElementTree import Element
from pathlib import Path
from casefy import casefy
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Value of the `type` attribute used to select sleep-analysis records
# when filtering the export.
SLEEP_TYPE = "HKCategoryTypeIdentifierSleepAnalysis"

In [None]:
# Default figure size (width, height) for every plot drawn in this notebook.
plt.rcParams["figure.figsize"] = (14, 6)

In [None]:
def iter_health_data(
    file_path: str | Path,
    *,
    encoding: str = "utf-8",
    tags: set[str] | None = {"Record"},
    types: set[str] | None = None,
):
    root: Element
    element: Element

    with open(file_path, encoding=encoding) as f:
        iterator = iterparse(f, events=("start", "end"))
        _, root = next(iterator)

        for event, element in iterator:
            if (
                event == "end"
                and (tags is None or element.tag in tags)
                and (types is None or element.attrib["type"] in types)
            ):
                yield element.attrib
            
            # https://stackoverflow.com/a/326541
            root.clear()


In [None]:
# Load only the sleep-analysis records from the export: one row per
# record, one column per XML attribute. Then preview the first rows.
df = pd.DataFrame(iter_health_data(file_path="export.xml", types={SLEEP_TYPE}))
df.head(n=3)

In [None]:
# Rename every column to snake_case so the columns can be reached with
# attribute access (df.start_date, ...) in the cells below.
df.columns = list(map(casefy.snakecase, df.columns))
df.columns

In [None]:
# Parse the three timestamp columns from strings into datetimes, then
# show the resulting dtypes as a check.
for _date_column in ("creation_date", "start_date", "end_date"):
    df[_date_column] = pd.to_datetime(df[_date_column])
df.dtypes

In [None]:
# Sanity check: the analysis below treats every row as time in bed, so
# stop here if the export contains any other sleep-analysis value.
assert (df["value"] == "HKCategoryValueSleepAnalysisInBed").all()

In [None]:
# Duration of each record: end timestamp minus start timestamp.
df["time_in_bed"] = df["end_date"] - df["start_date"]
df["time_in_bed"]

In [None]:
# Total time in bed per calendar day, keyed by each record's creation date.
df2 = df.groupby(df.creation_date.dt.date)[["time_in_bed"]].sum()
df2.index = pd.to_datetime(df2.index)

# Express the daily total as a float number of hours.
df2 = df2.assign(sleep=df2.time_in_bed / np.timedelta64(1, "h"))  # type: ignore

# Keep data from May 2021 onwards and drop days with under an hour logged.
df2 = df2.loc["2021-05-01":]
df2 = df2[df2.sleep >= 1]

# Re-index onto a continuous daily calendar; min_count=1 leaves days
# without any data as NaN instead of 0.
df2 = df2.resample("D").sum(min_count=1).fillna(np.nan)
df2.describe()

In [None]:
# Weekday-only view of df2 (Monday=0 ... Friday=4). Resampling back to a
# daily calendar re-inserts the weekend dates as NaN rows, so the index
# stays continuous.
df3 = (
    df2.loc[lambda frame: frame.index.dayofweek < 5]
    .resample("D")
    .sum(min_count=1)
    .fillna(np.nan)
)

In [None]:
# Full history: daily hours in bed (grey) with 7-day rolling means on top.
ax = df2.sleep.plot(label="hours in bed", style="#c2c2c2")
df2.sleep.rolling(7).mean().plot(label="rolling mean")
# df3 is NaN on weekends, so a 7-day window holds at most 5 observations.
# With the default min_periods (equal to the window size) every window
# would be NaN and this line would never be drawn; require the 5 weekdays.
df3.sleep.rolling(7, min_periods=5).mean().plot(label="rolling mean (weekdays)")

ax.legend()
ax.grid()
ax.set_xlabel("")

In [None]:
# Same chart as the full-history plot, restricted to 2023 onwards.
ax = df2.loc["2023-01-01":].sleep.plot(label="hours in bed", style="#c2c2c2")
df2.loc["2023-01-01":].sleep.rolling(7).mean().plot(label="rolling mean")
# df3 is NaN on weekends, so a 7-day window holds at most 5 observations.
# With the default min_periods (equal to the window size) every window
# would be NaN and this line would never be drawn; require the 5 weekdays.
df3.loc["2023-01-01":].sleep.rolling(7, min_periods=5).mean().plot(
    label="rolling mean (weekdays)"
)

ax.legend()
ax.grid()
ax.set_xlabel("")

In [None]:
# Raw daily hours in bed from 2023 onwards: every day versus weekdays only
# (no smoothing).
ax = df2["sleep"].loc["2023-01-01":].plot(label="hours in bed")
df3["sleep"].loc["2023-01-01":].plot(label="hours in bed (weekdays)")

ax.grid()
ax.legend()
ax.set_xlabel("")

In [None]:
# Days from 2023 onwards with at least 10 hours in bed, counted per weekday.
df4 = df2.loc["2023-01-01":]
df4 = df4.loc[df4["sleep"] >= 10, ["sleep"]]
assert isinstance(df4.index, pd.DatetimeIndex)
# Group on (weekday number, weekday name) so rows sort Monday..Sunday, then
# drop the numeric level to leave only the names in the result.
df4.groupby(
    [df4.index.day_of_week, df4.index.day_name()]
).count().droplevel(0, axis="index")