In [119]:
from pathlib import Path

import pandas as pd

# Root folder of the study data, resolved relative to the working directory.
DATA_DIR = Path("./pistachio_1_data")
# Sub-folder holding the per-participant device exports (see get_child_garmin_df).
DEVICE_DATA_DIR = DATA_DIR.joinpath("SMRawDec27")
# Dyad whose child recordings are loaded below.
dyad_code = "003"


def get_child_garmin_df(dyad: str, csv_name: str) -> pd.DataFrame:
    """
    Load the single Garmin CSV export of the given kind for a dyad's child.

    :param dyad: dyad code; the child's folder is "<dyad>_C" under DEVICE_DATA_DIR
    :param csv_name: export kind as it appears after "garmin" in the file name
        (e.g., "HeartRate", "Sleep")
    :return: pd.DataFrame with the CSV contents as parsed by pd.read_csv
    :raises ValueError: if zero or more than one file matches the pattern
    """
    # Example name: pistachio003_c_garminActivity_20220325_20220721.csv
    # Need flanking underscores bc some names (e.g., garminActivity) are prefixes
    pattern = f"*_garmin{csv_name}_*.csv"
    garmin_dir = DEVICE_DATA_DIR / f"{dyad}_C" / "Garmin"
    matches = sorted(garmin_dir.glob(pattern))
    if len(matches) != 1:
        # Same exception type as a failed single-element unpack, but with enough
        # context to tell a missing file from an ambiguous one.
        raise ValueError(
            f"Expected exactly one file matching {pattern!r} in {garmin_dir}, "
            f"found {len(matches)}: {[match.name for match in matches]}"
        )
    return pd.read_csv(matches[0])


# Heart-rate export: parse the 12-hour-clock timestamp strings into datetimes.
hr_df = get_child_garmin_df(dyad_code, "HeartRate").assign(
    ActivityTime=lambda raw: pd.to_datetime(
        raw["ActivityTime"], format="%m/%d/%Y %I:%M:%S %p"
    )
)

# Sleep export: CalendarDate holds a plain month/day/year date string.
sleep_df = get_child_garmin_df(dyad_code, "Sleep").assign(
    CalendarDate=lambda raw: pd.to_datetime(raw["CalendarDate"], format="%m/%d/%Y")
)

In [None]:
from datetime import datetime, time

import numpy as np
import pandas as pd

# Milliseconds per minute; dividing it by a rate in beats/minute gives ms per beat.
MS_IN_MINUTE = 60 * 1_000


def process_hr_df(
    hr_df: pd.DataFrame,
    timestamp_col: str = "ActivityTime",
    hr_col: str = "HeartRate",
) -> pd.DataFrame:
    """
    Return a copy of the heart-rate frame indexed by its parsed timestamps.

    :param hr_df: pd.DataFrame containing the timestamp column; left unmodified
    :param timestamp_col: name of the column to parse with pd.to_datetime and
        promote to the index
    :param hr_col: name of the heart-rate column (currently not used by this function)
    :return: new pd.DataFrame with `timestamp_col` as a datetime index
    """
    indexed = hr_df.copy()
    indexed[timestamp_col] = pd.to_datetime(indexed[timestamp_col])
    return indexed.set_index(timestamp_col)


def daily_hrv_sdann_sleep(
    hr_df: pd.DataFrame,
    sleep_df: pd.DataFrame,
    default_sleep_end: time = time(7, 0, 0),
    default_sleep_duration: pd.Timedelta = pd.Timedelta(hours=9),
) -> pd.Series:
    """
    Compute one SDANN value per calendar day from heart rate recorded during sleep.

    For every date that has heart-rate samples, the sleep window recorded for that
    date in `sleep_df` is used; dates without a sleep record fall back to a window
    ending at `default_sleep_end` on that date and lasting `default_sleep_duration`.

    :param hr_df: pd.DataFrame with "ActivityTime" (timestamp) and "HeartRate" (bpm)
        columns; it is passed through process_hr_df, which indexes it by timestamp
    :param sleep_df: pd.DataFrame with "ActivityDateTime", "CalendarDate", "DeepSleepDurationInSeconds", "LightSleepDurationInSeconds", "AwakeDurationInSeconds",
        "UnmeasurableSleepDurationInSeconds", "RemSleepInSeconds" columns
    :param default_sleep_end: time of day at which the fallback sleep window ends
    :param default_sleep_duration: length of the fallback sleep window
    :return: pd.Series indexed by date, holding the SDANN of the heart-rate samples
        that fall inside that date's sleep window (NaN when no samples fall in it)
    """

    hr_df = process_hr_df(hr_df)
    sleep_windows = sleep_start_and_end_times(sleep_df)

    def compute_sdann(day_group: pd.DataFrame) -> float:
        # groupby.apply hands over each day's sub-frame; its .name is the group key,
        # i.e., the date.
        day = day_group.name

        if day in sleep_windows.index:
            # NOTE(review): assumes at most one sleep record per CalendarDate; with
            # duplicates .loc returns a Series instead of a scalar -- confirm.
            sleep_start = sleep_windows.loc[day, "sleep_start"]
            sleep_end = sleep_windows.loc[day, "sleep_end"]
        else:
            sleep_end = datetime.combine(day, default_sleep_end)
            sleep_start = sleep_end - default_sleep_duration

        # Select from the full frame rather than day_group: the window can begin
        # on the previous calendar day.
        in_window = (hr_df.index >= sleep_start) & (hr_df.index <= sleep_end)
        hr_slice = hr_df.loc[in_window]
        if hr_slice.empty:
            return np.nan
        return sdann_from_hr(hr_slice)

    return hr_df.groupby(hr_df.index.date).apply(compute_sdann)


def sdann_from_hr(hr_df: pd.DataFrame) -> float:
    """
    Estimate SDANN from heart-rate samples.

    Each bpm reading is converted to a per-beat interval in milliseconds
    (MS_IN_MINUTE / bpm), the intervals are averaged within 5-minute bins, and the
    sample standard deviation of those bin means is returned.

    :param hr_df: pd.DataFrame with timestamp index and "HeartRate" (bpm) column
    :return: standard deviation (ddof=1) of the non-NaN 5-minute bin means
    """
    interval_ms = MS_IN_MINUTE / hr_df["HeartRate"]
    # Bins whose mean is NaN are discarded before taking the deviation.
    bin_means = interval_ms.resample("5min").mean().dropna()
    # ddof=1 -> sample (not population) standard deviation
    return np.std(bin_means, ddof=1)


def sleep_start_and_end_times(
    sleep_df: pd.DataFrame,
    activity_datetime_format: "str | None" = None,
) -> pd.DataFrame:
    """
    Derive the start and end timestamp of each sleep record.

    The start is the parsed "ActivityDateTime"; the end is the start plus the sum
    of all per-stage duration columns (missing durations count as zero seconds).

    :param sleep_df: pd.DataFrame with "ActivityDateTime", "CalendarDate", "DeepSleepDurationInSeconds", "LightSleepDurationInSeconds", "AwakeDurationInSeconds",
        "UnmeasurableSleepDurationInSeconds", "RemSleepInSeconds" columns.
        "CalendarDate" may be datetime-typed or anything pd.to_datetime can parse.
        The input frame is not modified.
    :param activity_datetime_format: optional strftime format for "ActivityDateTime";
        when None (default) pd.to_datetime infers the format
    :return: pd.DataFrame with date index, "sleep_start", "sleep_end" columns
    """
    df = sleep_df.copy()

    df["sleep_start"] = pd.to_datetime(
        df["ActivityDateTime"], format=activity_datetime_format
    )
    duration_cols = [
        "DeepSleepDurationInSeconds",
        "LightSleepDurationInSeconds",
        "AwakeDurationInSeconds",
        "UnmeasurableSleepDurationInSeconds",
        "RemSleepInSeconds",
    ]
    df["total_sleep_seconds"] = df[duration_cols].sum(axis=1)
    df["sleep_end"] = df["sleep_start"] + pd.to_timedelta(
        df["total_sleep_seconds"], unit="s"
    )
    # Coerce first so the function does not depend on the caller having already
    # converted the column; this is a no-op for datetime input.
    df["CalendarDate"] = pd.to_datetime(df["CalendarDate"]).dt.date
    return df.set_index("CalendarDate")[["sleep_start", "sleep_end"]]

In [125]:
# Per-day SDANN over each night's sleep window; displayed as the cell's output.
daily_hrv_sdann_sleep(hr_df=hr_df, sleep_df=sleep_df)

  df["sleep_start"] = pd.to_datetime(df["ActivityDateTime"])


2022-03-25          NaN
2022-03-26    55.223556
2022-03-27    97.755529
2022-03-28    46.021888
2022-03-29    58.397650
                ...    
2022-06-28    29.527062
2022-06-29    77.120664
2022-06-30    75.895936
2022-07-01    54.457274
2022-07-02    17.458864
Length: 97, dtype: float64