In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import glob

%matplotlib inline
#%matplotlib qt
mpl.rcParams['lines.linewidth'] = 0.91
plt.style.use('seaborn-v0_8-whitegrid')
%matplotlib qt

from avro.datafile import DataFileReader
from avro.io import DatumReader

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Slider, Select
from bokeh.layouts import gridplot
from bokeh.models import Range1d
from bokeh.io import export_png
from bokeh.models import DatetimeTickFormatter

## Verity Sense

In [2]:
import fastavro

def read_avro_veritysense(file_path, offset_vs = 946684800000000000+ 3600 * 1e9):
    """
    Reads an Avro file and returns the data as a pandas DataFrame.
    """
    with open(file_path, "rb") as avro_file:
        # Use fastavro.reader to read the Avro file
        reader = fastavro.reader(avro_file)
        # Convert the records to a list
        records = [record for record in reader]
       # to dataframe
        df = pd.DataFrame(records)
        df.index = pd.to_datetime(df["timestamp"] + offset_vs, unit="ns") 
        df.drop("timestamp", axis=1, inplace=True)
    return df / 1000

In [3]:
import re
from datetime import datetime

# Function to extract full datetime (including time) for sorting
def extract_datetime(filename):
    match = re.search(r'(\w{3}) (\w{3}) (\d{2}) (\d{2})-(\d{2})-(\d{2}) .* (\d{4})', filename)
    if match:
        weekday, month, day, hour, minute, second, year = match.groups()
        date_str = f"{day} {month} {year} {hour}:{minute}:{second}"
        date_obj = datetime.strptime(date_str, "%d %b %Y %H:%M:%S")  # Convert to datetime object
        return date_obj
    return datetime.max  # Default to a max value if parsing fails

In [None]:
data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

participants = ["23483"]

sensor = "VeritySense"#, "GeneActivPolso", "GeneActivCaviglia", "RootiRx"]

for participant in participants:
    acc_df = pd.DataFrame()
    ppg_df = pd.DataFrame()
    # print(sensor)
    path = os.path.join(data_path, participant, visit, sensor)
    files_in_path = [f for f in os.listdir(path) if not f.startswith(".")]
    if len(files_in_path) <= 2: # "_" and "AVRO"
        continue

    print(f"**************** {participant} ****************")
    acc_path = os.path.join(path, "AVRO/acc")
    ppg_path = os.path.join(path, "AVRO/ppg")
    for f in sorted(os.listdir(acc_path), key=extract_datetime):
        current_acc = read_avro_veritysense(os.path.join(acc_path, f))
        acc_df = pd.concat([acc_df, current_acc])

    for f in sorted(os.listdir(ppg_path), key=extract_datetime):
        current_ppg = read_avro_veritysense(os.path.join(ppg_path, f))
        ppg_df = pd.concat([ppg_df, current_ppg])

    # Check for disconnections
    t_acc_rec = acc_df.index[acc_df.index.to_series().diff().dt.total_seconds() > 0.5]
    t_acc_disc = acc_df.index[np.where(acc_df.index.to_series().diff().dt.total_seconds() > 0.5)[0]-1]
    t_disc_rec_acc = pd.DataFrame({"disc": t_acc_disc, "rec": t_acc_rec})
    total_duration_acc = acc_df.index[-1] - acc_df.index[0]
    disconnetions_duration_acc = t_disc_rec_acc["rec"] - t_disc_rec_acc["disc"]
    effective_duration_acc = total_duration_acc - disconnetions_duration_acc.sum()
    if len(t_acc_disc) > 0:
        print("\n ***** Disconnections in ACC *****")
        # for i in range(len(t_acc_disc)):
        #     print(f"Disconnection {i+1}:")
        #     print(f"Start: {t_acc_disc[i].strftime('%Y-%m-%d %H:%M:%S')}")
        #     print(f"End: {t_acc_rec[i].strftime('%Y-%m-%d %H:%M:%S')}")
        #     print(f"Duration: {str(t_acc_rec[i] - t_acc_disc[i]).split(' ')[2][3:5]} min and {str(t_acc_rec[i] - t_acc_disc[i]).split(' ')[2][6:8]} s")
        print("Number of disconnections: " + str(len(t_acc_disc)))
        print("Total duration of disconnections: " + str(np.sum([t_acc_rec[i] - t_acc_disc[i] for i in range(len(t_acc_disc))])).split(' ')[2][:8] + " (hours, minutes and seconds)")
        print(f"ACC effective duration: {effective_duration_acc}".split(".")[0])
    # Fill disconnection portions with NaNs and linearly interpolate
    for start, end in zip(t_acc_disc, t_acc_rec):
        acc_df.loc[start:end] = np.nan
    # acc_df = acc_df.interpolate(method='time')

    t_ppg_rec = ppg_df.index[ppg_df.index.to_series().diff().dt.total_seconds() > 1]
    t_ppg_disc = ppg_df.index[np.where(ppg_df.index.to_series().diff().dt.total_seconds() > 1)[0]-1]
    t_disc_rec_ppg = pd.DataFrame({"disc": t_ppg_disc, "rec": t_ppg_rec})
    total_duration_ppg = ppg_df.index[-1] - ppg_df.index[0]
    disconnetions_duration_ppg = t_disc_rec_ppg["rec"] - t_disc_rec_ppg["disc"]
    effective_duration_ppg = total_duration_ppg - disconnetions_duration_ppg.sum()
    if len(t_ppg_disc) > 0:
        print("\n ***** Disconnections in PPG *****")
        # for i in range(len(t_ppg_disc)):
        #     print(f"Disconnection {i+1}:")
        #     print(f"Start: {t_ppg_disc[i].strftime('%Y-%m-%d %H:%M:%S')}")
        #     print(f"End: {t_ppg_rec[i].strftime('%Y-%m-%d %H:%M:%S')}")
        #     print(f"Duration: {str(t_ppg_rec[i] - t_ppg_disc[i]).split(' ')[2][3:5]} min and {str(t_ppg_rec[i] - t_ppg_disc[i]).split(' ')[2][6:8]} s")
        print("Number of disconnections: " + str(len(t_ppg_disc)))
        print("Total duration of disconnections: " + str(np.sum([t_ppg_rec[i] - t_ppg_disc[i] for i in range(len(t_ppg_disc))])).split(' ')[2][:8] + " (hours, minutes and seconds)")
        print(f"PPG effective duration: {effective_duration_ppg}".split(".")[0])
        print("")
    # Fill disconnection portions with NaNs and linearly interpolate
    for start, end in zip(t_ppg_disc, t_ppg_rec):
        ppg_df.loc[start:end] = np.nan
    # ppg_df = ppg_df.interpolate(method='time')

**************** 23483 ****************


In [None]:
data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

participants = ["23483"]

sensor = "VeritySense"#, "GeneActivPolso", "GeneActivCaviglia", "RootiRx"]

for participant in participants:
    acc_df = pd.DataFrame()
    ppg_df = pd.DataFrame()
    # print(sensor)
    path = os.path.join(data_path, participant, visit, sensor)
    files_in_path = [f for f in os.listdir(path) if not f.startswith(".")]
    if len(files_in_path) <= 2: # "_" and "AVRO"
        continue

    print(f"**************** {participant} ****************")
    acc_path = os.path.join(path, "AVRO/acc")
    ppg_path = os.path.join(path, "AVRO/ppg")
    for f in sorted(os.listdir(acc_path), key=extract_datetime):
        current_acc = read_avro_veritysense(os.path.join(acc_path, f))
        acc_df = pd.concat([acc_df, current_acc])

    for f in sorted(os.listdir(ppg_path), key=extract_datetime):
        current_ppg = read_avro_veritysense(os.path.join(ppg_path, f))
        ppg_df = pd.concat([ppg_df, current_ppg])

    # Check for disconnections
    t_acc_rec = acc_df.index[acc_df.index.to_series().diff().dt.total_seconds() > 0.5]
    t_acc_disc = acc_df.index[np.where(acc_df.index.to_series().diff().dt.total_seconds() > 0.5)[0]-1]
    t_disc_rec_acc = pd.DataFrame({"disc": t_acc_disc, "rec": t_acc_rec})

    good_portions_acc = pd.DataFrame(columns=["start", "end"])
    good_portions_acc["start"] = t_disc_rec_acc["rec"].iloc[:-1].reset_index(drop=True)
    good_portions_acc["end"] = t_disc_rec_acc["disc"].iloc[1:].reset_index(drop=True)
    first_disconnection_acc = t_disc_rec_acc["disc"].iloc[0]
    last_reconnection_acc = t_disc_rec_acc["rec"].iloc[-1]

    # acc_list contains the ACC signal divided into the potions with signal (between disconnetions)
    acc_list = []
    acc_list.append(acc_df.loc[:first_disconnection_acc])
    for i, portion in good_portions_acc.iterrows():
        acc_list.append(acc_df.loc[portion["start"]:portion["end"]])
    acc_list.append(acc_df.loc[last_reconnection_acc:])

    # PPG
    t_ppg_rec = ppg_df.index[ppg_df.index.to_series().diff().dt.total_seconds() > 1]
    t_ppg_disc = ppg_df.index[np.where(ppg_df.index.to_series().diff().dt.total_seconds() > 1)[0]-1]
    t_disc_rec_ppg = pd.DataFrame({"disc": t_ppg_disc, "rec": t_ppg_rec})

    good_portions_ppg = pd.DataFrame(columns=["start", "end"])
    good_portions_ppg["start"] = t_disc_rec_ppg["rec"].iloc[:-1].reset_index(drop=True)
    good_portions_ppg["end"] = t_disc_rec_ppg["disc"].iloc[1:].reset_index(drop=True)
    first_disconnection_ppg = t_disc_rec_ppg["disc"].iloc[0]
    last_reconnection_ppg = t_disc_rec_ppg["rec"].iloc[-1]

    # ppg_list contains the PPG signal divided into the potions with signal (between disconnetions)
    ppg_list = []
    ppg_list.append(ppg_df.loc[:first_disconnection_ppg])
    for i, portion in good_portions_ppg.iterrows():
        ppg_list.append(ppg_df.loc[portion["start"]:portion["end"]])
    ppg_list.append(ppg_df.loc[last_reconnection_ppg:])

**************** 23483 ****************


In [5]:
from utils.resample_signal import apply_resample

# Loop across the good portions of acc and ppg, see if they overlap, and if so, resample acc to ppg
acc_list_beliefppg = []
ppg_list_beliefppg = []
for i in range(len(acc_list)):
    for j in range(len(ppg_list)):
        if acc_list[i].index[0] < ppg_list[j].index[-1] and acc_list[i].index[-1] > ppg_list[j].index[0]:
            # resample acc to ppg
            acc_resampled_df= pd.DataFrame()
            for acc_axis in ["x", "y", "z"]:
                t_acc_resampled, acc_resampled = apply_resample(
                    time = acc_list[i].index.astype(np.int64).to_numpy(), 
                    time_rs = ppg_list[j].index.astype(np.int64).to_numpy(),
                    data = acc_list[i][f"acc_{acc_axis}"].values
                    )
                acc_resampled_df[f"acc_{acc_axis}"] = acc_resampled[0]
            acc_resampled_df.index = t_acc_resampled
            acc_list_beliefppg.append(acc_resampled_df)
            ppg_list_beliefppg.append(ppg_list[j])

In [6]:
len(acc_list_beliefppg), len(ppg_list_beliefppg)

(134, 134)

### Prediction with BeliefPPG :)

In [7]:
from heart_rate.beliefppg.inference.inference import infer_hr

In [30]:
ppg_list_beliefppg[i].shape, acc_list_beliefppg[i].shape

((270, 4), (252, 3))

In [None]:
hr_all = []
time_hr_all = []
for i, (ppg, acc) in enumerate(zip(ppg_list_beliefppg, acc_list_beliefppg)): ##### Each portion has the same shape between ACC and PPG lesgooo
   ppg = ppg["ppg1"]

   if len(ppg) < 55*60*5: # 5 minutes
         continue
   if len(acc) < 55*60*5: # 5 minutes
         continue

   if ppg.shape[0] < acc.shape[0]:
       acc = acc.iloc[:ppg.shape[0]]
   elif ppg.shape[0] > acc.shape[0]:
       ppg = ppg.iloc[:acc.shape[0]]
   time = ppg.index # same as ppg.index
   hr, idxs = infer_hr(ppg=ppg.values.reshape(-1,1), ppg_freq=64, acc=acc.values, acc_freq=64)
   hr_all.append(hr)
   time_hr_all.append(time[idxs])



2025-02-22 00:33:39.725800: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence




In [40]:
# Concatenate all the portions
hr_belief = np.concatenate(hr_all)
t_hr_belief = pd.to_datetime(np.concatenate(time_hr_all))

# Convert to pandas Series
hr_belief_df = pd.Series(hr_belief, index=t_hr_belief)

# Save to CSV
save_data_path = "/Users/augenpro/Documents/Age-IT/"
hr_belief_df.to_csv(save_data_path + "hr_belief.csv")

In [41]:
hr_belief_df

2025-01-21 14:55:15.503013376    110.062828
2025-01-21 14:55:17.840497152    107.573189
2025-01-21 14:55:20.177974272    106.438690
2025-01-21 14:55:22.515440640    101.132141
2025-01-21 14:55:24.852782080     96.862183
                                    ...    
2025-01-28 14:26:24.879641600     91.913528
2025-01-28 14:26:27.215693056     91.505623
2025-01-28 14:26:29.551781120     93.739883
2025-01-28 14:26:31.887758080     94.178032
2025-01-28 14:26:34.223690240     91.478500
Length: 170644, dtype: float32

In [42]:
plt.figure(figsize=(15, 5))
plt.plot(hr_belief_df.resample("1min").mean(), label="Heart Rate Belief PPG")
plt.title("Heart Rate Belief PPG")
plt.xlabel("Time")
plt.ylabel("Heart Rate (bpm)")
plt.grid()
plt.show()

In [None]:
import subprocess
import os

hr_file_path = save_data_path
hr_file = os.path.join(hr_file_path, "hr_belief.pkl")
spt_file = os.path.join(hr_file_path, "SPT_window_GGIR.npy")

# This calls the script to plot the HR
script_path = os.path.abspath("../visualization/plot_hr.py")
bokeh_process = subprocess.Popen([
    "bokeh", "serve", "--show", script_path,
    "--args", "--hr_file", hr_file, "--spt_file", spt_file
])

To kill the bokeh process: 
 - open terminal
 - write kill -9 <process_id>, where process_id is the process ID of the bokeh server, that can be found in the cell above (Starting Bokeh server with process id: 2553)

## HRV Night

In [35]:
from utils.resample_signal import apply_resample
from utils.compute_acc_metrics import compute_acc_SMV

# Loop across the good portions of acc and ppg, see if they overlap, and if so, resample acc to ppg
acc_list_beliefppg = []
ppg_list_beliefppg = []
for i in range(len(acc_list)):
    acc_list[i].loc[:, "acc_SVM"] = compute_acc_SMV(acc_list[i])
    for j in range(len(ppg_list)):
        if acc_list[i].index[0] < ppg_list[j].index[-1] and acc_list[i].index[-1] > ppg_list[j].index[0]:
            # resample acc to ppg
            acc_resampled_df = pd.DataFrame()
            t_acc_resampled, acc_resampled = apply_resample(
                time = acc_list[i].index.astype(np.int64).to_numpy(), 
                time_rs = ppg_list[j].index.astype(np.int64).to_numpy(),
                data = acc_list[i]["acc_SVM"].values
                )
            acc_resampled_df["acc_SVM"] = acc_resampled[0]
            acc_resampled_df.index = pd.to_datetime(t_acc_resampled)
            acc_list_beliefppg.append(acc_resampled_df)
            ppg_list_beliefppg.append(ppg_list[j])

In [36]:
import neurokit2 as nk
from sleep.detect_acc_bursts import *
from heart_rate.ppg_beat_detection import MSPTDfast
from heart_rate.kubios import signal_fixpeaks
from heart_rate.heart_rate_fragmentation import compute_HRF

In [None]:
# Concatenate all the portions
ppg_hrv = pd.DataFrame()
acc_hrv = pd.DataFrame()

for i, (ppg, acc) in enumerate(zip(ppg_list_beliefppg, acc_list_beliefppg)):
    ppg = nk.ppg_clean(ppg["ppg1"].values, sampling_rate = 55)
    ppg = pd.Series(ppg, index=ppg_list_beliefppg[i].index)
    acc = acc["acc_SVM"]

    if len(ppg) < 55*60*5: # 5 minutes
        continue
    if len(acc) < 55*60*5: # 5 minutes
        continue

    if ppg.shape[0] < acc.shape[0]:
        acc = acc.iloc[:ppg.shape[0]]
    elif ppg.shape[0] > acc.shape[0]:
        ppg = ppg.iloc[:acc.shape[0]]

    ppg_hrv = pd.concat([ppg_hrv, ppg])
    acc_hrv = pd.concat([acc_hrv, acc])

  acc_hrv = pd.concat([acc_hrv, acc])


In [38]:
plt.figure(figsize=(15, 5))
plt.subplot(2, 1, 1)
plt.plot(ppg_hrv)
plt.title("PPG")
plt.xlabel("Time")
plt.ylabel("PPG")
plt.subplot(2, 1, 2, sharex = plt.subplot(2, 1, 1))
plt.plot(acc_hrv)
plt.title("ACC")
plt.xlabel("Time")
plt.ylabel("ACC")

Text(0, 0.5, 'ACC')

## Save sleep onset and offset

In [50]:
ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[0]

lights_off_events = ["1) A letto", "1) A letto, luci spente"]

# participants = ["23483"]

GGIR_output_dir = "output_icareit"

diary_df = pd.DataFrame()

for participant in participants:

    print(f"***** {participant} *****")

    ### Load the GENEActiv data (for debugging purposes only) ######
    # files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
    # for f in files:
    #     if f.endswith(".parquet"):
    #         acc_gen = pd.read_parquet(os.path.join(bronze_layer_path, participant, visit, sensor, f))

    ###### Load the GGIR output data ######
    HDCZA_SPT = [] # start and end of SPT based on HDCZA (vanhees2018) 
    ggir_output_path = os.path.join(silver_layer_path, participant, visit, sensor, GGIR_output_dir)
    if not os.path.exists(ggir_output_path): # Skip participants with no Data
        continue
    ggir_part4_output = pd.read_csv(ggir_output_path + "/results/QC/part4_nightsummary_sleep_full.csv")
    for i, day_row in ggir_part4_output.iterrows():
        # Stupid thing to get the correct datetime for segmenting signals into day and night (but no alternatives I guess)
        if day_row["sleeponset_ts"][0] == "0": # if the first digit is after midnight (00, 01, 02, ...)
            sleep_onset = pd.to_datetime(str(pd.to_datetime(day_row["calendar_date"]).date() + pd.Timedelta("1d")) + " " + day_row["sleeponset_ts"])
        else:
            sleep_onset = pd.to_datetime(str(pd.to_datetime(day_row["calendar_date"]).date()) + " " + day_row["sleeponset_ts"])
        wake_onset = pd.to_datetime(str(pd.to_datetime(day_row["calendar_date"]).date() + pd.Timedelta("1d")) + " " + day_row["wakeup_ts"])
        day = pd.to_datetime(day_row["calendar_date"]).date()
        HDCZA_SPT.append((sleep_onset, wake_onset, day))
    HDCZA_SPT = pd.DataFrame(HDCZA_SPT, columns = ["sleep_onset_HDCZA", "wake_onset_HDCZA", "calendar_day"])

    ###### Load the diary data ######
    diary_raw_path = os.path.join(bronze_layer_path, participant, visit, "Diario", f"{participant}_{visit.split(" ")[0]}_Diario.xlsx")
    if not os.path.exists(diary_raw_path): # Skip participants with no Data
        continue
    diary_raw = pd.read_excel(diary_raw_path, sheet_name="Ore")
    diary_raw = diary_raw.dropna(how = "all") # Drop empty rows
    diary_raw = diary_raw[(diary_raw["Evento"].apply(lambda x: x[:1]) == "1") | (diary_raw["Evento"].apply(lambda x: x[:1]) == "2")].reset_index(drop = True)
    diary_raw["Data"] = diary_raw["Data"].apply(lambda x: str(x).split(" ")[0])
    diary_raw["Ora inizio"] = diary_raw["Ora inizio"].apply(lambda x: str(x))
    sleep_onset_diary = pd.to_datetime(diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "1"]["Data"] + " " + diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "1"]["Ora inizio"]).dropna().reset_index(drop = True)
    wake_onset_diary = pd.to_datetime(diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "2"]["Data"] + " " + diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "2"]["Ora inizio"]).dropna().reset_index(drop = True)
    diary_SPT = pd.DataFrame({"sleep_onset_diary": sleep_onset_diary, "wake_onset_diary": wake_onset_diary})
    diary_SPT["calendar_day"] = diary_SPT["sleep_onset_diary"].apply(lambda x: (x - pd.Timedelta(days=1)).date() if x.hour < 12 else x.date())
    # Fill the missing diary data
    for i, row in HDCZA_SPT.iterrows():
        if row["calendar_day"] not in diary_SPT["calendar_day"].values:
            diary_SPT = pd.concat([diary_SPT, pd.DataFrame({"sleep_onset_diary": [np.nan], "wake_onset_diary": [np.nan], "calendar_day": [row["calendar_day"]]})], ignore_index = True)
    diary_SPT = diary_SPT.sort_values(by = "calendar_day").reset_index(drop = True)
    HDCZA_SPT = HDCZA_SPT.sort_values(by = "calendar_day").reset_index(drop = True)

    # Check the overlap between the HDCZA and diary SPT
    overlap = []
    for i, row in HDCZA_SPT.iterrows():
        if row["calendar_day"] in diary_SPT["calendar_day"].values:
            diary_row = diary_SPT[diary_SPT["calendar_day"] == row["calendar_day"]].iloc[0]
            overlap.append((row["sleep_onset_HDCZA"], row["wake_onset_HDCZA"], diary_row["sleep_onset_diary"], diary_row["wake_onset_diary"], row["calendar_day"]))
    SPT_HDCZA_and_diary = pd.DataFrame(overlap, columns = ["sleep_onset_HDCZA", "wake_onset_HDCZA", "sleep_onset_diary", "wake_onset_diary", "calendar_day"])
    SPT_HDCZA_and_diary["diff_sleep_onset"] = (SPT_HDCZA_and_diary["sleep_onset_HDCZA"] - SPT_HDCZA_and_diary["sleep_onset_diary"]).dt.total_seconds() / 3600 # in hours
    SPT_HDCZA_and_diary["diff_wake_onset"] = (SPT_HDCZA_and_diary["wake_onset_HDCZA"] - SPT_HDCZA_and_diary["wake_onset_diary"]).dt.total_seconds() / 3600

    # Flag if the difference between the HDCZA and diary SPT is more than 30 minutes
    SPT_HDCZA_and_diary["flag_sleep_onset"] = np.where(np.abs(SPT_HDCZA_and_diary["diff_sleep_onset"]) > 30, 1, 0)
    SPT_HDCZA_and_diary["flag_wake_onset"] = np.where(np.abs(SPT_HDCZA_and_diary["diff_wake_onset"]) > 30, 1, 0)
    SPT_HDCZA_and_diary["participant"] = participant

    # Save to csv inside the participant folder
    SPT_HDCZA_and_diary.to_csv(os.path.join(silver_layer_path, participant, visit, sensor, "SPT_HDCZA_and_diary.csv"))

    diary_df = pd.concat([diary_df, SPT_HDCZA_and_diary])

    ####### FIGURE AND PDF GENERATION #######

# Drop data of participant 78936
# diary_df = diary_df[diary_df["participant"] != "78936"]  

***** 08623 *****
***** 08667 *****
***** 14219 *****
***** 20603 *****
***** 23483 *****
***** 36644 *****
***** 36765 *****
***** 36920 *****
***** 58319 *****
***** 59794 *****
***** 65381 *****
***** 68503 *****
***** 73496 *****
***** 74003 *****
***** 74913 *****
***** 78936 *****
***** 86693 *****
***** 97060 *****


  diary_SPT = pd.concat([diary_SPT, pd.DataFrame({"sleep_onset_diary": [np.nan], "wake_onset_diary": [np.nan], "calendar_day": [row["calendar_day"]]})], ignore_index = True)
  sleep_onset_diary = pd.to_datetime(diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "1"]["Data"] + " " + diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "1"]["Ora inizio"]).dropna().reset_index(drop = True)
  wake_onset_diary = pd.to_datetime(diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "2"]["Data"] + " " + diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "2"]["Ora inizio"]).dropna().reset_index(drop = True)
  diary_SPT = pd.concat([diary_SPT, pd.DataFrame({"sleep_onset_diary": [np.nan], "wake_onset_diary": [np.nan], "calendar_day": [row["calendar_day"]]})], ignore_index = True)
  diary_SPT = pd.concat([diary_SPT, pd.DataFrame({"sleep_onset_diary": [np.nan], "wake_onset_diary": [np.nan], "calendar_day": [row["calendar_day"]]})], ignore_index = True)
  diary_SPT = pd.concat([diary_

In [10]:
diary_sub = diary_df[diary_df["participant"] == "23483"]

In [11]:
nights = diary_sub[["sleep_onset_HDCZA", "wake_onset_HDCZA"]].values
nights

array([['2025-01-21T22:48:20.000000000', '2025-01-22T07:19:20.000000000'],
       ['2025-01-22T21:52:50.000000000', '2025-01-23T07:22:20.000000000'],
       ['2025-01-23T21:37:20.000000000', '2025-01-24T07:16:50.000000000'],
       ['2025-01-25T00:50:30.000000000', '2025-01-25T08:08:05.000000000'],
       ['2025-01-25T17:24:00.000000000', '2025-01-26T08:40:30.000000000'],
       ['2025-01-26T20:45:05.000000000', '2025-01-27T07:26:15.000000000'],
       ['2025-01-27T22:59:15.000000000', '2025-01-28T07:22:00.000000000']],
      dtype='datetime64[ns]')

In [18]:
plt.figure(figsize=(15, 5))
plt.plot(ppg_hrv)
for night in nights:
    plt.axvline(night[0], color="red", linestyle="--")
    plt.axvline(night[1], color="red", linestyle="--")
plt.title("ACC")

Text(0.5, 1.0, 'ACC')

In [None]:
threshold_bursts = 35/1000 # threshold for detecting bursts in mg (validated)
window_length = pd.Timedelta("5 min")  # window length
window_step = pd.Timedelta("1 min")  # window step

HRV = []  # Reset HRV storage

ibi_quiet_all = []

ppg_hrv.loc[pd.Timestamp("2025-01-26 21:00:00") : pd.Timestamp("2025-01-26 22:00:00")] = np.nan
acc_hrv.loc[pd.Timestamp("2025-01-26 21:00:00") : pd.Timestamp("2025-01-26 22:00:00")] = np.nan

for i, (start_sleep, end_sleep) in enumerate(nights):  # for each night

    print(start_sleep, end_sleep)

    # acc_night = compute_acc_SMV(a.loc[start_sleep:end_sleep])
    acc_night = acc_hrv.loc[start_sleep:end_sleep]
    ppg_night = ppg_hrv.loc[start_sleep:end_sleep]

    # Because of the disconnections, it's possible that no or little data is available for the night
    if len(acc_night) < 55*60*5: # 5 minutes
        continue
    if len(ppg_night) < 55*60*5: # 5 minutes
        continue

    # Detect wrist accelerometer bursts
    bursts = detect_bursts(acc_night, sampling_rate=55, alfa=threshold_bursts)

    # Extract quiet periods (no movement of the wrist)
    quiet_periods = pd.DataFrame()
    quiet_periods["start"] = bursts["end"].iloc[:-1].reset_index(drop=True)
    quiet_periods["end"] = bursts["start"].iloc[1:].reset_index(drop=True)

    for _, quiet_period in quiet_periods.iterrows():  # for each quiet period

        duration_quiet_period = quiet_period["end"] - quiet_period["start"]

        if duration_quiet_period < window_length:  # If the whole period is shorter than 5 min, skip it
            continue
            
        acc_quiet = acc_night.loc[quiet_period["start"]:quiet_period["end"]]
        ppg_quiet = ppg_night.loc[quiet_period["start"]:quiet_period["end"]]

         # Extract systolic peaks from the quiet PPG signal
        feet, peaks = MSPTDfast(ppg_quiet.values.flatten(), sampling_rate = 55)
        t_peaks = ppg_quiet.index.to_series().values[peaks]
        t_feet = ppg_quiet.index.to_series().values[feet]
        ibi = np.diff(t_peaks).astype('timedelta64[ns]').astype('float64') / 1e9  # seconds
        ibi = np.insert(ibi, 0, np.mean(ibi[1:10]), axis=0)  # Set first value as mean of next 10
        ibi = pd.Series(ibi, index=t_peaks)

        # Kubios artifact correction
        artifacts, env_diff_corrected = signal_fixpeaks(ibi.values, 64, iterative=False)
        artifacts_all = np.concatenate((artifacts["ectopic"], artifacts["missed"], artifacts["extra"], artifacts["longshort"]))
        ibi[ibi.index[artifacts_all.astype(int)]] = np.nan
        ibi_clean = ibi.interpolate(method="linear")

        # Generate overlapping windows of 5 minutes with 30-second overlap
        current_start = quiet_period["start"]
        
        # For each window
        while current_start + window_length <= quiet_period["end"]:

            current_end = current_start + window_length

            ibi_window = ibi_clean.loc[current_start:current_end]

            # HRV Features
            ppi = ibi_window.values * 1000  # Convert to ms
            diff_ppi = np.diff(ppi)

            rmssd = np.sqrt(np.mean(diff_ppi**2))  # RMSSD
            sdnn = np.std(ppi, ddof=1)  # SDNN
            PIP = compute_HRF(ppi)  # Custom HRF computation

            HRV.append({
                "night": i+1,
                "time": current_start + window_length / 2,
                "rmssd": rmssd, 
                "sdnn": sdnn, 
                "PIP": PIP
            })

            current_start += window_step  # Move to next overlapping window

        ibi_quiet_all.append(ibi_clean)

HRV_df = pd.DataFrame(HRV)
ibi_quiet_df = pd.concat(ibi_quiet_all)

2025-01-21T22:48:20.000000000 2025-01-22T07:19:20.000000000


In [30]:
# Extract the mean HRV features for each night
HRV_df_mean = HRV_df.groupby("night").mean().round(2)
HRV_df_std = HRV_df.groupby("night").std().round(2)
HRV_df_std

HRV_df_nightly = pd.concat([HRV_df_mean, HRV_df_std], axis=1, keys=["mean", "std"])
HRV_df_nightly

Unnamed: 0_level_0,mean,mean,mean,mean,std,std,std,std
Unnamed: 0_level_1,time,rmssd,sdnn,PIP,time,rmssd,sdnn,PIP
night,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,2025-01-22 02:44:18.007333888,32.27,35.15,0.61,0 days 02:12:19.132156696,14.47,10.23,0.04
3,2025-01-24 03:11:42.227953664,25.89,29.19,0.61,0 days 02:07:54.038465420,9.93,8.55,0.04
4,2025-01-25 04:12:18.322620160,19.48,25.89,0.62,0 days 01:56:34.060829021,2.94,7.5,0.04
5,2025-01-26 03:46:07.803227392,20.7,24.24,0.63,0 days 01:57:05.987944478,3.25,4.79,0.03
6,2025-01-27 04:37:37.950905344,48.51,41.8,0.62,0 days 01:26:21.799805066,19.58,12.56,0.04
7,2025-01-28 03:47:23.024957952,43.35,37.11,0.64,0 days 01:41:49.956967499,18.66,12.0,0.04


In [None]:
save_data_path = "/Users/augenpro/Documents/Age-IT/"

HRV_df.to_csv(save_data_path + "HRV_df.csv")
ibi_quiet_df.to_csv(save_data_path + "ibi_quiet_df.csv")

In [16]:
save_data_path = "/Users/augenpro/Documents/Age-IT/"

HRV_df = pd.read_csv(save_data_path + "HRV_df.csv")
HRV_df.index = pd.to_datetime(HRV_df["time"])
ibi_quiet_df = pd.read_csv(save_data_path + "ibi_quiet_df.csv", index_col=0)

In [22]:
from visualization.plot_HRV import plot_HRV

# # Decide whether to plot in notebook or in a separate window
from bokeh.plotting import output_notebook
output_notebook()

plot_HRV(ibi_quiet_df, HRV_df, hrv_metric="rmssd")

Unnamed: 0_level_0,time,day,time.1,rmssd,sdnn,PIP
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-01-21 22:51:28.491656448,2025-01-21 22:51:28.491656448,1,2025-01-21 22:51:28.491656448,31.417001,32.888467,0.557632
2025-01-21 22:52:28.491656448,2025-01-21 22:52:28.491656448,1,2025-01-21 22:52:28.491656448,32.207848,32.362180,0.554180
2025-01-21 22:53:28.491656448,2025-01-21 22:53:28.491656448,1,2025-01-21 22:53:28.491656448,32.608418,33.195948,0.580745
2025-01-21 22:54:28.491656448,2025-01-21 22:54:28.491656448,1,2025-01-21 22:54:28.491656448,30.298551,28.806778,0.588785
2025-01-21 23:10:29.784238080,2025-01-21 23:10:29.784238080,1,2025-01-21 23:10:29.784238080,24.893302,45.099106,0.507599
...,...,...,...,...,...,...
2025-01-28 07:08:35.713797120,2025-01-28 07:08:35.713797120,7,2025-01-28 07:08:35.713797120,59.357741,47.587380,0.629179
2025-01-28 07:09:35.713797120,2025-01-28 07:09:35.713797120,7,2025-01-28 07:09:35.713797120,57.142175,47.344066,0.631420
2025-01-28 07:10:35.713797120,2025-01-28 07:10:35.713797120,7,2025-01-28 07:10:35.713797120,51.087081,46.841885,0.611940
2025-01-28 07:11:35.713797120,2025-01-28 07:11:35.713797120,7,2025-01-28 07:11:35.713797120,44.988880,49.837794,0.578171


# All sub

In [4]:
import neurokit2 as nk
from utils.compute_acc_metrics import compute_acc_SMV
from sleep.detect_acc_bursts import *
from heart_rate.ppg_beat_detection import MSPTDfast
from heart_rate.kubios import signal_fixpeaks
from heart_rate.heart_rate_fragmentation import compute_HRF

In [18]:
import warnings
warnings.filterwarnings("ignore")

In [16]:
import seaborn as sns
sns.set_context("talk")

# Load and plot beliefppg HR for each participant
data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

# participants = ["23483"]

sensor = "VeritySense"#, "GeneActivPolso", "GeneActivCaviglia", "RootiRx"]
save_data_path_silver = "/Users/augenpro/Documents/Age-IT/data/Silver"

for participant in participants:

    hr_belief_path = save_data_path_silver + f"/{participant}/{visit}/{sensor}"
    if len(os.listdir(hr_belief_path)) == 0:
        continue

    hr_belief_df = pd.read_pickle(save_data_path_silver + f"/{participant}/{visit}/{sensor}/hr_belief.pkl")

    plt.figure(figsize=(15, 5))
    plt.plot(hr_belief_df.resample("10min").mean(), '-o', linewidth=1)
    plt.title("Participant: " + participant)
    plt.xlabel("Time")
    plt.ylabel("Heart Rate (bpm)")

In [8]:
from utils.resample_signal import apply_resample
from heart_rate.beliefppg.inference.inference import infer_hr
from nonwear.vanhees2013 import vanhees_nonwear

data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

# participants = ["23483"]

sensor = "VeritySense"#, "GeneActivPolso", "GeneActivCaviglia", "RootiRx"]
save_data_path_silver = "/Users/augenpro/Documents/Age-IT/data/Silver"

threshold_bursts = 35/1000 # threshold for detecting bursts in mg (validated)
window_length = pd.Timedelta("5 min")  # window length
window_step = pd.Timedelta("1 min")  # window step

for participant in participants:
    acc_df = pd.DataFrame()
    ppg_df = pd.DataFrame()
    # print(sensor)
    path = os.path.join(data_path, participant, visit, sensor)
    files_in_path = [f for f in os.listdir(path) if not f.startswith(".")]
    if len(files_in_path) <= 2: # "_" and "AVRO"
        continue

    print(f"**************** {participant} ****************")
    acc_path = os.path.join(path, "AVRO/acc")
    ppg_path = os.path.join(path, "AVRO/ppg")
    for f in sorted(os.listdir(acc_path), key=extract_datetime):
        current_acc = read_avro_veritysense(os.path.join(acc_path, f))
        acc_df = pd.concat([acc_df, current_acc])

    for f in sorted(os.listdir(ppg_path), key=extract_datetime):
        current_ppg = read_avro_veritysense(os.path.join(ppg_path, f))
        ppg_df = pd.concat([ppg_df, current_ppg])

    # Check for disconnections
    t_acc_rec = acc_df.index[acc_df.index.to_series().diff().dt.total_seconds() > 0.5]
    t_acc_disc = acc_df.index[np.where(acc_df.index.to_series().diff().dt.total_seconds() > 0.5)[0]-1]
    t_disc_rec_acc = pd.DataFrame({"disc": t_acc_disc, "rec": t_acc_rec})

    good_portions_acc = pd.DataFrame(columns=["start", "end"])
    good_portions_acc["start"] = t_disc_rec_acc["rec"].iloc[:-1].reset_index(drop=True)
    good_portions_acc["end"] = t_disc_rec_acc["disc"].iloc[1:].reset_index(drop=True)
    first_disconnection_acc = t_disc_rec_acc["disc"].iloc[0]
    last_reconnection_acc = t_disc_rec_acc["rec"].iloc[-1]

    # acc_list contains the ACC signal divided into the potions with signal (between disconnetions)
    acc_list = []
    acc_list.append(acc_df.loc[:first_disconnection_acc])
    for i, portion in good_portions_acc.iterrows():
        acc_list.append(acc_df.loc[portion["start"]:portion["end"]])
    acc_list.append(acc_df.loc[last_reconnection_acc:])

    # PPG
    t_ppg_rec = ppg_df.index[ppg_df.index.to_series().diff().dt.total_seconds() > 1]
    t_ppg_disc = ppg_df.index[np.where(ppg_df.index.to_series().diff().dt.total_seconds() > 1)[0]-1]
    t_disc_rec_ppg = pd.DataFrame({"disc": t_ppg_disc, "rec": t_ppg_rec})

    good_portions_ppg = pd.DataFrame(columns=["start", "end"])
    good_portions_ppg["start"] = t_disc_rec_ppg["rec"].iloc[:-1].reset_index(drop=True)
    good_portions_ppg["end"] = t_disc_rec_ppg["disc"].iloc[1:].reset_index(drop=True)
    first_disconnection_ppg = t_disc_rec_ppg["disc"].iloc[0]
    last_reconnection_ppg = t_disc_rec_ppg["rec"].iloc[-1]

    # ppg_list contains the PPG signal divided into the potions with signal (between disconnetions)
    ppg_list = []
    ppg_list.append(ppg_df.loc[:first_disconnection_ppg])
    for i, portion in good_portions_ppg.iterrows():
        ppg_list.append(ppg_df.loc[portion["start"]:portion["end"]])
    ppg_list.append(ppg_df.loc[last_reconnection_ppg:])

    # Loop across the good portions of acc and ppg, see if they overlap, and if so, resample acc to ppg
    acc_list_beliefppg = []
    ppg_list_beliefppg = []
    for i in range(len(acc_list)):
        for j in range(len(ppg_list)):
            if acc_list[i].index[0] < ppg_list[j].index[-1] and acc_list[i].index[-1] > ppg_list[j].index[0]:
                # resample acc to ppg
                acc_resampled_df= pd.DataFrame()
                for acc_axis in ["x", "y", "z"]:
                    t_acc_resampled, acc_resampled = apply_resample(
                        time = acc_list[i].index.astype(np.int64).to_numpy(), 
                        time_rs = ppg_list[j].index.astype(np.int64).to_numpy(),
                        data = acc_list[i][f"acc_{acc_axis}"].values
                        )
                    acc_resampled_df[f"acc_{acc_axis}"] = acc_resampled[0]
                acc_resampled_df.index = t_acc_resampled
                acc_list_beliefppg.append(acc_resampled_df)
                ppg_list_beliefppg.append(ppg_list[j])

    hr_all = []
    time_hr_all = []

    for i, (ppg, acc) in enumerate(zip(ppg_list_beliefppg, acc_list_beliefppg)): ##### Each portion has the same shape between ACC and PPG lesgooo
        ppg = ppg["ppg1"]

        if len(ppg) < 55*60*5:
            continue
        if len(acc) < 55*60*5:
            continue

        if ppg.shape[0] < acc.shape[0]:
            acc = acc.iloc[:ppg.shape[0]]
        elif ppg.shape[0] > acc.shape[0]:
            ppg = ppg.iloc[:acc.shape[0]]

        time = ppg.index
        hr, idxs = infer_hr(ppg=ppg.values.reshape(-1,1), ppg_freq=64, acc=acc.values, acc_freq=64)
        hr_all.append(hr)
        time_hr_all.append(time[idxs])
        
    # Concatenate all the portions
    hr_belief = np.concatenate(hr_all)
    t_hr_belief = pd.to_datetime(np.concatenate(time_hr_all))

    # Convert to pandas Series
    hr_belief_df = pd.Series(hr_belief, index=t_hr_belief)

    # Save to pickle
    hr_belief_df.to_pickle(save_data_path_silver + f"/{participant}/{visit}/{sensor}/hr_belief.pkl")

**************** 08623 ****************


2025-02-22 23:30:19.807675: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence




2025-02-22 23:30:25.678525: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


**************** 23483 ****************


2025-02-22 23:30:27.100358: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]




2025-02-22 23:32:32.305346: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]




2025-02-22 23:33:07.206287: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]




2025-02-22 23:33:33.135569: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]




2025-02-22 23:34:58.190718: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]




2025-02-22 23:37:35.865434: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


**************** 36920 ****************
**************** 58319 ****************
**************** 65381 ****************


2025-02-22 23:46:55.056953: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


**************** 68503 ****************
**************** 74913 ****************


2025-02-23 00:02:27.855402: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


**************** 78936 ****************
**************** 86693 ****************
**************** 97060 ****************


In [7]:
acc["acc_x"].shape

(1018437,)

In [None]:
from utils.resample_signal import apply_resample
from heart_rate.beliefppg.inference.inference import infer_hr

data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

# participants = ["23483"]

sensor = "VeritySense"#, "GeneActivPolso", "GeneActivCaviglia", "RootiRx"]
save_data_path_silver = "/Users/augenpro/Documents/Age-IT/data/Silver"

threshold_bursts = 35/1000 # threshold for detecting bursts in mg (validated)
window_length = pd.Timedelta("5 min")  # window length
window_step = pd.Timedelta("1 min")  # window step

participants = ["97060"]

for participant in participants:
    acc_df = pd.DataFrame()
    ppg_df = pd.DataFrame()
    # print(sensor)
    path = os.path.join(data_path, participant, visit, sensor)
    files_in_path = [f for f in os.listdir(path) if not f.startswith(".")]
    if len(files_in_path) <= 2: # "_" and "AVRO"
        continue

    print(f"**************** {participant} ****************")
    acc_path = os.path.join(path, "AVRO/acc")
    ppg_path = os.path.join(path, "AVRO/ppg")
    for f in sorted(os.listdir(acc_path), key=extract_datetime):
        current_acc = read_avro_veritysense(os.path.join(acc_path, f))
        acc_df = pd.concat([acc_df, current_acc])

    for f in sorted(os.listdir(ppg_path), key=extract_datetime):
        current_ppg = read_avro_veritysense(os.path.join(ppg_path, f))
        ppg_df = pd.concat([ppg_df, current_ppg])

    # Check for disconnections
    t_acc_rec = acc_df.index[acc_df.index.to_series().diff().dt.total_seconds() > 0.5]
    t_acc_disc = acc_df.index[np.where(acc_df.index.to_series().diff().dt.total_seconds() > 0.5)[0]-1]
    t_disc_rec_acc = pd.DataFrame({"disc": t_acc_disc, "rec": t_acc_rec})

    good_portions_acc = pd.DataFrame(columns=["start", "end"])
    good_portions_acc["start"] = t_disc_rec_acc["rec"].iloc[:-1].reset_index(drop=True)
    good_portions_acc["end"] = t_disc_rec_acc["disc"].iloc[1:].reset_index(drop=True)
    first_disconnection_acc = t_disc_rec_acc["disc"].iloc[0]
    last_reconnection_acc = t_disc_rec_acc["rec"].iloc[-1]

    # acc_list contains the ACC signal divided into the potions with signal (between disconnetions)
    acc_list = []
    acc_list.append(acc_df.loc[:first_disconnection_acc])
    for i, portion in good_portions_acc.iterrows():
        acc_list.append(acc_df.loc[portion["start"]:portion["end"]])
    acc_list.append(acc_df.loc[last_reconnection_acc:])

    # PPG
    t_ppg_rec = ppg_df.index[ppg_df.index.to_series().diff().dt.total_seconds() > 1]
    t_ppg_disc = ppg_df.index[np.where(ppg_df.index.to_series().diff().dt.total_seconds() > 1)[0]-1]
    t_disc_rec_ppg = pd.DataFrame({"disc": t_ppg_disc, "rec": t_ppg_rec})

    good_portions_ppg = pd.DataFrame(columns=["start", "end"])
    good_portions_ppg["start"] = t_disc_rec_ppg["rec"].iloc[:-1].reset_index(drop=True)
    good_portions_ppg["end"] = t_disc_rec_ppg["disc"].iloc[1:].reset_index(drop=True)
    first_disconnection_ppg = t_disc_rec_ppg["disc"].iloc[0]
    last_reconnection_ppg = t_disc_rec_ppg["rec"].iloc[-1]

    # ppg_list contains the PPG signal divided into the potions with signal (between disconnetions)
    ppg_list = []
    ppg_list.append(ppg_df.loc[:first_disconnection_ppg])
    for i, portion in good_portions_ppg.iterrows():
        ppg_list.append(ppg_df.loc[portion["start"]:portion["end"]])
    ppg_list.append(ppg_df.loc[last_reconnection_ppg:])

    # Loop across the good portions of acc and ppg, see if they overlap, and if so, resample acc to ppg
    acc_list_beliefppg = []
    ppg_list_beliefppg = []
    for i in range(len(acc_list)):
        acc_list[i].loc[:, "acc_SVM"] = compute_acc_SMV(acc_list[i])
        for j in range(len(ppg_list)):
            if acc_list[i].index[0] < ppg_list[j].index[-1] and acc_list[i].index[-1] > ppg_list[j].index[0]:
                # resample acc to ppg
                acc_resampled_df = pd.DataFrame()
                t_acc_resampled, acc_resampled = apply_resample(
                    time = acc_list[i].index.astype(np.int64).to_numpy(), 
                    time_rs = ppg_list[j].index.astype(np.int64).to_numpy(),
                    data = acc_list[i]["acc_SVM"].values
                    )
                acc_resampled_df["acc_SVM"] = acc_resampled[0]
                acc_resampled_df.index = pd.to_datetime(t_acc_resampled)
                acc_list_beliefppg.append(acc_resampled_df)
                ppg_list_beliefppg.append(ppg_list[j])

    # Concatenate all the portions
    ppg_hrv = pd.DataFrame()
    acc_hrv = pd.DataFrame()

    for i, (ppg, acc) in enumerate(zip(ppg_list_beliefppg, acc_list_beliefppg)):
        ppg = nk.ppg_clean(ppg["ppg1"].values, sampling_rate = 55)
        ppg = pd.Series(ppg, index=ppg_list_beliefppg[i].index)
        acc = acc["acc_SVM"]

        if len(ppg) < 55*60*5: # 5 minutes
            continue
        if len(acc) < 55*60*5: # 5 minutes
            continue

        if ppg.shape[0] < acc.shape[0]:
            acc = acc.iloc[:ppg.shape[0]]
        elif ppg.shape[0] > acc.shape[0]:
            ppg = ppg.iloc[:acc.shape[0]]

        ppg_hrv = pd.concat([ppg_hrv, ppg])
        acc_hrv = pd.concat([acc_hrv, acc])

    HRV = []  # Reset HRV storage
    ibi_quiet_all = []

    ppg_hrv.loc[pd.Timestamp("2025-01-26 21:00:00") : pd.Timestamp("2025-01-26 22:00:00")] = np.nan
    acc_hrv.loc[pd.Timestamp("2025-01-26 21:00:00") : pd.Timestamp("2025-01-26 22:00:00")] = np.nan

    diary_sub = pd.read_csv(save_data_path_silver + f"/{participant}/{visit}/GeneActivPolso/SPT_HDCZA_and_diary.csv")
    nights = diary_sub[["sleep_onset_HDCZA", "wake_onset_HDCZA"]].values
    
    for i, (start_sleep, end_sleep) in enumerate(nights):  # for each night

        print(start_sleep, end_sleep)

        # acc_night = compute_acc_SMV(a.loc[start_sleep:end_sleep])
        acc_night = acc_hrv.loc[start_sleep:end_sleep]
        ppg_night = ppg_hrv.loc[start_sleep:end_sleep]

        # Because of the disconnections, it's possible that no or little data is available for the night
        if len(acc_night) < 55*60*5: # 5 minutes
            continue
        if len(ppg_night) < 55*60*5: # 5 minutes
            continue

        # Detect wrist accelerometer bursts
        bursts = detect_bursts(acc_night, sampling_rate=55, alfa=threshold_bursts)

        # Extract quiet periods (no movement of the wrist)
        quiet_periods = pd.DataFrame()
        quiet_periods["start"] = bursts["end"].iloc[:-1].reset_index(drop=True)
        quiet_periods["end"] = bursts["start"].iloc[1:].reset_index(drop=True)

        for _, quiet_period in quiet_periods.iterrows():  # for each quiet period

            duration_quiet_period = quiet_period["end"] - quiet_period["start"]

            if duration_quiet_period < window_length:  # If the whole period is shorter than 5 min, skip it
                continue
                
            acc_quiet = acc_night.loc[quiet_period["start"]:quiet_period["end"]]
            ppg_quiet = ppg_night.loc[quiet_period["start"]:quiet_period["end"]]

            # Extract systolic peaks from the quiet PPG signal
            feet, peaks = MSPTDfast(ppg_quiet.values.flatten(), sampling_rate = 55)
            t_peaks = ppg_quiet.index.to_series().values[peaks]
            t_feet = ppg_quiet.index.to_series().values[feet]
            ibi = np.diff(t_peaks).astype('timedelta64[ns]').astype('float64') / 1e9  # seconds
            ibi = np.insert(ibi, 0, np.mean(ibi[1:10]), axis=0)  # Set first value as mean of next 10
            ibi = pd.Series(ibi, index=t_peaks)

            # Kubios artifact correction
            artifacts, env_diff_corrected = signal_fixpeaks(ibi.values, 64, iterative=False)
            artifacts_all = np.concatenate((artifacts["ectopic"], artifacts["missed"], artifacts["extra"], artifacts["longshort"]))
            ibi[ibi.index[artifacts_all.astype(int)]] = np.nan
            ibi_clean = ibi.interpolate(method="linear")

            # Generate overlapping windows of 5 minutes with 30-second overlap
            current_start = quiet_period["start"]
            
            # For each window
            while current_start + window_length <= quiet_period["end"]:

                current_end = current_start + window_length

                ibi_window = ibi_clean.loc[current_start:current_end]

                # HRV Features
                ppi = ibi_window.values * 1000  # Convert to ms
                diff_ppi = np.diff(ppi)

                mean_nn = np.mean(ppi)  # Mean NN
                rmssd = np.sqrt(np.mean(diff_ppi**2))  # RMSSD
                sdnn = np.std(ppi, ddof=1)  # SDNN
                PIP = compute_HRF(ppi)  # Custom HRF computation

                HRV.append({
                    "night": i+1,
                    "time": current_start + window_length / 2,
                    "mean_nn": mean_nn,
                    "rmssd": rmssd,
                    "sdnn": sdnn,
                    "PIP": PIP
                })

                current_start += window_step  # Move to next overlapping window

            ibi_quiet_all.append(ibi_clean)

    if len(ibi_quiet_all) == 0: # No night data available
        continue

    HRV_df = pd.DataFrame(HRV)
    ibi_quiet_df = pd.concat(ibi_quiet_all)

    # Save to CSV
    HRV_df.to_csv(save_data_path_silver + f"/{participant}/{visit}/{sensor}/HRV_night.csv")
    ibi_quiet_df.to_csv(save_data_path_silver + f"/{participant}/{visit}/{sensor}/IBI_night.csv")

**************** 86693 ****************


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acc_list[i].loc[:, "acc_SVM"] = compute_acc_SMV(acc_list[i])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acc_list[i].loc[:, "acc_SVM"] = compute_acc_SMV(acc_list[i])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acc_list[i].loc[:, "acc_SVM"] = compute_acc_SMV(acc_list[i])
A value is trying to b

2025-01-24 00:35:50 2025-01-24 07:00:35
2025-01-24 21:06:35 2025-01-25 06:30:05
2025-01-25 13:47:20 2025-01-26 19:14:05
2025-01-26 20:32:55 2025-01-27 06:06:00
2025-01-27 20:43:40 2025-01-28 05:23:10
2025-01-29 01:00:25 2025-01-29 07:24:45
2025-01-29 20:49:50 2025-01-30 05:22:30


In [18]:
plt.figure(figsize=(19, 11))
plt.subplot(2, 1, 1)
plt.plot(ppg_hrv)
plt.title("PPG")
plt.xlabel("Time")
plt.ylabel("PPG")
plt.subplot(2, 1, 2, sharex = plt.subplot(2, 1, 1))
plt.plot(acc_hrv)
plt.title("ACC")
plt.xlabel("Time")
plt.ylabel("ACC")

Text(0, 0.5, 'ACC')

In [9]:
plt.figure(figsize=(15, 5))
plt.plot(ibi_quiet_df)

[<matplotlib.lines.Line2D at 0x14cf33f20>]

2025-02-22 19:58:45.211 python[38109:7908261] +[IMKClient subclass]: chose IMKClient_Modern


In [8]:
# plt.figure(figsize=(15, 5))
# plt.plot(ibi_quiet_df)
participant = "74913"
ibi_quiet_df = pd.read_csv(save_data_path_silver + f"/{participant}/{visit}/{sensor}/IBI_night.csv", index_col=0)
ibi_quiet_df.index = pd.to_datetime(ibi_quiet_df.index)
# plt.plot(ibi_quiet_df, '-o', linewidth=1)

In [None]:
plt.close()

In [None]:
# Load and plot HRV features for each participant
data_path = "/Users/augenpro/Documents/Age-IT/data/Silver/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if
                        not p.startswith(".")])  # list of the participants
visit = "T0 (baseline)"  # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

sensor = "VeritySense"  # , "GeneActivPolso", "GeneActivCaviglia", "RootiRx"
save_data_path_silver = "/Users/augenpro/Documents/Age-IT/data/Silver"

for participant in participants:

    path = save_data_path_silver + f"/{participant}/{visit}/{sensor}"

    if "HRV_night.csv" not in os.listdir(path):
        continue
    print(f"**************** {participant} ****************")
    HRV_df = pd.read_csv(save_data_path_silver + f"/{participant}/{visit}/{sensor}/HRV_night.csv")
    HRV_df.index = pd.to_datetime(HRV_df["time"])
    ibi_quiet_df = pd.read_csv(save_data_path_silver + f"/{participant}/{visit}/{sensor}/IBI_night.csv", index_col=0)
    ibi_quiet_df.index = pd.to_datetime(ibi_quiet_df.index)
    plt.figure(figsize=(15, 5))
    plt.subplot(2, 1, 1)
    plt.plot(ibi_quiet_df, linewidth=1)
    plt.title("Participant: " + participant)
    plt.xlabel("Time")
    plt.ylabel("NN (ms)")
    plt.subplot(2, 1, 2, sharex = plt.subplot(2, 1, 1))
    plt.plot(HRV_df["rmssd"], '-o', linewidth=1)
    plt.title("Participant: " + participant)
    plt.xlabel("Time")
    plt.ylabel("RMSSD (ms)")

**************** 23483 ****************
**************** 36920 ****************
**************** 65381 ****************
**************** 68503 ****************
**************** 74913 ****************
**************** 78936 ****************
**************** 86693 ****************
**************** 97060 ****************


In [8]:
ibi_quiet_df

Unnamed: 0,0
2025-01-21 22:48:59.495808512,0.931096
2025-01-21 22:49:00.390408704,0.894600
2025-01-21 22:49:01.284975616,0.894567
2025-01-21 22:49:02.197825536,0.912850
2025-01-21 22:49:03.147192320,0.949367
...,...
2025-01-28 07:15:55.927106560,0.930632
2025-01-28 07:15:56.857766912,0.912382
2025-01-28 07:15:57.751900160,0.894133
2025-01-28 07:15:58.664316416,0.912416


# Save results to gold

In [14]:
data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

# participants = ["23483"]

sensor = "VeritySense"#, "GeneActivPolso", "GeneActivCaviglia", "RootiRx"]
save_data_path_silver = "/Users/augenpro/Documents/Age-IT/data/Silver"

hr_day_summary = pd.DataFrame()
hr_night_summary = pd.DataFrame()

for participant in participants:
    acc_df = pd.DataFrame()
    ppg_df = pd.DataFrame()
    # print(sensor)
    
    path_belief = os.path.join(save_data_path_silver, participant, visit, sensor)
    if len(os.listdir(path_belief)) == 0:
        continue

    hr_belief = pd.read_pickle(save_data_path_silver + f"/{participant}/{visit}/{sensor}/hr_belief.pkl")

    # Load sleep diary
    diary_sub = pd.read_csv(save_data_path_silver + f"/{participant}/{visit}/GeneActivPolso/SPT_HDCZA_and_diary.csv")
    nights = diary_sub[["sleep_onset_HDCZA", "wake_onset_HDCZA"]].values

    # Extract mean HR during the day, and mean HR during the night
    hr_day = []
    hr_night = []

    hr_parameters_night = {
        "minutes_of_recording": [],
        "night": [],
        "calendar_date" : [],
        "mean_HR": [],
        "std_HR": [],
    }

    hr_parameters_day = {
        "minutes_of_recording": [],
        "day": [],
        "calendar_date" : [],
        "mean_HR": [],
        "std_HR": [],
    }

    # 1st day: from beginning of the recording to the first sleep onset

    hr_parameters_day["minutes_of_recording"].append((len(hr_belief.loc[:nights[0][0]]) * 2) / 60) # *2 because beliefPPG outputs every 2 seconds
    hr_parameters_day["day"].append(1)
    hr_parameters_day["calendar_date"].append(hr_belief.index[0].date())
    hr_parameters_day["mean_HR"].append(hr_belief.loc[:nights[0][0]].mean())
    hr_parameters_day["std_HR"].append(hr_belief.loc[:nights[0][0]].std())

    for i, (start_sleep, end_sleep) in enumerate(nights):  # for each night

        start_sleep = pd.to_datetime(start_sleep)
        end_sleep = pd.to_datetime(end_sleep)

        hr_parameters_night["minutes_of_recording"].append((len(hr_belief.loc[start_sleep:end_sleep]) * 2) / 60) # *2 because beliefPPG outputs every 2 seconds
        hr_parameters_night["night"].append(i+1)
        hr_parameters_night["calendar_date"].append(start_sleep.date())
        hr_parameters_night["mean_HR"].append(hr_belief.loc[start_sleep:end_sleep].mean())
        hr_parameters_night["std_HR"].append(hr_belief.loc[start_sleep:end_sleep].std())

        if i < len(nights) - 1:
            hr_parameters_day["minutes_of_recording"].append((len(hr_belief.loc[end_sleep:nights[i+1][0]]) * 2) / 60) # *2 because beliefPPG outputs every 2 seconds
            hr_parameters_day["day"].append(i+1)
            hr_parameters_day["calendar_date"].append(end_sleep.date())
            hr_parameters_day["mean_HR"].append(hr_belief.loc[end_sleep:nights[i+1][0]].mean())
            hr_parameters_day["std_HR"].append(hr_belief.loc[end_sleep:nights[i+1][0]].std())
    
    # Last day: from the last wake onset to the end of the recording
    hr_parameters_day["minutes_of_recording"].append((len(hr_belief.loc[end_sleep:]) * 2) / 60) # *2 because beliefPPG outputs every 2 seconds
    hr_parameters_day["day"].append(i+2)
    hr_parameters_day["calendar_date"].append(end_sleep.date())
    hr_parameters_day["mean_HR"].append(hr_belief.loc[end_sleep:].mean())
    hr_parameters_day["std_HR"].append(hr_belief.loc[end_sleep:].std())
    
    hr_parameters_day = pd.DataFrame(hr_parameters_day)
    hr_parameters_night = pd.DataFrame(hr_parameters_night)

    hr_parameters_day["participant"] = participant
    hr_parameters_night["participant"] = participant

    hr_day_summary = pd.concat([hr_day_summary, hr_parameters_day]).round(2)
    hr_night_summary = pd.concat([hr_night_summary, hr_parameters_night]).round(2)

In [15]:
cols = hr_day_summary.columns.tolist()
cols = cols[-1:] + cols[:-1]
hr_day_summary = hr_day_summary[cols].round(2)
save_path = "/Users/augenpro/Documents/Age-IT/risultati_preliminari/"

hr_day_summary.to_csv(save_path + "hr_day_summary_DAILY.csv", index = False)

cols = hr_night_summary.columns.tolist()
cols = cols[-1:] + cols[:-1]
hr_night_summary = hr_night_summary[cols].round(2)
save_path = "/Users/augenpro/Documents/Age-IT/risultati_preliminari/"

hr_night_summary.to_csv(save_path + "hr_night_summary_DAILY.csv", index = False)

In [28]:
len(os.listdir(path_belief))

1

In [16]:
hr_day_summary_total = hr_day_summary.drop(columns=["calendar_date", "day", "minutes_of_recording", "std_HR"]).groupby("participant").mean().round(2)
# Make "n_nights" the first column
cols = hr_day_summary_total.columns.tolist()
cols = cols[-1:] + cols[:-1]
hr_day_summary_total = hr_day_summary_total[cols]

hr_day_summary_total.index = hr_day_summary_total.index.astype(int)

In [17]:
hrv_night_summary_total = hr_night_summary.drop(columns=["calendar_date", "night", "minutes_of_recording", "std_HR"]).groupby("participant").mean().round(2)
# Make "n_nights" the first column
cols = hrv_night_summary_total.columns.tolist()
cols = cols[-1:] + cols[:-1]
hrv_night_summary_total = hrv_night_summary_total[cols]

hrv_night_summary_total.index = hrv_night_summary_total.index.astype(int)

In [18]:
save_path = "/Users/augenpro/Documents/Age-IT/risultati_preliminari/"
total = pd.read_csv(save_path + "total_summary_new.csv", index_col=0, sep = ";")
total

Unnamed: 0_level_0,n_nights,SPT_duration,TST,WASO,N_awakenings,SE,COSINOR_mesor,COSINOR_amplitude,COSINOR_acrophase,IS,IV,inactivity,light,moderate,vigorous,longest_bout_time,longest_bout_steps,bouts_over_3min,total_steps
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
8623,7.0,10.41,7.8,2.19,21.0,0.76,2.491,1.21,3.764,0.71,526.0,513.12,129.38,71.25,31.0,1566.5,2005.62,4.38,6945.38
14219,7.0,9.06,6.3,2.59,17.0,0.7,2.429,665.0,3.783,707.0,859.0,614.0,235.29,8.29,0.0,271.38,230.38,2.62,3188.5
23483,7.0,8.98,7.06,1.55,17.57,0.79,2.544,0.88,3.799,616.0,711.0,614.0,191.29,46.57,0.0,184.0,172.12,1.12,2363.0
36644,7.0,8.18,6.88,1.11,13.0,0.85,2.65,1.281,3.58,793.0,378.0,573.0,317.67,87.17,0.0,564.25,571.75,1.38,4110.5
36920,6.0,6.9,6.48,0.39,8.17,0.94,2.59,912.0,3.747,807.0,459.0,650.8,302.0,61.4,0.0,129.29,116.86,0.29,2343.0
58319,1.0,6.71,4.9,1.75,15.0,0.73,,,,,,,,,,175.75,165.25,0.75,2477.38
59794,6.0,7.76,5.8,1.8,11.0,0.71,2.349,699.0,4.029,467.0,996.0,670.5,190.0,10.67,0.0,88.0,76.71,0.14,1426.14
65381,7.0,8.88,6.7,1.67,17.0,0.76,2.441,888.0,4.111,625.0,883.0,635.67,259.0,27.83,0.0,136.0,126.71,0.29,2051.0
68503,6.0,7.06,6.1,0.84,15.17,0.86,2.883,1.133,4.09,696.0,699.0,457.83,292.83,153.0,0.0,102.62,90.12,0.25,2073.25
73496,7.0,6.45,4.4,1.92,7.43,0.7,2.813,1.041,4.061,784.0,457.0,538.71,360.71,84.0,0.0,122.75,101.75,0.12,2764.5


In [24]:
# Concatenate the HR and HRV summaries
total = pd.concat([total, hr_day_summary_total, hrv_night_summary_total], axis=1)
total

Unnamed: 0_level_0,n_nights,SPT_duration,TST,WASO,N_awakenings,SE,COSINOR_mesor,COSINOR_amplitude,COSINOR_acrophase,IS,...,inactivity,light,moderate,vigorous,longest_bout_time,longest_bout_steps,bouts_over_3min,total_steps,mean_HR,mean_HR
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8623,7.0,10.41,7.8,2.19,21.0,0.76,2.491,1.21,3.764,0.71,...,513.12,129.38,71.25,31.0,1566.5,2005.62,4.38,6945.38,107.64,
14219,7.0,9.06,6.3,2.59,17.0,0.7,2.429,665.0,3.783,707.0,...,614.0,235.29,8.29,0.0,271.38,230.38,2.62,3188.5,,
23483,7.0,8.98,7.06,1.55,17.57,0.79,2.544,0.88,3.799,616.0,...,614.0,191.29,46.57,0.0,184.0,172.12,1.12,2363.0,88.98,87.35
36644,7.0,8.18,6.88,1.11,13.0,0.85,2.65,1.281,3.58,793.0,...,573.0,317.67,87.17,0.0,564.25,571.75,1.38,4110.5,,
36920,6.0,6.9,6.48,0.39,8.17,0.94,2.59,912.0,3.747,807.0,...,650.8,302.0,61.4,0.0,129.29,116.86,0.29,2343.0,101.81,77.59
58319,1.0,6.71,4.9,1.75,15.0,0.73,,,,,...,,,,,175.75,165.25,0.75,2477.38,87.69,
59794,6.0,7.76,5.8,1.8,11.0,0.71,2.349,699.0,4.029,467.0,...,670.5,190.0,10.67,0.0,88.0,76.71,0.14,1426.14,,
65381,7.0,8.88,6.7,1.67,17.0,0.76,2.441,888.0,4.111,625.0,...,635.67,259.0,27.83,0.0,136.0,126.71,0.29,2051.0,95.64,83.38
68503,6.0,7.06,6.1,0.84,15.17,0.86,2.883,1.133,4.09,696.0,...,457.83,292.83,153.0,0.0,102.62,90.12,0.25,2073.25,88.21,78.89
73496,7.0,6.45,4.4,1.92,7.43,0.7,2.813,1.041,4.061,784.0,...,538.71,360.71,84.0,0.0,122.75,101.75,0.12,2764.5,,


In [None]:
# HRV night

# Load and plot HRV features for each participant
data_path = "/Users/augenpro/Documents/Age-IT/data/Silver/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if
                        not p.startswith(".")])  # list of the participants
visit = "T0 (baseline)"  # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

In [20]:
hr_parameters_day["minutes_of_recording"][0] / 60

4.432777777777777

In [3]:
len(hr_belief.loc[:nights[0][0]]) /2 /60

66.49166666666666

In [4]:
plt.figure(figsize=(15, 5))
plt.plot(hr_belief.loc[:nights[0][0]])

[<matplotlib.lines.Line2D at 0x1042d9af0>]

2025-02-23 13:43:21.079 python[45093:8203385] +[IMKClient subclass]: chose IMKClient_Modern


In [None]:
plt.plot(hr_belief.loc[:nights[0][0]].index.dt.total_seconds())

[<matplotlib.lines.Line2D at 0x12c0e0050>]

In [15]:
hr_belief.loc[:nights[0][0]].index.total_seconds()

AttributeError: 'DatetimeIndex' object has no attribute 'total_seconds'

In [10]:
data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

# participants = ["23483"]

sensor = "VeritySense"#, "GeneActivPolso", "GeneActivCaviglia", "RootiRx"]
save_data_path_silver = "/Users/augenpro/Documents/Age-IT/data/Silver"

HRV_summary = pd.DataFrame()

for participant in participants:
    acc_df = pd.DataFrame()
    ppg_df = pd.DataFrame()
    # print(sensor)
    
    path_belief = os.path.join(save_data_path_silver, participant, visit, sensor)
    if "HRV_night.csv" not in os.listdir(path_belief):
        continue

    HRV_night = pd.read_csv(save_data_path_silver + f"/{participant}/{visit}/{sensor}/HRV_night.csv", index_col=0)

    # Drop all rows where RMSSD is lower than 10 or higher than 200
    HRV_night = HRV_night[(HRV_night["rmssd"] > 10) & (HRV_night["rmssd"] < 200)]

    HRV_parameters = HRV_night.drop(columns = ["time"]).groupby("night").mean().round(2)
    HRV_parameters["minutes_of_recording"] = HRV_night.groupby("night").size() # Number of windows (since the window step is 1 min)

    HRV_parameters["participant"] = participant

    HRV_summary = pd.concat([HRV_summary, HRV_parameters]).round(2)

HRV_summary["nights"] = HRV_summary.index
HRV_summary = HRV_summary.reset_index(drop=True)
HRV_summary["participant2"] = HRV_summary["participant"].astype(int)
HRV_summary.drop(columns = ["participant"], inplace = True)
HRV_summary.rename(columns = {"participant2": "participant"}, inplace = True)
HRV_summary

Unnamed: 0,mean_nn,rmssd,sdnn,PIP,minutes_of_recording,nights,participant
0,857.17,32.6,35.22,0.61,289,1,23483
1,819.8,25.93,29.2,0.61,224,3,23483
2,785.0,19.78,25.87,0.62,248,4,23483
3,760.34,20.7,24.24,0.63,204,5,23483
4,882.65,48.51,41.8,0.62,175,6,23483
5,830.58,43.42,37.14,0.64,212,7,23483
6,920.26,50.92,41.02,0.6,286,1,36920
7,906.87,33.77,37.99,0.57,241,1,65381
8,710.47,56.61,69.0,0.59,239,2,65381
9,886.9,35.94,38.12,0.58,259,4,65381


In [11]:
cols = HRV_summary.columns.tolist()
cols = cols[-1:] + cols[:-1]
HRV_summary = HRV_summary[cols].round(2)
save_path = "/Users/augenpro/Documents/Age-IT/risultati_preliminari/"

HRV_summary.to_csv(save_path + "HRV_summary_DAILY.csv", index = False)

In [13]:
HRV_summary_total = HRV_summary.drop(columns=["nights", "minutes_of_recording"]).groupby("participant").mean().round(2)
# Make "n_nights" the first column

HRV_summary_total.index = HRV_summary_total.index.astype(int)

HRV_summary_total

Unnamed: 0_level_0,mean_nn,rmssd,sdnn,PIP
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
23483,822.59,31.82,32.24,0.62
36920,920.26,50.92,41.02,0.6
65381,851.52,39.68,42.13,0.6
68503,910.53,26.52,30.76,0.62
74913,943.42,23.58,27.33,0.59
78936,932.47,29.73,55.37,0.53
86693,664.88,75.96,84.62,0.61
97060,1020.9,75.04,106.16,0.53


In [19]:
total = pd.read_csv(save_path + "total_summary_new.csv", index_col=0, sep = ";")


# Concatenate the HR and HRV summaries
total = pd.concat([total, hr_day_summary_total, hrv_night_summary_total, HRV_summary_total], axis=1)

In [21]:
total.to_csv(save_path + "TOTAL_SUMMARY.csv", sep = ";")

In [22]:
save_path

'/Users/augenpro/Documents/Age-IT/risultati_preliminari/'