# AGE-it - Exploratory Data Analysis

- Summary of the recordings - how many hours/days for each sensor?
- How many hour/das do we consider to be enough for the analysis?

### Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import glob

%matplotlib inline
#%matplotlib qt
mpl.rcParams['lines.linewidth'] = 0.91
plt.style.use('seaborn-v0_8-whitegrid')
%matplotlib qt

from avro.datafile import DataFileReader
from avro.io import DatumReader

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Slider, Select
from bokeh.layouts import gridplot
from bokeh.models import Range1d
from bokeh.io import export_png
from bokeh.models import DatetimeTickFormatter

## GENEActiv
Files are already converted into .bin with the bin2parquet notebook

In [5]:
data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

# participants = participants[5:6] # select the first participant
participants = ["08667", "20603", "36765"]

sensors = ["GeneActivPolso", "GeneActivCaviglia"]#, "RootiRx"]

geneactiv = {}

for participant in participants:
    print("***** " + participant + " *****")
    for sensor in sensors:
        # print(sensor)
        # Load the data
        path = os.path.join(data_path, participant, visit, sensor)
        files = os.listdir(os.path.join(data_path, participant, visit, sensor))
        # if there are already parquet files, skip
        for f in files:
            if f.endswith(".parquet"):
                # print(f)
                acc_df = pd.read_parquet(os.path.join(path, f))
                geneactiv[sensor] = acc_df
                if len(acc_df) == 0:
                    print(f"{sensor}: is empty")
                else:
                    print(f"{sensor}:  {acc_df.index[-1] - acc_df.index[0]}".split(".")[0])

    print("")
# # Plot the data
# p = figure(plot_width=800, plot_height=400, x_axis_type="datetime", title=f"{participant} - {sensor} - {visit}")
# p.line(data["timestamp"], data["x"], line_width=2, legend_label="x", color="blue")
# p.line(data["timestamp"], data["y"], line_width=2, legend_label="y", color="green")
# p.line(data["timestamp"], data["z"], line_width=2, legend_label="z", color="red")
# p.xaxis.formatter = DatetimeTickFormatter(days="%d/%m %H:%M", hours="%H:%M")
# p.legend.location = "top_left"
# p.legend.click_policy="hide"
# show(p)
# export_png(p, filename=f"{participant}_{sensor}_{visit}.png")
# print(f"{participant}_{sensor}_{visit}.png")
# print("")

***** 08667 *****
GeneActivPolso:  5 days 18:02:50
GeneActivCaviglia:  5 days 23:59:02

***** 20603 *****
GeneActivPolso:  6 days 21:00:17
GeneActivCaviglia:  6 days 20:59:56

***** 36765 *****
GeneActivPolso:  6 days 23:19:53
GeneActivCaviglia:  6 days 23:20:35



In [6]:
# Recording duration
for sensor, data in geneactiv.items():
    print(f"{sensor}: {data.index[-1] - data.index[0]}".split(".")[0])

GeneActivPolso: 6 days 23:19:53
GeneActivCaviglia: 6 days 23:20:35


In [10]:
from utils.compute_acc_metrics import compute_acc_SMV

# for sensor, data in geneactiv.items():
#     data = compute_acc_SMV(data)
#     geneactiv[sensor] = data

plt.figure(figsize = (19,11))
plt.subplot(2, 1, 1)
plt.plot(geneactiv["GeneActivPolso"].iloc[:1000000], label = "GeneActivPolso")
plt.plot(geneactiv["GeneActivCaviglia"].iloc[:1000000], label = "GeneActivCaviglia")
plt.title("Recording Start", fontsize = 21)
plt.legend()
plt.subplot(2, 1, 2)
plt.plot(geneactiv["GeneActivPolso"].iloc[-1000000:], label = "GeneActivPolso")
plt.plot(geneactiv["GeneActivCaviglia"].iloc[-1000000:], label = "GeneActivCaviglia")
plt.title("Recording End", fontsize = 21)
plt.legend()

<matplotlib.legend.Legend at 0x1723c48c0>

## Verity Sense

In [2]:
from data_io.veritysense.convert_polar import process_polar

In [3]:
import fastavro

def read_avro_veritysense(file_path, offset_vs = 946684800000000000+ 3600 * 1e9):
    """
    Reads an Avro file and returns the data as a pandas DataFrame.
    """
    with open(file_path, "rb") as avro_file:
        # Use fastavro.reader to read the Avro file
        reader = fastavro.reader(avro_file)
        # Convert the records to a list
        records = [record for record in reader]
       # to dataframe
        df = pd.DataFrame(records)
        df.index = pd.to_datetime(df["timestamp"] + offset_vs, unit="ns") 
        df.drop("timestamp", axis=1, inplace=True)
    return df / 1000

In [4]:
import re
from datetime import datetime

# Function to extract full datetime (including time) for sorting
def extract_datetime(filename):
    match = re.search(r'(\w{3}) (\w{3}) (\d{2}) (\d{2})-(\d{2})-(\d{2}) .* (\d{4})', filename)
    if match:
        weekday, month, day, hour, minute, second, year = match.groups()
        date_str = f"{day} {month} {year} {hour}:{minute}:{second}"
        date_obj = datetime.strptime(date_str, "%d %b %Y %H:%M:%S")  # Convert to datetime object
        return date_obj
    return datetime.max  # Default to a max value if parsing fails

In [5]:
data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

# participants = participants[0:1] # select the first participant

participants = ["97060",]# "20603", "36765"]

participants = ["36920"]

sensor = "VeritySense"#, "GeneActivPolso", "GeneActivCaviglia", "RootiRx"]

for participant in participants:
    acc_df = pd.DataFrame()
    ppg_df = pd.DataFrame()
    # print(sensor)
    path = os.path.join(data_path, participant, visit, sensor)
    files_in_path = [f for f in os.listdir(path) if not f.startswith(".")]
    if len(files_in_path) <= 2: # "_" and "AVRO"
        continue

    print(f"**************** {participant} ****************")
    acc_path = os.path.join(path, "AVRO/acc")
    ppg_path = os.path.join(path, "AVRO/ppg")
    for f in sorted(os.listdir(acc_path), key=extract_datetime):
        current_acc = read_avro_veritysense(os.path.join(acc_path, f))
        acc_df = pd.concat([acc_df, current_acc])

    for f in sorted(os.listdir(ppg_path), key=extract_datetime):
        current_ppg = read_avro_veritysense(os.path.join(ppg_path, f))
        ppg_df = pd.concat([ppg_df, current_ppg])

    t_acc_rec = acc_df.index[acc_df.index.to_series().diff().dt.total_seconds() > 0.5]
    t_acc_disc = acc_df.index[np.where(acc_df.index.to_series().diff().dt.total_seconds() > 0.5)[0]-1]
    t_disc_rec_acc = pd.DataFrame({"disc": t_acc_disc, "rec": t_acc_rec})
    total_duration_acc = acc_df.index[-1] - acc_df.index[0]
    disconnetions_duration_acc = t_disc_rec_acc["rec"] - t_disc_rec_acc["disc"]
    effective_duration_acc = total_duration_acc - disconnetions_duration_acc.sum()
    if len(t_acc_disc) > 0:
        print("\n ***** Disconnections in ACC *****")
        for i in range(len(t_acc_disc)):
            print(f"Disconnection {i+1}:")
            print(f"Start: {t_acc_disc[i].strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"End: {t_acc_rec[i].strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"Duration: {str(t_acc_rec[i] - t_acc_disc[i]).split(' ')[2][3:5]} min and {str(t_acc_rec[i] - t_acc_disc[i]).split(' ')[2][6:8]} s")
        print("Number of disconnections: " + str(len(t_acc_disc)))
        print("Total duration of disconnections: " + str(np.sum([t_acc_rec[i] - t_acc_disc[i] for i in range(len(t_acc_disc))])).split(' ')[2][:8] + " (hours, minutes and seconds)")
        print(f"ACC effective duration: {effective_duration_acc}".split(".")[0])
    # Fill disconnection portions with NaNs and linearly interpolate
    for start, end in zip(t_acc_disc, t_acc_rec):
        acc_df.loc[start:end] = np.nan
    acc_df = acc_df.interpolate(method='time')

    t_ppg_rec = ppg_df.index[ppg_df.index.to_series().diff().dt.total_seconds() > 1]
    t_ppg_disc = ppg_df.index[np.where(ppg_df.index.to_series().diff().dt.total_seconds() > 1)[0]-1]
    t_disc_rec_ppg = pd.DataFrame({"disc": t_ppg_disc, "rec": t_ppg_rec})
    total_duration_ppg = ppg_df.index[-1] - ppg_df.index[0]
    disconnetions_duration_ppg = t_disc_rec_ppg["rec"] - t_disc_rec_ppg["disc"]
    effective_duration_ppg = total_duration_ppg - disconnetions_duration_ppg.sum()
    if len(t_ppg_disc) > 0:
        print("\n ***** Disconnections in PPG *****")
        for i in range(len(t_ppg_disc)):
            print(f"Disconnection {i+1}:")
            print(f"Start: {t_ppg_disc[i].strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"End: {t_ppg_rec[i].strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"Duration: {str(t_ppg_rec[i] - t_ppg_disc[i]).split(' ')[2][3:5]} min and {str(t_ppg_rec[i] - t_ppg_disc[i]).split(' ')[2][6:8]} s")
        print("Number of disconnections: " + str(len(t_ppg_disc)))
        print("Total duration of disconnections: " + str(np.sum([t_ppg_rec[i] - t_ppg_disc[i] for i in range(len(t_ppg_disc))])).split(' ')[2][:8] + " (hours, minutes and seconds)")
        print(f"PPG effective duration: {effective_duration_ppg}".split(".")[0])
        print("")
    # Fill disconnection portions with NaNs and linearly interpolate
    for start, end in zip(t_ppg_disc, t_ppg_rec):
        ppg_df.loc[start:end] = np.nan
    ppg_df = ppg_df.interpolate(method='time')

**************** 36920 ****************

 ***** Disconnections in ACC *****
Disconnection 1:
Start: 2025-01-30 10:25:24
End: 2025-01-30 10:25:31
Duration: 00 min and 07 s
Disconnection 2:
Start: 2025-01-30 12:21:21
End: 2025-01-30 12:21:51
Duration: 00 min and 29 s
Disconnection 3:
Start: 2025-01-30 12:39:58
End: 2025-01-30 12:41:39
Duration: 01 min and 41 s
Disconnection 4:
Start: 2025-01-30 15:02:10
End: 2025-01-30 15:16:14
Duration: 14 min and 03 s
Disconnection 5:
Start: 2025-01-30 15:16:28
End: 2025-01-30 15:19:03
Duration: 02 min and 35 s
Disconnection 6:
Start: 2025-01-30 16:19:34
End: 2025-01-30 16:19:49
Duration: 00 min and 15 s
Disconnection 7:
Start: 2025-01-30 16:33:11
End: 2025-01-30 16:33:42
Duration: 00 min and 31 s
Disconnection 8:
Start: 2025-01-30 17:21:11
End: 2025-01-30 17:23:52
Duration: 02 min and 41 s
Disconnection 9:
Start: 2025-01-30 17:46:52
End: 2025-01-30 17:56:28
Duration: 09 min and 35 s
Disconnection 10:
Start: 2025-01-30 17:58:54
End: 2025-01-30 17:59:22

##### Soggetto 97060 ------- weird timestamps starting at 1970

In [8]:
total_dur = acc_df.index[-1] - acc_df.index[0]
disconnetions_dur = t_disc_rec_acc["rec"] - t_disc_rec_acc["disc"]
effective_dur = total_dur - disconnetions_dur.sum()
total_dur, disconnetions_dur.sum(), effective_dur

(Timedelta('6 days 21:33:21.926823424'),
 Timedelta('2 days 14:54:02.761108992'),
 Timedelta('4 days 06:39:19.165714432'))

In [None]:
for start, end in zip(t_acc_disc, t_acc_rec):
    print(start,end)
    break

ppg_df.loc[start:end]

2025-01-21 16:17:29.975070208 2025-01-21 16:20:03.052938240


Unnamed: 0_level_0,ppg1,ppg2,ppg3,ambient
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [13]:
import neurokit2 as nk
ppg_df["ppg_filt"] = -nk.signal_filter(ppg_df["ppg1"], lowcut=0.5, highcut=8, sampling_rate=55)

In [None]:
plt.figure(figsize = (19,11))
plt.subplot(2, 1, 1)
plt.plot(acc_df)
plt.title("Acceleration")
# plt.legend(acc_df.columns)
plt.subplot(2, 1, 2, sharex = plt.subplot(2, 1, 1))
plt.plot(ppg_df["ppg_filt"])
plt.title("PPG")
# plt.legend(["ppg_filt"])

Text(0.5, 1.0, 'PPG')

: 

In [None]:
# Calculate the actual recording duration by summing up the differences between consecutive timestamps
time_diffs = acc_df.index.to_series().diff().dropna()
overall_duration = time_diffs.sum()
print(f"Overall duration of the recording (excluding gaps): {overall_duration}")

Overall duration of the recording (excluding gaps): 0 days 05:29:49.749643520


In [None]:
ppg_df.index[-1] - ppg_df.index[0]

Timedelta('0 days 05:29:53.340454144')

## Rooti

In [36]:
rooti_file_path1 = "/Users/augenpro/Documents/Age-IT/data/Bronze/14219/T0 (baseline)/RootiRx/14219/measure/12483bd6-bee6-4975-9832-14b4772f2ecc/GSENSOR/1737113661.zip"
rooti_file_path2 = "/Users/augenpro/Documents/Age-IT/data/Bronze/14219/T0 (baseline)/RootiRx/14219/measure/12483bd6-bee6-4975-9832-14b4772f2ecc/GSENSOR/1737117261.zip"

acc1 = pd.read_csv(rooti_file_path1, compression='zip', header = None, names = ["timestamp", "packet", "acc_x", "acc_y", "acc_z", "idk", "idk2"])

acc2 = pd.read_csv(rooti_file_path2, compression='zip', header = None, names = ["timestamp", "packet", "acc_x", "acc_y", "acc_z", "idk", "idk2"])

acc1_xyz = acc1[["acc_x", "acc_y", "acc_z"]].values
acc2_xyz = acc2[["acc_x", "acc_y", "acc_z"]].values

# concat the two dataframes
acc_xyz = np.concatenate((acc1_xyz, acc2_xyz), axis = 0)

# plt.figure(figsize = (19,11))
# plt.plot(acc_xyz)

In [35]:
plt.figure(figsize = (19,11))
plt.plot(acc1["timestamp"].diff().dropna()) 

[<matplotlib.lines.Line2D at 0x118f64b30>]

In [37]:
samples_per_sec = acc1.groupby("timestamp").count()["packet"]
plt.figure(figsize = (19,11))
plt.plot(samples_per_sec)

[<matplotlib.lines.Line2D at 0x11901dbe0>]

In [None]:
samples_per_sec = acc1.groupby("timestamp").count()["packet"]

first_sec_interp = np.arange(samples_per_sec.index[0], samples_per_sec.index[1], 1/samples_per_sec.iloc[0])

# Do the same for all the other seconds
for i in range(1, len(samples_per_sec)-1):
    sec_interp = np.arange(samples_per_sec.index[i], samples_per_sec.index[i+1], 1/samples_per_sec.iloc[i])
    first_sec_interp = np.concatenate((first_sec_interp, sec_interp))

# Do the same for the last second
last_sec_interp = np.arange(samples_per_sec.index[-1], samples_per_sec.index[-1] + 1, 1/samples_per_sec.iloc[-1])

# Concatenate all the seconds
all_sec_interp = np.concatenate((first_sec_interp, last_sec_interp))

array([1.73711006e+09, 1.73711006e+09, 1.73711006e+09, ...,
       1.73711366e+09, 1.73711366e+09, 1.73711366e+09], shape=(112293,))

In [None]:
acc_interp = pd.DataFrame(acc1[["acc_x", "acc_y", "acc_z"]].values, index = pd.to_datetime(all_sec_interp, unit = "s"), columns = ["acc_x", "acc_y", "acc_z"])

from utils.compute_acc_metrics import compute_acc_SMV

acc_interp["acc_SMV"] = compute_acc_SMV(acc_interp)

plt.figure(figsize = (19,11))
plt.plot(acc_interp["acc_SMV"])

[<matplotlib.lines.Line2D at 0x1198c4b00>]

In [27]:
acc1.index

RangeIndex(start=0, stop=112293, step=1)

In [29]:
plt.figure(figsize = (19,11))
plt.plot(acc_interp.index.diff().dropna().total_seconds())

[<matplotlib.lines.Line2D at 0x13f674fb0>]

In [None]:
data_path = "/Users/augenpro/Documents/Age-IT/data/Gold/" # path to the folder containing the subjects
participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)
participants = ["14219"]
# participants = participants[5:6] # select the first participant

sensors = ["RootiRx"]#, "RootiRx"]

sensor = sensors[0]

for participant in participants:
    path = os.path.join(data_path, participant, visit, sensor)
    files = os.listdir(os.path.join(data_path, participant, visit, sensor))

    files = [f for f in files if f == participant]

    if len(files) == 0:
        # print("No RootiRx data")
        continue
    
    print("***** " + participant + " *****")
    rooti_path = os.path.join(data_path, participant, visit, sensor, files[0], "measure")

    raw_acc_path = [x[0] for x in os.walk(rooti_path) if x[0].endswith("/GSENSOR")][0]

    # print(raw_acc_path)

    acc_files = sorted(os.listdir(raw_acc_path))

    # print(len(acc_files))

    acc = pd.DataFrame()

    acc_first = pd.read_csv(raw_acc_path + '/' + acc_files[0], compression='zip', index_col = 0, header = None, names = ["packet", "acc_x", "acc_y", "acc_z", "idk", "idk2"])
    acc_first_start = pd.to_datetime(acc_first.index[0], unit = "s")

    acc_last = pd.read_csv(raw_acc_path + '/' + acc_files[-1], compression='zip', index_col = 0, header = None, names = ["packet", "acc_x", "acc_y", "acc_z", "idk", "idk2"])
    acc_last_end = pd.to_datetime(acc_last.index[-1], unit = "s")

    print(f"Total duration: {acc_last_end - acc_first_start}")

    for i, acc_file in enumerate(acc_files):
        acc1 = pd.read_csv(raw_acc_path + '/' + acc_file, compression='zip', header = None, names = ["timestamp", "packet", "acc_x", "acc_y", "acc_z", "idk", "idk2"])
        acc1 = acc1[["acc_x", "acc_y", "acc_z"]]
        acc1.index = pd.date_range(start = pd.to_datetime(acc1.index[0], unit = "s"), periods = len(acc1), freq = "0.032s")
        acc = pd.concat([acc, acc1])

    # print(path)

    print("")
    break

    # print(files)

***** 14219 *****
Total duration: 6 days 23:49:20



In [20]:
plt.figure(figsize = (19,11))
plt.plot(np.diff(acc.index).astype('timedelta64[s]'))

[<matplotlib.lines.Line2D at 0x1585d9730>]

In [None]:
acc1.head()

Unnamed: 0,packet,acc_x,acc_y,acc_z,idk,idk2
2025-01-17 10:34:21.000000,0,176,-191,1288,18,166
2025-01-17 10:34:21.003200,1,150,-183,1324,18,166
2025-01-17 10:34:21.006400,2,150,119,1187,18,166
2025-01-17 10:34:21.009600,3,115,-164,1379,18,166
2025-01-17 10:34:21.012800,4,60,-287,1468,18,166


# Synch between GENEActiv and Rooti

In [12]:
# data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze" # path to the folder containing the subjects
# participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
# visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

# # participants = participants[5:6] # select the first participant

# sensors = ["GeneActivPolso", "GeneActivCaviglia"]#, "RootiRx"]

# geneactiv = {}

# participants = ["14219"]

# for participant in participants:
#     print(participant)
#     for sensor in sensors:
#         path = os.path.join(data_path, participant, visit, sensor)
#         files = os.listdir(os.path.join(data_path, participant, visit, sensor))
#         # if there are already parquet files, skip
#         for f in files:
#             if f.endswith(".parquet"):
#                 # print(f)
#                 acc_gen = pd.read_parquet(os.path.join(path, f))
#                 # geneactiv[sensor] = acc_gen
#                 if len(acc_gen) == 0:
#                     print(f"{sensor}: is empty")
#                 else:
#                     print(f"{sensor}:  {acc_gen.index[-1] - acc_gen.index[0]}".split(".")[0])
#                 if sensor == "GeneActivPolso":
#                     geneactiv["wrist"] = acc_gen
#                 elif sensor == "GeneActivCaviglia":
#                     geneactiv["ankle"] = acc_gen
#                 del acc_gen
#     print("")

# Rooti
sensor = "RootiRx"
for participant in participants:
    path = os.path.join(data_path, participant, visit, sensor)
    files = os.listdir(os.path.join(data_path, participant, visit, sensor))

    files = [f for f in files if f == participant]

    if len(files) == 0:
        # print("No RootiRx data")
        continue
    
    print("***** " + participant + " *****")
    rooti_path = os.path.join(data_path, participant, visit, sensor, files[0], "measure")

    raw_acc_path = [x[0] for x in os.walk(rooti_path) if x[0].endswith("/GSENSOR")][0]
    acc_files = sorted(os.listdir(raw_acc_path))
    acc_rooti = pd.DataFrame()
    t_acc_rooti = pd.Series()
    first_acc_file = pd.read_csv(raw_acc_path + '/' + acc_files[0], compression='zip', index_col = 0, header = None)
    t_start_rooti = pd.to_datetime(first_acc_file.index[0], unit = "s")
    for i, acc_file in enumerate(acc_files):
        acc1 = pd.read_csv(raw_acc_path + '/' + acc_files[i], compression='zip', index_col = 0, header = None) / 1000 # convert to g
        t_acc_start = pd.to_datetime(acc1.index[0], unit = "s")
        t_acc_end = pd.to_datetime(acc1.index[-1]+1, unit = "s")
        acc1 = acc1.iloc[:, [1, 2, 3]].reset_index(drop = True)
        acc1.columns = ["acc_x", "acc_y", "acc_z"]
        t_acc1 = pd.date_range(start = t_acc_start, end = t_acc_end, periods=len(acc1)+1)[:-1]
        acc_rooti = pd.concat([acc_rooti, acc1])
        t_acc_rooti = pd.concat([t_acc_rooti, t_acc1.to_series()])

    # ecg_filtered_path = [x[0] for x in os.walk(rooti_path) if x[0].endswith("/FilteredECG")][0] + "/250"
    # ecg_filtered_files = sorted(os.listdir(ecg_filtered_path))
    # ecg_dataframes = []

    # # Iterate through ecg filtered files
    # for i, ecg_file in enumerate(ecg_filtered_files):
    #     ecg_filtered_df = pd.read_csv(
    #         ecg_filtered_path + '/' + ecg_file, 
    #         compression='zip', 
    #         header=None, 
    #         names=["ecg", "Rpeaks"]
    #     )
    #     # t_start = pd.to_datetime(ecg_filtered_files[0].split(".")[0], unit = "s")
    #     # ecg_filtered_df.index = pd.date_range(start = t_start, periods = len(ecg_filtered_df), freq = "0.004s")
    #     ecg_dataframes.append(ecg_filtered_df)
    # t_start_ecg = pd.to_datetime(int(ecg_filtered_files[0].split(".")[0]), unit = "s")
    # ecg_filtered = pd.concat(ecg_dataframes, axis=0)
    # ecg_filtered.index = pd.date_range(start = t_start_ecg, periods = len(ecg_filtered), freq = "0.004s")

    # # print(path)

    # print("")

***** 14219 *****


  t_acc_rooti = pd.concat([t_acc_rooti, t_acc1.to_series()])


### New for rooti

In [3]:
sensor = "RootiRx"
for participant in participants:
    path = os.path.join(data_path, participant, visit, sensor)
    files = os.listdir(os.path.join(data_path, participant, visit, sensor))

    files = [f for f in files if f == participant]

    if len(files) == 0:
        # print("No RootiRx data")
        continue
    
    print("***** " + participant + " *****")
    rooti_path = os.path.join(data_path, participant, visit, sensor, files[0], "measure")

    raw_acc_path = [x[0] for x in os.walk(rooti_path) if x[0].endswith("/GSENSOR")][0]
    acc_files = sorted(os.listdir(raw_acc_path))
    acc_rooti = pd.DataFrame()
    for i, acc_file in enumerate(acc_files):
        acc1 = pd.read_csv(raw_acc_path + '/' + acc_files[i], compression='zip', index_col = 0, header = None) / 1000
        acc1 = acc1.iloc[:, [1, 2, 3]].reset_index(drop = True) # Extract only ACC
        acc1.columns = ["acc_x", "acc_y", "acc_z"]
        acc_rooti = pd.concat([acc_rooti, acc1])

***** 14219 *****


In [4]:
acc_rooti.reset_index(drop = True, inplace = True)

In [13]:
acc_rooti.index = t_acc_rooti.index + pd.Timedelta("1h")

In [5]:
acc_rooti.head()

Unnamed: 0,acc_x,acc_y,acc_z
0,0.176,-0.191,1.288
1,0.15,-0.183,1.324
2,0.15,0.119,1.187
3,0.115,-0.164,1.379
4,0.06,-0.287,1.468


In [6]:
plt.figure(figsize = (19,11))
plt.plot(acc_rooti["acc_x"].iloc[:5000000])

[<matplotlib.lines.Line2D at 0x15ada0080>]

2025-02-20 11:30:03.866 python[85447:6279091] +[IMKClient subclass]: chose IMKClient_Modern


In [14]:
acc_gen = geneactiv["wrist"]

In [None]:
from utils.compute_acc_metrics import compute_acc_SMV

acc_rooti["acc_SMV"] = compute_acc_SMV(acc_rooti)
acc_gen["acc_SMV"] = compute_acc_SMV(acc_gen)

In [9]:
import seaborn as sns
sns.set_context("talk")

In [11]:
plt.figure(figsize = (19,11))
plt.plot(acc_gen["acc_SMV"].iloc[:500000].values, label = "GENEActiv", linewidth = 0.91)
plt.plot(acc_rooti["acc_SMV"].iloc[:500000], label = "RootiRx", linewidth = 0.91)
plt.legend(loc = "upper right")

<matplotlib.legend.Legend at 0x47a2cb6b0>

## Script that check for cross-correlations between different sensors, and adjust the start time accordingly

Assumption: the rotations are done within the first hour of recording

1. Resample the first hour of the signals to the same number of samples (for geneactiv 100 Hz)
2. Segment signal in 20 second windows with an overlap of 10s (the rotations last 10 seconds)
3. Compute the CCF for each of the windows 
4. Find the maximum and plot?

In [16]:
# 1. Resample the overall Rooti signal to the same number of samples (for geneactiv 100 Hz)

from utils.resample_signal import apply_resample

t_acc_rooti_resampled, acc_rooti_resampled = apply_resample(time = acc_rooti.index.astype(np.int64).to_numpy(), time_rs = acc_gen.index.astype(np.int64).to_numpy(), 
                                     data = acc_rooti["acc_SMV"].values)

acc_rooti_resampled = pd.Series(acc_rooti_resampled[0], index = pd.to_datetime(t_acc_rooti_resampled))

In [17]:
acc_rooti_resampled.shape, acc_gen.shape

((59925000,), (59925000, 6))

In [20]:
3*60*60*100

1080000

In [22]:
plt.figure(figsize = (19,11))
plt.plot(acc_rooti_resampled.iloc[:5*60*60*100].values, label = "RootiRx", linewidth = 0.91)
plt.plot(acc_gen["acc_SMV"].iloc[:5*60*60*100].values, label = "GENEActiv", linewidth = 0.91)
plt.legend(loc = "upper right")

<matplotlib.legend.Legend at 0x15d116a50>

In [66]:
# 1. Resample the first hour of the signals to the same number of samples (for geneactiv 100 Hz)

from utils.resample_signal import apply_resample

fs_res = 100

acc_rooti_first_hour = acc_rooti.loc[:t_start_rooti + pd.Timedelta("4h")]["acc_SMV"]
acc_gen_first_hour = acc_gen.loc[:acc_gen.index[0] + pd.Timedelta("3h")]["acc_SMV"]

t_acc_rooti_resampled, acc_rooti_resampled = apply_resample(time = acc_rooti_first_hour.index.astype(np.int64).to_numpy(), time_rs = acc_gen_first_hour.index.astype(np.int64).to_numpy(), 
                                     data = acc_rooti_first_hour.values)

acc_rooti_resampled = pd.Series(acc_rooti_resampled[0], index = pd.to_datetime(t_acc_rooti_resampled))

In [69]:
# 2. Segment the signals into 20s windows with 50% overlap

from utils.segment_signal import segment_signal

acc_gen_segments = segment_signal(acc_gen_first_hour, window_size = 20*fs_res, overlap = 0.5)
acc_rooti_segments = segment_signal(acc_rooti_resampled, window_size = 20*fs_res, overlap = 0.5)

In [68]:
plt.figure()
plt.plot(acc_rooti_resampled)

[<matplotlib.lines.Line2D at 0x3b96f7e60>]

In [70]:
# 3. Compute the crosscorrelation between the two signals for each window
from utils.crosscorr import crosscorr

offsets = []
max_corr = []
lags = np.arange(-400, 401, 1) # 400 samples = 4s, resolution of 1 samples = 0.01s
for i, (gen, rooti) in enumerate(zip(acc_gen_segments, acc_rooti_segments)):
    if len(gen) != len(rooti):
        continue
    ccf = [crosscorr(gen, rooti, lag) for lag in lags]
    max_corr.append(np.max(ccf))
    offset = np.argmax(ccf) - lags[-1]
    offset_s = offset / fs_res
    offsets.append(offset_s)

In [71]:
plt.figure(figsize = (11,5))
plt.subplot(2, 1, 1)
plt.plot(max_corr, '-*', label = "Max correlation")
plt.xlabel("Window")
plt.ylabel("Correlation")
plt.legend()
plt.subplot(2, 1, 2, sharex = plt.subplot(2, 1, 1))
plt.plot(offsets, '-*', label = "Offset (s)")
plt.xlabel("Window")
plt.ylabel("Offset (s)")
plt.legend()

<matplotlib.legend.Legend at 0x3af0caea0>

In [59]:
# Find the maximum correlation and the corresponding offset
max_corr = np.array(max_corr)
offsets = np.array(offsets)
max_corr_idx = np.argmax(max_corr)
max_corr_val = max_corr[max_corr_idx]

# Apply the offset to the RootiRx signal
offset = offsets[max_corr_idx]

acc_rooti_synched = acc_rooti.copy()
acc_rooti_synched.index = acc_rooti_synched.index - pd.Timedelta(f"{offset}s")
# Plot the signals
plt.figure(figsize = (19,11))
plt.plot(acc_gen["acc_SMV"].iloc[:500000], label = "GENEActiv ACC SMV")
plt.plot(acc_rooti_synched["acc_SMV"].iloc[:500000], label = "RootiRx ACC SMV")
plt.ylabel("Acceleration (g)")
plt.legend(loc = "upper left")

<matplotlib.legend.Legend at 0x3a7190e60>

## Do it for all GENEActiv

In [12]:
geneactiv.keys()

dict_keys(['wrist', 'ankle'])

In [None]:
from utils.compute_acc_metrics import compute_acc_SMV

# Compute SMV for the GENEActiv signals
geneactiv["wrist"]["acc_SMV"] = compute_acc_SMV(geneactiv["wrist"])
geneactiv["ankle"]["acc_SMV"] = compute_acc_SMV(geneactiv["ankle"])

In [32]:
acc_gen_wirst_first_hour = geneactiv["wrist"].loc[:geneactiv["wrist"].index[0] + pd.Timedelta("5min")]["acc_SMV"]
acc_gen_ankle_first_hour = geneactiv["ankle"].loc[:geneactiv["ankle"].index[0] + pd.Timedelta("5min")]["acc_SMV"]

# 1. Resample the first hour of the signals to the same number of samples (for geneactiv 100 Hz)
fs_res = 100

from utils.resample_signal import apply_resample

t_acc_gen_ankle_resampled, acc_gen_ankle_resampled = apply_resample(time = acc_gen_ankle_first_hour.index.astype(np.int64).to_numpy(), time_rs = acc_gen_wirst_first_hour.index.astype(np.int64).to_numpy(), 
                                     data = acc_gen_ankle_first_hour.values)

acc_gen_ankle_resampled = pd.Series(acc_gen_ankle_resampled[0], index = pd.to_datetime(t_acc_gen_ankle_resampled))

# 2. Segment the signals into 20s windows with 50% overlap

from utils.segment_signal import segment_signal

acc_gen_wrist_segments = segment_signal(acc_gen_wirst_first_hour, window_size = 20*fs_res, overlap = 0.5)
acc_gen_ankle_segments = segment_signal(acc_gen_ankle_resampled, window_size = 20*fs_res, overlap = 0.5)

In [33]:
acc_gen_wirst_first_hour

2025-01-17 11:38:44.000000000    0.987872
2025-01-17 11:38:44.009999990    1.025177
2025-01-17 11:38:44.019999981    1.045466
2025-01-17 11:38:44.029999971    1.076971
2025-01-17 11:38:44.039999962    1.043428
                                   ...   
2025-01-17 11:43:43.960000038    1.005571
2025-01-17 11:43:43.970000029    1.005720
2025-01-17 11:43:43.980000019    1.012752
2025-01-17 11:43:43.990000010    1.011449
2025-01-17 11:43:44.000000000    1.015252
Name: acc_SMV, Length: 30001, dtype: float64

In [35]:
# 3. Compute the crosscorrelation between the two signals for each window

from utils.crosscorr import crosscorr

offsets = []
max_corr = []
lags = np.arange(-400, 401, 1) # 400 samples = 4s, resolution of 1 samples = 0.01s
for i, (wrist, ankle) in enumerate(zip(acc_gen_wrist_segments, acc_gen_ankle_segments)):
    if len(wrist) != len(ankle):
        continue
    ccf = [crosscorr(wrist, ankle, lag) for lag in lags]
    max_corr.append(np.max(ccf))
    offset = np.argmax(ccf) - lags[-1]
    offset_s = offset / fs_res
    offsets.append(offset_s)

plt.figure(figsize = (11,5))
plt.subplot(2, 1, 1)
plt.plot(max_corr, '-*', label = "Max correlation")
plt.xlabel("Window")
plt.ylabel("Correlation")
plt.legend()
plt.subplot(2, 1, 2, sharex = plt.subplot(2, 1, 1))
plt.plot(offsets, '-*', label = "Offset (s)")
plt.xlabel("Window")
plt.ylabel("Offset (s)")
plt.legend()

<matplotlib.legend.Legend at 0x3c7dac740>

In [None]:
# Find the maximum correlation and the corresponding offset
max_corr = np.array(max_corr)
offsets = np.array(offsets)
max_corr_idx = np.argmax(max_corr)
max_corr_val = max_corr[max_corr_idx]
offset = offsets[max_corr_idx]

# Apply the offset to the GENEActiv ankle signal
acc_gen_ankle_synched = geneactiv["ankle"].copy()
acc_gen_ankle_synched.index = acc_gen_ankle_synched.index - pd.Timedelta(f"{offset}s")

# Plot the signals
plt.figure(figsize = (19,11))
plt.plot(geneactiv["wrist"]["acc_SMV"].iloc[:500000], label = "GENEActiv wrist ACC SMV")
plt.plot(acc_gen_ankle_synched["acc_SMV"].iloc[:500000], label = "GENEActiv ankle ACC SMV")
plt.ylabel("Acceleration (g)")
plt.legend(loc = "upper left")

<matplotlib.legend.Legend at 0x38d5156a0>

In [37]:
geneactiv["wrist"].head()

Unnamed: 0,x,y,z,acc_SMV
2025-01-17 11:38:44.000000000,-0.14322,-0.975911,0.054555,0.987872
2025-01-17 11:38:44.009999990,-0.131305,-1.015662,0.046677,1.025177
2025-01-17 11:38:44.019999981,-0.131305,-1.035537,0.058495,1.045466
2025-01-17 11:38:44.029999971,-0.127333,-1.063362,0.113641,1.076971
2025-01-17 11:38:44.039999962,-0.11939,-1.031562,0.101824,1.043428


In [None]:
from nonwear.DETACH import nimbaldetach

# Apply the non-wear detection algorithm to the GENEActiv wrist signal
start_stop_nw, _ = nimbaldetach(acc['x'].values, acc['y'].values, acc['z'].values, temp["temp"].values, accel_freq=64, temperature_freq=1, quiet=True)

# Create the structure in the silver layer

In [2]:
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"

# Create the same folder structure for the Silver data
data_path = "/Users/augenpro/Documents/Age-IT/data/Gold/" # path to the folder containing the subjects

participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]

for participant in participants:
    for visit in timeline:
        path = os.path.join(silver_layer_path, participant, visit)
        os.makedirs(path, exist_ok=True)
        os.makedirs(os.path.join(path, "GeneActivPolso"), exist_ok=True)
        os.makedirs(os.path.join(path, "GeneActivCaviglia"), exist_ok=True)
        os.makedirs(os.path.join(path, "RootiRx"), exist_ok=True)
        os.makedirs(os.path.join(path, "VeritySense"), exist_ok=True)
        os.makedirs(os.path.join(path, "Diario"), exist_ok=True)

# Process GGIR output

In [2]:
from matplotlib.backends.backend_pdf import PdfPages
from utils.compute_acc_metrics import compute_anglez
import seaborn as sns
sns.set_context("talk")

Diary legend:
- 1 --> lights off
- 2 --> lights on

#### Generate PDF for report

In [None]:
ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[0]

lights_off_events = ["1) A letto", "1) A letto, luci spente"]

participants = ["08667", "20603", "36765"]
# participants = ["36765"]

GGIR_output_dir = "output_icareit"

diary_df = pd.DataFrame()

pdf_path = os.path.join(ageit_path, "SleepReport_HDCZA_vs_Diary_wrist_newSubjects.pdf")

with PdfPages(pdf_path) as pdf:
    for participant in participants:

        print(f"***** {participant} *****")

        ### Load the GENEActiv data (for debugging purposes only) ######
        files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
        for f in files:
            if f.endswith(".parquet"):
                acc_gen = pd.read_parquet(os.path.join(bronze_layer_path, participant, visit, sensor, f))

        ###### Load the GGIR output data ######
        HDCZA_SPT = [] # start and end of SPT based on HDCZA (vanhees2018) 
        ggir_output_path = os.path.join(silver_layer_path, participant, visit, sensor, GGIR_output_dir)
        if not os.path.exists(ggir_output_path): # Skip participants with no Data
            continue
        ggir_part4_output = pd.read_csv(ggir_output_path + "/results/QC/part4_nightsummary_sleep_full.csv")
        for i, day_row in ggir_part4_output.iterrows():
            # Stupid thing to get the correct datetime for segmenting signals into day and night (but no alternatives I guess)
            if day_row["sleeponset_ts"][0] == "0": # if the first digit is after midnight (00, 01, 02, ...)
                sleep_onset = pd.to_datetime(str(pd.to_datetime(day_row["calendar_date"]).date() + pd.Timedelta("1d")) + " " + day_row["sleeponset_ts"])
            else:
                sleep_onset = pd.to_datetime(str(pd.to_datetime(day_row["calendar_date"]).date()) + " " + day_row["sleeponset_ts"])
            wake_onset = pd.to_datetime(str(pd.to_datetime(day_row["calendar_date"]).date() + pd.Timedelta("1d")) + " " + day_row["wakeup_ts"])
            day = pd.to_datetime(day_row["calendar_date"]).date()
            HDCZA_SPT.append((sleep_onset, wake_onset, day))
        HDCZA_SPT = pd.DataFrame(HDCZA_SPT, columns = ["sleep_onset_HDCZA", "wake_onset_HDCZA", "calendar_day"])

        ###### Load the diary data ######
        diary_raw_path = os.path.join(bronze_layer_path, participant, visit, "Diario", f"{participant}_{visit.split(" ")[0]}_Diario.xlsx")
        if not os.path.exists(diary_raw_path): # Skip participants with no Data
            continue
        diary_raw = pd.read_excel(diary_raw_path, sheet_name="Ore")
        diary_raw = diary_raw.dropna(how = "all") # Drop empty rows
        diary_raw = diary_raw[(diary_raw["Evento"].apply(lambda x: x[:1]) == "1") | (diary_raw["Evento"].apply(lambda x: x[:1]) == "2")].reset_index(drop = True)
        diary_raw["Data"] = diary_raw["Data"].apply(lambda x: str(x).split(" ")[0])
        diary_raw["Ora inizio"] = diary_raw["Ora inizio"].apply(lambda x: str(x))
        sleep_onset_diary = pd.to_datetime(diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "1"]["Data"] + " " + diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "1"]["Ora inizio"]).dropna().reset_index(drop = True)
        wake_onset_diary = pd.to_datetime(diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "2"]["Data"] + " " + diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "2"]["Ora inizio"]).dropna().reset_index(drop = True)
        diary_SPT = pd.DataFrame({"sleep_onset_diary": sleep_onset_diary, "wake_onset_diary": wake_onset_diary})
        diary_SPT["calendar_day"] = diary_SPT["sleep_onset_diary"].apply(lambda x: (x - pd.Timedelta(days=1)).date() if x.hour < 12 else x.date())
        # Fill the missing diary data
        for i, row in HDCZA_SPT.iterrows():
            if row["calendar_day"] not in diary_SPT["calendar_day"].values:
                diary_SPT = pd.concat([diary_SPT, pd.DataFrame({"sleep_onset_diary": [np.nan], "wake_onset_diary": [np.nan], "calendar_day": [row["calendar_day"]]})], ignore_index = True)
        diary_SPT = diary_SPT.sort_values(by = "calendar_day").reset_index(drop = True)
        HDCZA_SPT = HDCZA_SPT.sort_values(by = "calendar_day").reset_index(drop = True)

        # Check the overlap between the HDCZA and diary SPT
        overlap = []
        for i, row in HDCZA_SPT.iterrows():
            if row["calendar_day"] in diary_SPT["calendar_day"].values:
                diary_row = diary_SPT[diary_SPT["calendar_day"] == row["calendar_day"]].iloc[0]
                overlap.append((row["sleep_onset_HDCZA"], row["wake_onset_HDCZA"], diary_row["sleep_onset_diary"], diary_row["wake_onset_diary"], row["calendar_day"]))
        SPT_HDCZA_and_diary = pd.DataFrame(overlap, columns = ["sleep_onset_HDCZA", "wake_onset_HDCZA", "sleep_onset_diary", "wake_onset_diary", "calendar_day"])
        SPT_HDCZA_and_diary["diff_sleep_onset"] = (SPT_HDCZA_and_diary["sleep_onset_HDCZA"] - SPT_HDCZA_and_diary["sleep_onset_diary"]).dt.total_seconds() / 3600 # in hours
        SPT_HDCZA_and_diary["diff_wake_onset"] = (SPT_HDCZA_and_diary["wake_onset_HDCZA"] - SPT_HDCZA_and_diary["wake_onset_diary"]).dt.total_seconds() / 3600

        # Flag if the difference between the HDCZA and diary SPT is more than 30 minutes
        SPT_HDCZA_and_diary["flag_sleep_onset"] = np.where(np.abs(SPT_HDCZA_and_diary["diff_sleep_onset"]) > 30, 1, 0)
        SPT_HDCZA_and_diary["flag_wake_onset"] = np.where(np.abs(SPT_HDCZA_and_diary["diff_wake_onset"]) > 30, 1, 0)
        SPT_HDCZA_and_diary["participant"] = participant

        SPT_HDCZA_and_diary.to_csv(os.path.join(silver_layer_path, participant, visit, "GeneActivPolso", "SPT_HDCZA_and_diary.csv"))

        diary_df = pd.concat([diary_df, SPT_HDCZA_and_diary])
        # diary_df.to_csv(os.path.join(silver_layer_path, participant, visit, "GeneActivPolso", "SPT_HDCZA_and_diary.csv"))

        ####### FIGURE AND PDF GENERATION #######

        anglez = compute_anglez(acc_gen).to_frame()
        anglez_output_path = os.path.join(silver_layer_path, participant, visit, sensor, "anglez_wrist.parquet")
        # anglez = pd.read_parquet(os.path.join(silver_layer_path, participant, visit, sensor, "anglez.parquet"))

        anglez.to_parquet(anglez_output_path)
        diary_sub = diary_df[diary_df["participant"] == participant]

        plt.figure(figsize=(19,6))
        plt.plot(anglez, linewidth = 0.79)
        plt.ylabel("Angle Z (degrees)")
        plt.title(f"Participant {participant}")
        for i, row in diary_sub.iterrows():
            if participant not in ["23483", "36920", "78936"]:
                plt.hlines(y = 95, xmin = row["sleep_onset_diary"], xmax = row["wake_onset_diary"], color = "red", linewidth = 9)     
            else:
                plt.title(f"Participant {participant} - Sleep Diary Inconsistency")
            plt.axvspan(row["sleep_onset_HDCZA"], row["wake_onset_HDCZA"], color = "blue", alpha = 0.3)
        # Put the legend out of the figure
        if participant in ["23483", "36920", "78936"]:
            plt.legend(["Angle Z", "HDCZA SPT"], frameon = True, fancybox = True, shadow = True, loc = "lower right", bbox_to_anchor=(1.12, 0.5))
        else:
            plt.legend(["Angle Z", "Diary SPT", "HDCZA SPT"], frameon = True, fancybox = True, shadow = True, loc = "lower right", bbox_to_anchor=(1.12, 0.5))

        pdf.savefig()
        plt.close()

# Drop data of participant 78936
# diary_df = diary_df[diary_df["participant"] != "78936"]  

SyntaxError: unmatched ')' (3601841548.py, line 85)

In [31]:
ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[0]

lights_off_events = ["1) A letto", "1) A letto, luci spente"]

participants = ["08667", "20603", "36765"]
# participants = ["36765"]

GGIR_output_dir = "output_icareit"

diary_df = pd.DataFrame()

pdf_path = os.path.join(ageit_path, "SleepReport_HDCZA_vs_Diary_wrist_newSubjects.pdf")

for participant in participants:

    print(f"***** {participant} *****")

    ### Load the GENEActiv data (for debugging purposes only) ######
    files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
    for f in files:
        if f.endswith(".parquet"):
            acc_gen = pd.read_parquet(os.path.join(bronze_layer_path, participant, visit, sensor, f))

    ###### Load the GGIR output data ######
    HDCZA_SPT = [] # start and end of SPT based on HDCZA (vanhees2018) 
    ggir_output_path = os.path.join(silver_layer_path, participant, visit, sensor, GGIR_output_dir)
    if not os.path.exists(ggir_output_path): # Skip participants with no Data
        continue
    ggir_part4_output = pd.read_csv(ggir_output_path + "/results/QC/part4_nightsummary_sleep_full.csv")
    for i, day_row in ggir_part4_output.iterrows():
        # Stupid thing to get the correct datetime for segmenting signals into day and night (but no alternatives I guess)
        if day_row["sleeponset_ts"][0] == "0": # if the first digit is after midnight (00, 01, 02, ...)
            sleep_onset = pd.to_datetime(str(pd.to_datetime(day_row["calendar_date"]).date() + pd.Timedelta("1d")) + " " + day_row["sleeponset_ts"])
        else:
            sleep_onset = pd.to_datetime(str(pd.to_datetime(day_row["calendar_date"]).date()) + " " + day_row["sleeponset_ts"])
        wake_onset = pd.to_datetime(str(pd.to_datetime(day_row["calendar_date"]).date() + pd.Timedelta("1d")) + " " + day_row["wakeup_ts"])
        day = pd.to_datetime(day_row["calendar_date"]).date()
        HDCZA_SPT.append((sleep_onset, wake_onset, day))
    HDCZA_SPT = pd.DataFrame(HDCZA_SPT, columns = ["sleep_onset_HDCZA", "wake_onset_HDCZA", "calendar_day"])

    ###### Load the diary data ######
    diary_raw_path = os.path.join(bronze_layer_path, participant, visit, "Diario", f"{participant}_{visit.split(" ")[0]}_Diario.xlsx")
    if not os.path.exists(diary_raw_path): # Skip participants with no Data
        continue
    diary_raw = pd.read_excel(diary_raw_path, sheet_name="Ore")
    diary_raw = diary_raw.dropna(how = "all") # Drop empty rows
    diary_raw = diary_raw[(diary_raw["Evento"].apply(lambda x: x[:1]) == "1") | (diary_raw["Evento"].apply(lambda x: x[:1]) == "2")].reset_index(drop = True)
    diary_raw["Data"] = diary_raw["Data"].apply(lambda x: str(x).split(" ")[0])
    diary_raw["Ora inizio"] = diary_raw["Ora inizio"].apply(lambda x: str(x))
    sleep_onset_diary = pd.to_datetime(diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "1"]["Data"] + " " + diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "1"]["Ora inizio"]).dropna().reset_index(drop = True)
    wake_onset_diary = pd.to_datetime(diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "2"]["Data"] + " " + diary_raw[diary_raw["Evento"].apply(lambda x: x[:1]) == "2"]["Ora inizio"]).dropna().reset_index(drop = True)
    diary_SPT = pd.DataFrame({"sleep_onset_diary": sleep_onset_diary, "wake_onset_diary": wake_onset_diary})
    diary_SPT["calendar_day"] = diary_SPT["sleep_onset_diary"].apply(lambda x: (x - pd.Timedelta(days=1)).date() if x.hour < 12 else x.date())
    # Fill the missing diary data
    for i, row in HDCZA_SPT.iterrows():
        if row["calendar_day"] not in diary_SPT["calendar_day"].values:
            diary_SPT = pd.concat([diary_SPT, pd.DataFrame({"sleep_onset_diary": [np.nan], "wake_onset_diary": [np.nan], "calendar_day": [row["calendar_day"]]})], ignore_index = True)
    diary_SPT = diary_SPT.sort_values(by = "calendar_day").reset_index(drop = True)
    HDCZA_SPT = HDCZA_SPT.sort_values(by = "calendar_day").reset_index(drop = True)

    # Check the overlap between the HDCZA and diary SPT
    overlap = []
    for i, row in HDCZA_SPT.iterrows():
        if row["calendar_day"] in diary_SPT["calendar_day"].values:
            diary_row = diary_SPT[diary_SPT["calendar_day"] == row["calendar_day"]].iloc[0]
            overlap.append((row["sleep_onset_HDCZA"], row["wake_onset_HDCZA"], diary_row["sleep_onset_diary"], diary_row["wake_onset_diary"], row["calendar_day"]))
    SPT_HDCZA_and_diary = pd.DataFrame(overlap, columns = ["sleep_onset_HDCZA", "wake_onset_HDCZA", "sleep_onset_diary", "wake_onset_diary", "calendar_day"])
    SPT_HDCZA_and_diary["diff_sleep_onset"] = (SPT_HDCZA_and_diary["sleep_onset_HDCZA"] - SPT_HDCZA_and_diary["sleep_onset_diary"]).dt.total_seconds() / 3600 # in hours
    SPT_HDCZA_and_diary["diff_wake_onset"] = (SPT_HDCZA_and_diary["wake_onset_HDCZA"] - SPT_HDCZA_and_diary["wake_onset_diary"]).dt.total_seconds() / 3600

    # Flag if the difference between the HDCZA and diary SPT is more than 30 minutes
    SPT_HDCZA_and_diary["flag_sleep_onset"] = np.where(np.abs(SPT_HDCZA_and_diary["diff_sleep_onset"]) > 30, 1, 0)
    SPT_HDCZA_and_diary["flag_wake_onset"] = np.where(np.abs(SPT_HDCZA_and_diary["diff_wake_onset"]) > 30, 1, 0)
    SPT_HDCZA_and_diary["participant"] = participant

    SPT_HDCZA_and_diary.to_csv(os.path.join(silver_layer_path, participant, visit, "GeneActivPolso", "SPT_HDCZA_and_diary.csv"))

# Drop data of participant 78936
# diary_df = diary_df[diary_df["participant"] != "78936"]  

***** 08667 *****
***** 20603 *****
***** 36765 *****


In [None]:
# anglez = compute_anglez(acc_gen)

diary_sub = diary_df[diary_df["participant"] == "23483"]

plt.figure(figsize=(19,11))
plt.plot(anglez, linewidth = 0.91)
plt.ylabel("Angle Z (degrees)")
for i, row in diary_sub.iterrows():
    plt.axvspan(row["sleep_onset_HDCZA"], row["wake_onset_HDCZA"], color = "blue", alpha = 0.3)
    plt.hlines(y = 90, xmin = row["sleep_onset_diary"], xmax = row["wake_onset_diary"], color = "red")
    print(row["sleep_onset_diary"], row["wake_onset_diary"])
plt.legend(["Angle Z", "HDCZA SPT", "Diary SPT"], frameon = True, fancybox = True, shadow = True, loc = "lower right")

2025-01-21 22:00:00 2025-01-22 07:00:00
2025-01-22 22:00:00 2025-01-23 07:00:00
NaT NaT
2025-01-25 00:15:00 2025-01-26 07:00:00
NaT NaT
2025-01-26 23:00:00 2025-01-27 07:00:00
2025-01-27 23:00:00 2025-01-28 06:45:00


<matplotlib.legend.Legend at 0x136502150>

In [11]:
plt.figure(figsize = (11,5))
plt.subplot(1, 2, 1)
sns.histplot(diary_df["diff_sleep_onset"], bins = 50)
plt.axvline(x = 0, color = "black", linestyle = "--")
plt.xlabel("Difference in sleep onset (hours), n=15 subjects")
plt.ylabel("Frequency")
plt.subplot(1, 2, 2)
sns.histplot(diary_df["diff_wake_onset"], bins = 50)
plt.axvline(x = 0, color = "black", linestyle = "--")
plt.xlabel("Difference in wake onset (hours), n=15 subjects")
plt.ylabel("Frequency")

Text(0, 0.5, 'Frequency')

In [None]:
# Check the overlap between the HDCZA and diary SPT
overlap = []
for i, row in HDCZA_SPT.iterrows():
    if row["calendar_day"] in diary_SPT["calendar_day"].values:
        diary_row = diary_SPT[diary_SPT["calendar_day"] == row["calendar_day"]].iloc[0]
        overlap.append((row["sleep_onset_HDCZA"], row["wake_onset_HDCZA"], diary_row["sleep_onset_diary"], diary_row["wake_onset_diary"], row["calendar_day"]))
SPT_HDCZA_and_diary = pd.DataFrame(overlap, columns = ["sleep_onset_HDCZA", "wake_onset_HDCZA", "sleep_onset_diary", "wake_onset_diary", "calendar_day"])
SPT_HDCZA_and_diary["diff_sleep_onset"] = (SPT_HDCZA_and_diary["sleep_onset_HDCZA"] - SPT_HDCZA_and_diary["sleep_onset_diary"]).dt.total_seconds() / 60
SPT_HDCZA_and_diary["diff_wake_onset"] = (SPT_HDCZA_and_diary["wake_onset_HDCZA"] - SPT_HDCZA_and_diary["wake_onset_diary"]).dt.total_seconds() / 60

# Flag if the difference between the HDCZA and diary SPT is more than 30 minutes
SPT_HDCZA_and_diary["flag_sleep_onset"] = np.where(np.abs(SPT_HDCZA_and_diary["diff_sleep_onset"]) > 30, 1, 0)
SPT_HDCZA_and_diary["flag_wake_onset"] = np.where(np.abs(SPT_HDCZA_and_diary["diff_wake_onset"]) > 30, 1, 0)

# Plot the discrepancies between the HDCZA and diary SPT
import seaborn as sns
sns.set_context("talk")
plt.figure(figsize = (19,11))
sns.histplot(SPT_HDCZA_and_diary["diff_sleep_onset"]/60, bins = 50, color = "blue", kde = True, label = "Sleep onset")

<Axes: xlabel='diff_sleep_onset', ylabel='Count'>

In [51]:
SPT_HDCZA_and_diary["diff_sleep_onset"]

0   -124.333333
1     24.333333
2     30.000000
3   -250.333333
4           NaN
5           NaN
6           NaN
Name: diff_sleep_onset, dtype: float64

#### Activity Counts

In [4]:
from utils.compute_acc_metrics import compute_enmo
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
sns.set_context("talk")

ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
# sensor = sensors[0] ##### Wrist

# participants = ["08667", "20603", "36765"]

pdf_path = os.path.join(ageit_path, "ActivityCounts.pdf")

with PdfPages(pdf_path) as pdf:
    
    for participant in participants:

        print(f"***** {participant} *****")

        ###### Load the GGIR output data ######
        diary = pd.read_csv(os.path.join(silver_layer_path, participant, visit, "GeneActivPolso", "SPT_HDCZA_and_diary.csv"))

        ENMO_dict = {}

        for sensor in sensors:

            ### Load the GENEActiv data ######
            files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
            for f in files:
                if f.endswith("calibrated.parquet"):
                    acc_gen = pd.read_parquet(os.path.join(bronze_layer_path, participant, visit, sensor, f))
                    enmo = compute_enmo(acc_gen)
                    ENMO_dict[sensor] = enmo
            if sensor not in ENMO_dict.keys():
                ENMO_dict[sensor] = pd.Series()
        #         break
        #     break
        # break

        ####### FIGURE AND PDF GENERATION #######

        f, (ax1, ax2) = plt.subplots(2, 1, figsize = (19, 11), sharex = True)

        ax1.bar(ENMO_dict["GeneActivPolso"].resample("1min").mean().index, ENMO_dict["GeneActivPolso"].resample("1min").mean(), linewidth = 0.91, width=0.0009, color = "black")
        for i, (sleep_onset, wake_onset) in enumerate(zip(diary["sleep_onset_HDCZA"], diary["wake_onset_HDCZA"])):
            ax1.axvspan(sleep_onset, wake_onset, color = "blue", alpha = 0.1, lw = 0)
            if i == 0:
                ax1.legend(["Sleep period time", "ENMO Wrist"], frameon = True, fancybox = True, shadow = True, loc = "lower right", bbox_to_anchor=(1.06, 0.76))

        ax2.bar(ENMO_dict["GeneActivCaviglia"].resample("1min").mean().index, ENMO_dict["GeneActivCaviglia"].resample("1min").mean(), linewidth = 0.91, width=0.0009, color = "black")
        ax2.legend(["ENMO Ankle", "__nolegend__"], frameon = True, fancybox = True, shadow = True, loc = "lower right", bbox_to_anchor=(1.06, 0.76))

        for sleep_onset, wake_onset in zip(diary["sleep_onset_HDCZA"], diary["wake_onset_HDCZA"]):
            ax2.axvspan(sleep_onset, wake_onset, color = "blue", alpha = 0.1, lw = 0)


        # remove top and right spines and rotate
        for ax in [ax1, ax2]:
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.set_ylabel("ENMO (mg)")

        # rotate xticks for the bottom plot by 19 degrees
        plt.setp(ax2.get_xticklabels(), rotation=19)

        # reduce the space between the plots
        plt.subplots_adjust(hspace=0.05)

        # Thinner grid 
        ax1.grid(True, linestyle='--', linewidth=0.5)
        ax2.grid(True, linestyle='--', linewidth=0.5)

        pdf.savefig()
        plt.close()

# Drop data of participant 78936
# diary_df = diary_df[diary_df["participant"] != "78936"]  

***** 08623 *****
***** 08667 *****
***** 14219 *****
***** 20603 *****
***** 23483 *****
***** 36644 *****
***** 36765 *****
***** 36920 *****


KeyboardInterrupt: 

2025-03-04 10:46:11.879 python[55162:12736317] +[IMKClient subclass]: chose IMKClient_Modern


In [None]:
f, (ax1, ax2) = plt.subplots(2, 1, figsize = (19, 11), sharex = True)

ax1.bar(ENMO_dict["GeneActivPolso"].resample("5min").mean().index, ENMO_dict["GeneActivPolso"].resample("5min").mean(), linewidth = 0.91, width=0.009, color = "black")
for i, (sleep_onset, wake_onset) in enumerate(zip(diary["sleep_onset_HDCZA"], diary["wake_onset_HDCZA"])):
    ax1.axvspan(sleep_onset, wake_onset, color = "blue", alpha = 0.1, lw = 0)
    if i == 0:
        ax1.legend(["Sleep period time", "ENMO Wrist"], frameon = True, fancybox = True, shadow = True, loc = "lower right", bbox_to_anchor=(1.06, 0.76))

ax2.bar(ENMO_dict["GeneActivCaviglia"].resample("5min").mean().index, ENMO_dict["GeneActivCaviglia"].resample("5min").mean(), linewidth = 0.91, width=0.009, color = "black")
ax2.legend(["ENMO Ankle", "__nolegend__"], frameon = True, fancybox = True, shadow = True, loc = "lower right", bbox_to_anchor=(1.06, 0.76))

for sleep_onset, wake_onset in zip(diary["sleep_onset_HDCZA"], diary["wake_onset_HDCZA"]):
    ax2.axvspan(sleep_onset, wake_onset, color = "blue", alpha = 0.1, lw = 0)

# remove top and right spines and rotate
for ax in [ax1, ax2]:
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_ylabel("ENMO (mg)")

# rotate xticks for the bottom plot by 19 degrees
plt.setp(ax2.get_xticklabels(), rotation=19)

# reduce the space between the plots
plt.subplots_adjust(hspace=0.05)

# Thinner grid 
ax1.grid(True, linestyle='--', linewidth=0.5)
ax2.grid(True, linestyle='--', linewidth=0.5)

### Extract Sleep Parameters

In [17]:
ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[0] ##### Wrist

# participants = ["08667", "20603", "36765"]

GGIR_output_dir = "output_icareit"

sleep_summary = pd.DataFrame()

for participant in participants:

    print(f"***** {participant} *****")

    ### Load the GENEActiv data (for debugging purposes only) ######
    # files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
    # for f in files:
    #     if f.endswith(".parquet"):
    #         acc_gen = pd.read_parquet(os.path.join(bronze_layer_path, participant, visit, sensor, f))
   
    ggir_output_path = os.path.join(silver_layer_path, participant, visit, sensor, GGIR_output_dir)
    if not os.path.exists(ggir_output_path): # Skip participants with no Data
        continue
    ggir_part4_output = pd.read_csv(ggir_output_path + "/results/QC/part4_nightsummary_sleep_full.csv")

    sleep_parameters = {
        "night": [],
        "calendar_date" : [],
        "SPT_duration": [],
        "TST": [],
        "WASO": [],
        "N_awakenings": [],
        "SE": [],
    }

    for i, day_row in ggir_part4_output.iterrows():
        sleep_parameters["night"].append(day_row["night"])
        sleep_parameters["calendar_date"].append(day_row["calendar_date"])
        sleep_parameters["SPT_duration"].append(day_row["guider_SptDuration"])
        sleep_parameters["TST"].append(day_row["SleepDurationInSpt"])
        sleep_parameters["WASO"].append(day_row["WASO"]) # dSPT - aS - SO
        sleep_parameters["N_awakenings"].append(day_row["number_of_awakenings"])
        sleep_parameters["SE"].append(day_row["SleepDurationInSpt"] / (day_row["guider_SptDuration"])) # aS / SPT

    sleep_parameters = pd.DataFrame(sleep_parameters)

    sleep_parameters["participant"] = participant

    sleep_summary = pd.concat([sleep_summary, sleep_parameters])

***** 08623 *****
***** 08667 *****
***** 14219 *****
***** 20603 *****
***** 23483 *****
***** 36644 *****
***** 36765 *****
***** 36920 *****
***** 58319 *****
***** 59794 *****
***** 65381 *****
***** 68503 *****
***** 73496 *****
***** 74003 *****
***** 74913 *****
***** 78936 *****
***** 86693 *****
***** 97060 *****


In [18]:
cols = sleep_summary.columns.tolist()
cols = cols[-1:] + cols[:-1]
sleep_summary = sleep_summary[cols].round(2)
save_path = "/Users/augenpro/Documents/Age-IT/risultati_preliminari/"

sleep_summary.to_csv(save_path + "sleep_summary_DAILY.csv", index = False)

In [None]:
import seaborn as sns
sns.set_context("talk")
sns.set_palette("Set2")

colors = sns.color_palette()

for sleep_metric in ["TST", "WASO", "N_awakenings", "SE"]:

    plt.figure(figsize = (11,5))
    sns.boxplot(data = sleep_summary, x = "night", y = sleep_metric, showfliers = False, width=0.5)
    # marker edge color black
    sns.stripplot(data = sleep_summary, x = "night", y = sleep_metric, size = 6, color = colors[0], linewidth = 0.8, alpha = 0.8, edgecolor = "black")
    # print the number of participants in each group
    for i, night in enumerate(sleep_summary["night"].unique()):
        n_participants = sleep_summary[sleep_summary["night"] == night].shape[0]
        plt.text(i, plt.gca().get_ylim()[1] + 0.5, f"n={n_participants}", ha = "center", va = "center", fontsize = 12)
    plt.ylabel("TST (min)")

Text(0, 0.5, 'TST (min)')

In [1]:
sleep_summary["n_nights"] = sleep_summary.groupby("participant")["calendar_date"].transform("count").astype(int)
sleep_summary_total = sleep_summary.drop(columns=["calendar_date", "night"]).groupby("participant").mean()
# Make "n_nights" the first column
cols = sleep_summary_total.columns.tolist()
cols = cols[-1:] + cols[:-1]
sleep_summary_total = sleep_summary_total[cols]

NameError: name 'sleep_summary' is not defined

In [25]:
save_path = "/Users/augenpro/Documents/Age-IT/risultati_preliminari/"

sleep_summary_total.round(2).to_csv(save_path + "sleep_summary_TOTAL.csv")

In [20]:
from utils.compute_acc_metrics import compute_enmo
from circadian.cosine_fit import cosine_fit

ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[0] ##### Wrist

GGIR_output_dir = "output_icareit"

circadian_summary = pd.DataFrame()

for participant in participants:

    print(f"***** {participant} *****")

    ggir_output_path = os.path.join(silver_layer_path, participant, visit, sensor, GGIR_output_dir)
    if not os.path.exists(ggir_output_path): # Skip participants with no Data
        continue
    ggir_part2output = pd.read_csv(ggir_output_path + "/results/part2_summary.csv")

    circadian_parameters = {
        "COSINOR_mesor": [],
        "COSINOR_amplitude": [],
        "COSINOR_acrophase": [],
        "IS": [],
        "IV": [],
    }

    for i, day_row in ggir_part2output.iterrows():
        try:
            circadian_parameters["COSINOR_mesor"].append(day_row["cosinor_mes"])
            circadian_parameters["COSINOR_amplitude"].append(day_row["cosinor_amp"])
            circadian_parameters["COSINOR_acrophase"].append(day_row["cosinor_acrophase"])
            circadian_parameters["IS"].append(day_row["IS_interdailystability"])
            circadian_parameters["IV"].append(day_row["IV_intradailyvariability"])
        except: # Not enough data for circadian analysis
            circadian_parameters["COSINOR_mesor"].append(np.nan)
            circadian_parameters["COSINOR_amplitude"].append(np.nan)
            circadian_parameters["COSINOR_acrophase"].append(np.nan)
            circadian_parameters["IS"].append(np.nan)
            circadian_parameters["IV"].append(np.nan)

    circadian_parameters = pd.DataFrame(circadian_parameters)

    circadian_parameters["participant"] = participant

    circadian_summary = pd.concat([circadian_summary, circadian_parameters])

circadian_summary.index = circadian_summary["participant"]
circadian_summary.drop(columns = ["participant"], inplace = True)

***** 08623 *****
***** 08667 *****
***** 14219 *****
***** 20603 *****
***** 23483 *****
***** 36644 *****
***** 36765 *****
***** 36920 *****
***** 58319 *****
***** 59794 *****
***** 65381 *****
***** 68503 *****
***** 73496 *****
***** 74003 *****
***** 74913 *****
***** 78936 *****
***** 86693 *****
***** 97060 *****


In [23]:
circadian_summary.astype(float).round(2).to_csv(save_path + "circan_summary.csv")

In [None]:
from utils.compute_acc_metrics import compute_enmo
from circadian.cosine_fit import cosine_fit

ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[0] ##### Wrist

GGIR_output_dir = "output_icareit"

sleep_summary = pd.DataFrame()

for participant in participants:

    print(f"***** {participant} *****")

    ### Load the GENEActiv data (for debugging purposes only) ######
    files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
    for f in files:
        if f.endswith(".parquet"):
            acc_gen = pd.read_parquet(os.path.join(bronze_layer_path, participant, visit, sensor, f))

    enmo = compute_enmo(acc_gen)
    enmo_for_cosinor = enmo.resample("1min").mean()

    time_enmo_for_cosinor = np.arange(len(enmo_for_cosinor))
    # Fit a cosine curve to the ENMO signal
    fitted_cosine, amplitude, phase = cosine_fit(enmo_for_cosinor.values, time_enmo_for_cosinor, 24*60)
    offset = np.mean(enmo_for_cosinor.values)
    break

## Physical activity

In [5]:
ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[0] ##### Wrist

GGIR_output_dir = "output_icareit"

PA_summary = pd.DataFrame()

for participant in participants:

    print(f"***** {participant} *****")

    ### Load the GENEActiv data (for debugging purposes only) ######
    # files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
    # for f in files:
    #     if f.endswith(".parquet"):
    #         acc_gen = pd.read_parquet(os.path.join(bronze_layer_path, participant, visit, sensor, f))
   
    PA_parameters = {
        "day": [],
        "calendar_date" : [],
        "inactivity": [],
        "light" : [],
        "moderate": [],
        "vigorous": [],
    }

    ggir_output_path = os.path.join(silver_layer_path, participant, visit, sensor, GGIR_output_dir)
    if not os.path.exists(ggir_output_path): # Skip participants with no Data
        continue
    try:
        ggir_part5_output = pd.read_csv(ggir_output_path + "/results/part5_daysummary_MM_L40M100V400_T5A5.csv") # Numbers need to be changed if the thresholds used as cut-points are different
    except: # No PA data due to low number of valid days
        PA_parameters["day"].append(np.nan)
        PA_parameters["calendar_date"].append(np.nan)
        PA_parameters["inactivity"].append(np.nan)
        PA_parameters["light"].append(np.nan)
        PA_parameters["moderate"].append(np.nan)
        PA_parameters["vigorous"].append(np.nan)
        continue

    for i, day_row in ggir_part5_output.iterrows():
        PA_parameters["day"].append(day_row["window_number"])
        PA_parameters["calendar_date"].append(day_row["calendar_date"])
        PA_parameters["inactivity"].append(day_row["dur_day_total_IN_min"])
        PA_parameters["light"].append(day_row["dur_day_total_LIG_min"])
        PA_parameters["moderate"].append(day_row["dur_day_total_MOD_min"])
        PA_parameters["vigorous"].append(day_row["dur_day_total_VIG_min"])

    PA_parameters = pd.DataFrame(PA_parameters)

    PA_parameters["participant"] = participant

    PA_summary = pd.concat([PA_summary, PA_parameters])

***** 08623 *****
***** 08667 *****
***** 14219 *****
***** 20603 *****
***** 23483 *****
***** 36644 *****
***** 36765 *****
***** 36920 *****
***** 58319 *****
***** 59794 *****
***** 65381 *****
***** 68503 *****
***** 73496 *****
***** 74003 *****
***** 74913 *****
***** 78936 *****
***** 86693 *****
***** 97060 *****


In [29]:
# cols = PA_summary.columns.tolist()
# cols = cols[-1:] + cols[:-1]
# PA_summary = PA_summary[cols].round(2)
# save_path = "/Users/augenpro/Documents/Age-IT/risultati_preliminari/"

PA_summary.to_csv(save_path + "PA_summary_DAILY.csv", index = False)
# PA_summary

In [6]:
PA_summary_total = PA_summary.drop(columns=["calendar_date", "day"]).groupby("participant").mean().round(2)

In [7]:
total = pd.concat([sleep_summary_total.round(2), circadian_summary, PA_summary_total], axis = 1)
# total.to_csv(save_path + "total_summary.csv")

# Ankle gait

In [17]:
from utils.compute_acc_metrics import compute_acc_SMV
from nimbalwear import gait


ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[1] ##### Ankle

GGIR_output_dir = "output_icareit"

steps_daily = {}

participants[10:]

['65381', '68503', '73496', '74003', '74913', '78936', '86693', '97060']

In [20]:
files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
files

['_',
 '65381_right ankle_104577_2025-01-21 15-22-35.parquet',
 '65381_right ankle_104577_2025-01-21 15-22-35.bin']

In [21]:
from utils.compute_acc_metrics import compute_acc_SMV
from gait import nimbal_gait

ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[1] ##### Ankle

participants = participants[11:]
# participants = ["08667", "20603", "36765"]

GGIR_output_dir = "output_icareit"

steps_daily = {}

step_detect_type = 'accel'
fs = 100
start_time = 0
location = "ankle"

for participant in participants:

    print(f"***** {participant} *****")

    ### Load the GENEActiv data (for debugging purposes only) ######
    files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
    for f in files:
        if f.endswith(".parquet"):
            acc_gen = pd.read_parquet(os.path.join(bronze_layer_path, participant, visit, sensor, f))[["x", "y", "z"]]
    acc_gen["acc_SMV"] = compute_acc_SMV(acc_gen)

    steps = nimbal_gait.detect_steps(left_data=None, right_data=acc_gen["acc_SMV"].values, loc=location, data_type=step_detect_type,
                             start_time=acc_gen.index[0], freq=fs, orient_signal=True, low_pass=12)
    
    steps2, bouts = nimbal_gait.define_bouts(steps=steps, freq=fs, start_time=acc_gen.index[0], max_break=3, min_steps=6,
                                        remove_unbouted=False)
    # Save to silver layer 
    steps2.to_csv(os.path.join(silver_layer_path, participant, visit, sensor, "steps.csv"))
    bouts.to_csv(os.path.join(silver_layer_path, participant, visit, sensor, "bouts.csv"))

    # daily steps
    daily_steps = nimbal_gait.gait_stats(bouts, stat_type='daily', single_leg=False)
    # Save to silver layer
    daily_steps.to_csv(os.path.join(silver_layer_path, participant, visit, sensor, "daily_steps.csv"))

    # steps_daily[participant] = daily_steps

# steps_daily_df = pd.DataFrame()

# for participant, steps_part in steps_daily.items():
#     steps_part["participant"] = participant
#     steps_daily_df = pd.concat([steps_daily_df, steps_part])

# steps_daily_df = steps_daily_df.reset_index(drop = True)
# steps_daily_df["participant"].unique()

***** 68503 *****
Finding steps: Right ankle, acceleration, state space controller.
Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
                                                                     

Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  new_steps['gait_bout_num'][~new_steps['gait_bout_num'].isin(bout_index)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the d

***** 73496 *****
Finding steps: Right ankle, acceleration, state space controller.
Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
                                                                     

Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  new_steps['gait_bout_num'][~new_steps['gait_bout_num'].isin(bout_index)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the d

***** 74003 *****
Finding steps: Right ankle, acceleration, state space controller.
Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
                                                                     

Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  new_steps['gait_bout_num'][~new_steps['gait_bout_num'].isin(bout_index)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the d

***** 74913 *****
Finding steps: Right ankle, acceleration, state space controller.
Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
                                                                    

Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  new_steps['gait_bout_num'][~new_steps['gait_bout_num'].isin(bout_index)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the d

***** 78936 *****
Finding steps: Right ankle, acceleration, state space controller.
Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
                                                                     

Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  new_steps['gait_bout_num'][~new_steps['gait_bout_num'].isin(bout_index)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the d

***** 86693 *****
Finding steps: Right ankle, acceleration, state space controller.
Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
                                                                    

Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  new_steps['gait_bout_num'][~new_steps['gait_bout_num'].isin(bout_index)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the d

***** 97060 *****
Finding steps: Right ankle, acceleration, state space controller.
Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
                                                                     

Pushoff Detection...


  timestamps = pd.date_range(start=start_time, periods=len(vert_accel), freq=f"{round(1 / freq, 6)}S")
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  new_steps['gait_bout_num'][~new_steps['gait_bout_num'].isin(bout_index)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the d

In [22]:
steps

Unnamed: 0,step_num,step_time,step_idx,step_state,swing_start_time,mid_swing_time,heel_strike_time,swing_start_accel,mid_swing_accel,heel_strike_accel,step_duration,loc,side,data_type,alg
0,1,2025-01-21 17:00:24.810,181,success,2025-01-21 17:00:25.200,2025-01-21 17:00:25.330,2025-01-21 17:00:25.510,1.073708,0.515969,0.768308,0.76,ankle,right,accel,ssc
1,2,2025-01-21 17:00:25.880,288,success,2025-01-21 17:00:26.270,2025-01-21 17:00:26.420,2025-01-21 17:00:26.540,1.076404,0.911517,1.165045,0.72,ankle,right,accel,ssc
2,3,2025-01-21 17:00:26.620,362,success,2025-01-21 17:00:27.010,2025-01-21 17:00:27.250,2025-01-21 17:00:27.630,1.402488,1.018390,1.277481,1.07,ankle,right,accel,ssc
3,4,2025-01-21 17:00:31.820,882,success,2025-01-21 17:00:32.210,2025-01-21 17:00:32.350,2025-01-21 17:00:32.430,1.127285,0.890529,1.149425,0.67,ankle,right,accel,ssc
4,5,2025-01-21 17:00:39.310,1631,success,2025-01-21 17:00:39.700,2025-01-21 17:00:39.990,2025-01-21 17:00:40.110,1.277084,0.725124,0.953325,0.86,ankle,right,accel,ssc
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41800,41801,2025-01-28 17:04:09.200,60502620,success,2025-01-28 17:04:09.590,2025-01-28 17:04:09.800,2025-01-28 17:04:10.090,1.306245,0.959373,1.201044,0.95,ankle,right,accel,ssc
41801,41802,2025-01-28 17:04:11.100,60502810,success,2025-01-28 17:04:11.490,2025-01-28 17:04:11.730,2025-01-28 17:04:11.880,1.060651,1.044533,1.266694,0.84,ankle,right,accel,ssc
41802,41803,2025-01-28 17:04:13.100,60503010,success,2025-01-28 17:04:13.490,2025-01-28 17:04:13.650,2025-01-28 17:04:13.800,1.204159,0.849806,1.087277,0.76,ankle,right,accel,ssc
41803,41804,2025-01-28 17:04:17.610,60503461,success,2025-01-28 17:04:18.000,2025-01-28 17:04:18.050,2025-01-28 17:04:18.330,0.834744,0.440100,1.285107,0.78,ankle,right,accel,ssc


In [13]:
steps2

Unnamed: 0,step_num,gait_bout_num,step_time,step_idx,step_state,swing_start_time,mid_swing_time,heel_strike_time,swing_start_accel,mid_swing_accel,heel_strike_accel,step_duration,loc,side,data_type,alg
0,1,0,2025-02-05 14:21:19.750,86075,success,2025-02-05 14:21:20.140,2025-02-05 14:21:20.470,2025-02-05 14:21:20.630,1.294422,0.547316,1.571904,0.94,ankle,right,accel,ssc
1,2,0,2025-02-05 14:21:29.120,87012,success,2025-02-05 14:21:29.510,2025-02-05 14:21:29.700,2025-02-05 14:21:30.000,1.471777,0.798364,0.985061,0.94,ankle,right,accel,ssc
2,3,0,2025-02-05 14:21:33.960,87496,success,2025-02-05 14:21:34.350,2025-02-05 14:21:34.510,2025-02-05 14:21:34.590,1.335881,1.008564,1.305108,0.69,ankle,right,accel,ssc
3,4,0,2025-02-05 14:21:42.340,88334,success,2025-02-05 14:21:42.730,2025-02-05 14:21:42.820,2025-02-05 14:21:42.900,1.260878,1.183770,1.380052,0.62,ankle,right,accel,ssc
4,5,0,2025-02-05 14:21:45.560,88656,success,2025-02-05 14:21:45.950,2025-02-05 14:21:46.080,2025-02-05 14:21:46.220,1.051275,0.594353,1.317914,0.72,ankle,right,accel,ssc
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40502,40503,0,2025-02-11 14:01:39.890,51808089,success,2025-02-11 14:01:40.280,2025-02-11 14:01:40.340,2025-02-11 14:01:40.550,1.828026,1.650244,1.881163,0.72,ankle,right,accel,ssc
40503,40504,0,2025-02-11 14:01:53.220,51809422,success,2025-02-11 14:01:53.610,2025-02-11 14:01:53.830,2025-02-11 14:01:54.070,1.366886,0.877770,1.109384,0.91,ankle,right,accel,ssc
40504,40505,0,2025-02-11 14:01:55.020,51809602,success,2025-02-11 14:01:55.410,2025-02-11 14:01:55.590,2025-02-11 14:01:56.000,1.078727,0.593951,0.894187,1.04,ankle,right,accel,ssc
40505,40506,0,2025-02-11 14:01:57.690,51809869,success,2025-02-11 14:01:58.080,2025-02-11 14:01:58.370,2025-02-11 14:01:58.470,1.729335,0.274269,0.767876,0.84,ankle,right,accel,ssc


In [None]:
from utils.compute_acc_metrics import compute_acc_SMV
from gait import nimbal_gait

ageit_path = "/Users/augenpro/Documents/Age-IT/"
silver_layer_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
bronze_layer_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"

participants = sorted([p for p in os.listdir(silver_layer_path) if not p.startswith(".")]) # list of the participants
timeline = ["T0 (baseline)", "T1 (6 mesi)", "T2 (12 mesi)"]
visit = "T0 (baseline)"
sensors = ["GeneActivPolso", "GeneActivCaviglia"]
sensor = sensors[1] ##### Ankle

GGIR_output_dir = "output_icareit"

steps_daily = {}

step_detect_type = 'accel'
fs = 100
start_time = 0
location = "ankle"

for participant in participants:

    # print(f"***** {participant} *****")

    ### Load the GENEActiv data (for debugging purposes only) ######
    files = os.listdir(os.path.join(bronze_layer_path, participant, visit, sensor))
    for f in files:
        if f.endswith(".parquet"):
            print(participant)

08623
14219
23483
36644
36920
58319
59794
65381
68503
73496
74003
74913
78936
86693
97060


In [8]:
participant_to_remove = np.array(["08667", "20603", "36765"]).astype(int)
participant_to_remove

array([ 8667, 20603, 36765])

In [70]:
steps_daily_df["participant"].iloc[0]

8623

In [19]:
# Save to csv
# steps_daily_df.to_csv("/Users/augenpro/Documents/Age-IT/steps_daily.csv", index = False)

steps_daily_df = pd.read_csv("/Users/augenpro/Documents/Age-IT/steps_daily.csv")
steps_daily_df

# Remove participants with no data

steps_daily_df = steps_daily_df[~steps_daily_df["participant"].isin(participant_to_remove)]

steps_daily_df["participant"] = steps_daily_df["participant"].apply(lambda x: str(x))

steps_daily_df

Unnamed: 0,day_num,date,type,longest_bout_time,longest_bout_steps,bouts_over_3min,total_steps,participant
0,1,2025-01-28,daily,1607,2069,6,9062,8623
1,2,2025-01-29,daily,2150,2949,3,6411,8623
2,3,2025-01-30,daily,3007,3986,4,8815,8623
3,4,2025-01-31,daily,274,242,2,983,8623
4,5,2025-02-01,daily,1630,2019,8,9641,8623
...,...,...,...,...,...,...,...,...
127,4,2025-01-17,daily,222,223,1,2808,58319
128,5,2025-01-18,daily,270,255,2,2998,58319
129,6,2025-01-19,daily,151,124,0,2514,58319
130,7,2025-01-20,daily,115,108,0,2636,58319


In [10]:
steps_daily_df["participant"].unique()

array([ 8623, 14219, 23483, 36644, 36920, 59794, 68503, 73496, 74003,
       74913, 78936, 86693, 97060, 58319])

In [None]:
cols = steps_daily_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
steps_daily_df = steps_daily_df[cols].round(2)

steps_daily_df.rename(columns = {"day_num": "day", "date": "calendar_date"}, inplace = True)
steps_daily_df.drop(columns=["type"], inplace = True)

steps_daily_df.to_csv(save_path + "/gait_summary_DAILY.csv", index = False)

In [12]:
steps_daily_df.drop(columns=["calendar_date", "day"], inplace = True)
steps_daily_total = steps_daily_df.groupby("participant").mean().round(2)
steps_daily_total

Unnamed: 0_level_0,longest_bout_time,longest_bout_steps,bouts_over_3min,total_steps
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
8623,1566.5,2005.62,4.38,6945.38
14219,271.38,230.38,2.62,3188.5
23483,184.0,172.12,1.12,2363.0
36644,564.25,571.75,1.38,4110.5
36920,129.29,116.86,0.29,2343.0
58319,175.75,165.25,0.75,2477.38
59794,88.0,76.71,0.14,1426.14
68503,136.0,126.71,0.29,2051.0
73496,102.62,90.12,0.25,2073.25
74003,122.75,101.75,0.12,2764.5


In [18]:
sleep_summary_total.index

Index(['08623', '14219', '23483', '36644', '36920', '58319', '59794', '65381',
       '68503', '73496', '74913', '78936', '86693', '97060'],
      dtype='object', name='participant')

In [14]:
save_path = "/Users/augenpro/Documents/Age-IT/risultati_preliminari/"

total = pd.concat([sleep_summary_total.round(2), circadian_summary, PA_summary_total, steps_daily_total], axis = 1)
total.to_csv(save_path + "total_summary_new.csv")

In [None]:
steps_daily_df["n_days"] = steps_daily_df.groupby("participant")["date"].transform("count").astype(int)
steps_daily_df_total = steps_daily_df.drop(columns=["calendar_date", "night"]).groupby("participant").mean()
# Make "n_nights" the first column
cols = steps_daily_df_total.columns.tolist()
cols = cols[-1:] + cols[:-1]
steps_daily_df_total = steps_daily_df_total[cols]

KeyError: 'Column not found: calendar_date'

In [40]:
steps_daily_df.index = steps_daily_df["participant"]
steps_daily_df.drop(columns = ["participant"], inplace = True)

# Merge with total
total = pd.concat([total, steps_daily_df], axis = 1)

InvalidIndexError: Reindexing only valid with uniquely valued Index objects

In [19]:
from utils.compute_acc_metrics import compute_acc_SMV
acc_gen["acc_SMV"] = compute_acc_SMV(acc_gen)

plt.figure(figsize = (19,11))
plt.plot(acc_gen["acc_SMV"].resample("0.1s").mean())

[<matplotlib.lines.Line2D at 0x33165caa0>]

In [None]:
steps2, bouts = gait.define_bouts(steps=steps, freq=fs, start_time=acc_gen.index[0], max_break=3, min_steps=6,
                                    remove_unbouted=False)
# daily steps
daily_steps = gait.gait_stats(bouts, stat_type='daily', single_leg=False)

In [29]:
plt.figure(figsize = (19,11))
plt.plot(acc_gen["acc_SMV"].resample("0.1s").mean())
for i, bout in bouts.iterrows():
    # if bout["end_time"] - bout["start_time"] > pd.Timedelta("1 min"):
    plt.axvspan(bout["start_time"], bout["end_time"], color = "blue", alpha = 0.3)

In [None]:
wscwcmwmvcwmevkmqv


eee

Unnamed: 0,day_num,date,type,longest_bout_time,longest_bout_steps,bouts_over_3min,total_steps
0,1,2025-01-28,daily,1607,2069,6,9062
1,2,2025-01-29,daily,2150,2949,3,6411
2,3,2025-01-30,daily,3007,3986,4,8815
3,4,2025-01-31,daily,274,242,2,983
4,5,2025-02-01,daily,1630,2019,8,9641
5,6,2025-02-02,daily,1895,2344,5,9444
6,7,2025-02-03,daily,1757,2266,6,10000
7,8,2025-02-04,daily,212,170,1,1207
