In [1]:
import os

# Root folder of the raw (Bronze) data: one sub-folder per participant.
data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/"
# Root folder where the processed (Silver) data is stored.
silver_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"

# Participant folders in alphabetical order; entries starting with "." are skipped.
participants = sorted(
    filter(lambda entry: not entry.startswith("."), os.listdir(data_path))
)

# Display the participants from the 7th onwards.
participants[6:]

['36765',
 '36920',
 '58319',
 '59794',
 '65381',
 '68503',
 '73496',
 '74003',
 '74913',
 '78936',
 '86693',
 '97060']

In [3]:
"""
Read .bin files coming from GENEActiv devices and save them as a pd.DataFrame in parquet format.

This module is based on the code from the scikit-digital-health python package (https://github.com/pfizer-opensource/scikit-digital-health)

IMP: scikit-digital-health creates conflicts with the versions needed by other packages. To solve this:
1. I tried to copy the code and modify it to work as a standalone module -- however, scikit-digital-health
    uses a lot of other modules (also implemented in C) and it is not clear how to separate them without breaking the code.
2. Solution for now: the scikit-digital-health package is installed in a separate conda environment and the code is run from there.

"""


"""
IMP: Sometimes there is an unexpected behaviour: for reasons that are not clear, the accelerometer data is not read correctly, and when this happens
it affects all the recordings of all participants.
If the message "Not enough still periods found for calibration" is printed, it means that the accelerometer data was not read correctly.
The cause is unknown -- restarting the PC and running the same code again solves the problem.
"""

import numpy as np
import pandas as pd
import os
from skdh.io import ReadBin
from skdh.preprocessing import CalibrateAccelerometer

import sys
sys.path.append("../")
from nonwear.DETACH import nimbaldetach

def bin2parquet(file_path, save_path, calibrate = True):
    """
    Read a .bin file from a GENEActiv device, blank out non-wear periods and save it as parquet.

    The recording is read with ``ReadBin``. If ``calibrate`` is truthy and the recording holds
    more than 72 hours of samples (counted at the 100 Hz rate used throughout this function),
    the acceleration is calibrated with ``CalibrateAccelerometer`` and the output file name ends
    in "_preprocessed.parquet"; otherwise the acceleration is kept as read and the output file
    name ends in ".parquet". In both cases the non-wear bouts returned by ``nimbaldetach``
    (always computed on the acceleration as read) are set to NaN in every column.

    Parameters
    ----------
    file_path : str
        Path of the .bin file to read.
    save_path : str
        Output path in which ".bin" is replaced by the parquet suffix described above.
        The parent folder is created if it does not exist.
    calibrate : bool, default True
        Whether to calibrate recordings that are long enough.

    Returns
    -------
    None
        The frame (columns x, y, z, temperature, light; datetime index) is written to disk.
        Nothing is written when the calibration step fails with a ``KeyError``.
    """
    sampling_freq = 100  # Hz, also passed to nimbaldetach below
    min_samples_for_calibration = 72 * 60 * 60 * sampling_freq  # 72 hours of samples

    reader = ReadBin()
    data = reader.predict(file = file_path)

    # `and` instead of `&`: a bitwise AND silently disables calibration for truthy
    # non-bool flags (e.g. True & 2 == 0).
    if calibrate and data["accel"].shape[0] > min_samples_for_calibration:
        calibrator = CalibrateAccelerometer()
        print("Calibrating accelerometer...")
        try:
            acc_cal = calibrator.predict(time = data["time"], accel = data["accel"], temperature = data["temperature"])
            # Looked up inside the try so that a result without "accel" is handled like a
            # KeyError raised by predict itself, before the non-wear detection is run.
            accel = acc_cal["accel"]
        except KeyError:
            print("Not enough data to calibrate accelerometer.")
            return
        suffix = "_preprocessed.parquet"
    else: # do not calibrate
        accel = data["accel"]
        suffix = ".parquet"

    start_stop_nw, _ = nimbaldetach(data["accel"][:, 0], data["accel"][:, 1], data["accel"][:, 2], data["temperature"],
                                     accel_freq=sampling_freq, temperature_freq=sampling_freq, quiet=True)

    data_for_df = np.concatenate((accel, data["temperature"].reshape(-1, 1), data["light"].reshape(-1, 1)), axis = 1)
    out_df = pd.DataFrame(data_for_df,
                          columns = ["x", "y", "z", "temperature", "light"],
                          index = pd.to_datetime(data["time"], unit = "s"))

    # Remove non-wear periods. Positional slicing (end datapoint included) blanks the same
    # rows as the former label-based slice on a unique, sorted index, without depending on
    # the timestamps; int() guards against iterrows() upcasting the datapoints to float.
    for _idx, row in start_stop_nw.iterrows():
        first = int(row["Start Datapoint"])
        last = int(row["End Datapoint"])
        out_df.iloc[first:last + 1] = np.nan

    out_file = save_path.replace(".bin", suffix)
    out_dir = os.path.dirname(out_file)
    if out_dir:  # os.makedirs("") raises, so skip when saving into the working directory
        os.makedirs(out_dir, exist_ok = True)
    out_df.to_parquet(out_file)


if __name__ == "__main__":
    # Batch conversion: every .bin file of every selected participant/sensor is converted
    # from the Bronze folder tree into the mirrored Silver folder tree.
    data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # path to the folder containing the subjects
    silver_path = "/Users/augenpro/Documents/Age-IT/data/Silver/"
    participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
    participants = participants[6:] # only the participants from the 7th onwards
    # participants = ["08667", "20603", "36765"]
    visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

    sensors = ["GeneActivPolso", "GeneActivCaviglia"]

    for participant in participants:
        print(participant)
        for sensor in sensors:
            path = os.path.join(data_path, participant, visit, sensor)
            save_path = os.path.join(silver_path, participant, visit, sensor)

            # A participant without this visit/sensor folder must not abort the whole batch.
            if not os.path.isdir(path):
                print(f"Skipping missing folder: {path}")
                continue

            # The Silver tree is not guaranteed to exist yet; without this every write fails.
            os.makedirs(save_path, exist_ok = True)

            files = sorted(os.listdir(path)) # sorted for a reproducible processing order
            # if there are already parquet files, skip
            # if any([f.endswith(".parquet") for f in files]):
            #     continue
            for f in files:
                if f.endswith(".bin"):
                    try:
                        bin2parquet(os.path.join(path, f), os.path.join(save_path, f), calibrate = True)
                    except Exception as e:
                        # Best effort: report the failing file and move on to the next one.
                        print(f"Error with {f}: {e}")

36765


  warn(


Calibrating accelerometer...


: 

In [None]:
"""
Read .bin files coming from GENEActiv devices and save them as a pd.DataFrame in parquet format.

This module is based on the code from the scikit-digital-health python package (https://github.com/pfizer-opensource/scikit-digital-health)

IMP: scikit-digital-health creates conflicts with the versions needed by other packages. To solve this:
1. I tried to copy the code and modify it to work as a standalone module -- however, scikit-digital-health
    uses a lot of other modules (also implemented in C) and it is not clear how to separate them without breaking the code.
2. Solution for now: the scikit-digital-health package is installed in a separate conda environment and the code is run from there.

"""

import numpy as np
import pandas as pd
import os
from skdh.io import ReadBin
from skdh.preprocessing import CalibrateAccelerometer

def bin2parquet(file_path, calibrate = False):
    """
    Convert one GENEActiv .bin recording to parquet, written next to the input file.

    The signals as read are always saved under the input name with ".bin" replaced by
    ".parquet". When ``calibrate`` is true, the acceleration is additionally calibrated and a
    second file ending in "_calibrated.parquet" is written.

    NOTE(review): this redefines ``bin2parquet`` with a different signature from the
    definition in the earlier cell; whichever cell ran last wins.
    """
    recording = ReadBin().predict(file = file_path)

    def _to_frame(accel):
        # Acceleration, temperature and light side by side, indexed by timestamp.
        stacked = np.concatenate(
            (
                accel,
                recording["temperature"].reshape(-1, 1),
                recording["light"].reshape(-1, 1),
            ),
            axis = 1,
        )
        timestamps = pd.to_datetime(recording["time"], unit = "s")
        return pd.DataFrame(stacked, columns = ["x", "y", "z", "temperature", "light"], index = timestamps)

    # save as parquet
    _to_frame(recording["accel"]).to_parquet(file_path.replace(".bin", ".parquet"))

    # Instantiated whether or not calibration is requested.
    calibrator = CalibrateAccelerometer()

    if not calibrate:
        return

    print("Calibrating accelerometer...")
    calibrated = calibrator.predict(
        time = recording["time"],
        accel = recording["accel"],
        temperature = recording["temperature"],
    )
    _to_frame(calibrated["accel"]).to_parquet(file_path.replace(".bin", "_calibrated.parquet"))

if __name__ == "__main__":
    # Trial run: convert the .bin files of the first sensor of the first listed participant.
    data_path = "/Users/augenpro/Documents/Age-IT/data/Bronze/" # folder with one sub-folder per subject
    # participants = sorted([p for p in os.listdir(data_path) if not p.startswith(".")]) # list of the participants
    participants = ["08667"]
    visit = "T0 (baseline)" # T0 (baseline), T1 (follow-up @ 6 months), T2 (follow-up @ 12 months)

    sensors = ["GeneActivPolso", "GeneActivCaviglia"]

    for participant in participants:
        print(participant)
        for sensor in sensors:
            path = os.path.join(data_path, participant, visit, sensor)
            files = os.listdir(path)
            # if there are already parquet files, skip
            # if any([f.endswith(".parquet") for f in files]):
            #     continue
            for f in files:
                if not f.endswith(".bin"):
                    continue
                bin2parquet(os.path.join(path, f), calibrate = False)

            # Only the first sensor is processed.
            break
        # Only the first participant is processed.
        break

08667


  warn(


Calibrating accelerometer...


In [2]:
# Display the participant list currently bound to `participants` in the kernel.
participants

['08623',
 '08667',
 '14219',
 '20603',
 '23483',
 '36644',
 '36765',
 '36920',
 '58319',
 '59794',
 '65381',
 '68503',
 '73496',
 '74003',
 '74913',
 '78936',
 '86693',
 '97060']

In [10]:

import pandas as pd

# Loading of the raw (Bronze) recording, currently disabled:
# acc = pd.read_parquet("/Users/augenpro/Documents/Age-IT/data/Bronze/08667/T0 (baseline)/GeneActivPolso/08667_left wrist_105524_2025-02-11 14-06-32.parquet")

# Preprocessed (Silver) recording to inspect.
preprocessed_file = "/Users/augenpro/Documents/Age-IT/data/Silver/36644/T0 (baseline)/GeneActivCaviglia/36644_right ankle_106296_2025-02-04 18-17-15_preprocessed.parquet"
acc_cal = pd.read_parquet(preprocessed_file)

# First rows, as a quick sanity check of the loaded frame.
acc_cal.head()

Unnamed: 0,x,y,z,temperature,light
2025-01-28 13:10:40.000000000,-0.008647,0.002249,-1.00203,33.5,439.0
2025-01-28 13:10:40.009999990,-0.012499,0.006136,-1.00203,33.5,439.0
2025-01-28 13:10:40.019999981,-0.020204,-0.001638,-0.990396,33.5,439.0
2025-01-28 13:10:40.029999971,-0.016351,0.006136,-1.00203,33.5,439.0
2025-01-28 13:10:40.039999962,-0.004795,-0.001638,-0.990396,33.5,439.0


In [4]:
# Preview the first rows of the frame currently bound to `acc_cal`.
acc_cal.head()

Unnamed: 0,x,y,z,temperature,light
2025-02-05 14:06:21.000000000,0.001117,-1.01,-0.136354,24.5,212.0
2025-02-05 14:06:21.009999990,0.017059,-0.990199,-0.120496,24.5,212.0
2025-02-05 14:06:21.019999981,0.009088,-0.99416,-0.120496,24.5,212.0
2025-02-05 14:06:21.029999971,0.009088,-0.99812,-0.12446,24.5,212.0
2025-02-05 14:06:21.039999962,0.013073,-0.99812,-0.132389,24.5,212.0


In [9]:
import matplotlib.pyplot as plt
%matplotlib qt

def compute_acc_SMV(acc_df):
    """
    Return the row-wise Euclidean norm of the first three columns of ``acc_df``.

    The columns are selected by position, not by name; the result keeps the index of
    ``acc_df``.
    """
    axis_x, axis_y, axis_z = (acc_df.iloc[:, position] for position in range(3))
    squared_norm = axis_x**2 + axis_y**2 + axis_z**2
    return np.sqrt(squared_norm)

# Vector magnitude of the two recordings being compared.
# NOTE(review): `acc` is only assigned in a commented-out line of the loading cell, so this
# cell fails on a fresh kernel unless `acc` is loaded first.
acc_SMV = compute_acc_SMV(acc)
acc_cal_SMV = compute_acc_SMV(acc_cal)

# Overlay both traces on one wide figure.
plt.figure(figsize = (15, 5))
for trace, trace_label in ((acc_SMV, "Raw"), (acc_cal_SMV, "Calibrated")):
    plt.plot(trace, label = trace_label)
plt.legend(loc = "upper right")

<matplotlib.legend.Legend at 0x13e6bf200>

In [6]:
# Close the current matplotlib figure.
plt.close()