In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

import pickle

import seaborn as sns

%matplotlib qt
mpl.rcParams['lines.linewidth'] = 0.91
plt.style.use('seaborn-v0_8-whitegrid')

In [2]:
path = "/Users/marcellosicbaldi/Gitlab/acc_hr_corr_sleep/"

df_ll = pd.read_csv(path + 'bursts_ll.csv')[['start', 'end']]
df_lw = pd.read_csv(path + 'bursts_lw.csv')[['start', 'end']]
df_rl = pd.read_csv(path + 'bursts_rl.csv')[['start', 'end']]
df_rw = pd.read_csv(path + 'bursts_rw.csv')[['start', 'end']]
df_trunk = pd.read_csv(path + 'bursts_trunk.csv')[['start', 'end']]

In [7]:
df_lw.shape[0] + df_rl.shape[0] + df_rw.shape[0] + df_trunk.shape[0] + df_ll.shape[0]

1297

In [None]:
plt.figure()

In [14]:
intervals[0], intervals[2]

(('2024-03-20 23:17:40.274266243', '2024-03-20 23:17:46.034266233', 'LL'),
 ('2024-03-20 23:18:34.394266367', '2024-03-20 23:18:36.414266348', 'LL'))

In [8]:
from collections import defaultdict

# Combine all intervals into a list along with limb identifiers
intervals = []
intervals.extend((row['start'], row['end'], 'LL') for index, row in df_ll.iterrows())
intervals.extend((row['start'], row['end'], 'LW') for index, row in df_lw.iterrows())
intervals.extend((row['start'], row['end'], 'RL') for index, row in df_rl.iterrows())
intervals.extend((row['start'], row['end'], 'RW') for index, row in df_rw.iterrows())
intervals.extend((row['start'], row['end'], 'T') for index, row in df_trunk.iterrows())

# Sort intervals by start time
intervals.sort(key=lambda x: x[0])

# Merge overlapping intervals and label them
merged_intervals = []
current_start, current_end, current_limb = intervals[0]

for start, end, limb in intervals[1:]:
    if start <= current_end:  # There is an overlap
        current_end = max(current_end, end)
        if limb not in current_limb:
            current_limb += '+' + limb
    else:
        merged_intervals.append((current_start, current_end, current_limb))
        current_start, current_end, current_limb = start, end, limb

# Append the last interval
merged_intervals.append((current_start, current_end, current_limb))

# Create a DataFrame for a cleaner view of the merged intervals
df_merged_intervals = pd.DataFrame(merged_intervals, columns=['Start', 'End', 'Limbs'])
df_merged_intervals

Unnamed: 0,Start,End,Limbs
0,2024-03-20 23:17:40.274266243,2024-03-20 23:17:46.034266233,LL
1,2024-03-20 23:18:34.352916241,2024-03-20 23:18:36.815256357,RL+LL+LW+T
2,2024-03-20 23:20:42.682284117,2024-03-20 23:20:45.294266224,LW+RL+LL+T
3,2024-03-20 23:22:28.934266329,2024-03-20 23:22:43.062916517,LL+LW+T+RL+RW
4,2024-03-20 23:24:13.829737425,2024-03-20 23:24:24.655256510,RW+LL+LW+T+RL
...,...,...,...
335,2024-03-21 07:11:42.364266157,2024-03-21 07:11:48.634266376,LL+RL+LW+T+RW
336,2024-03-21 07:11:58.762916088,2024-03-21 07:12:09.049737692,RL+LW+LL+T+RW
337,2024-03-21 07:12:29.125256299,2024-03-21 07:12:34.594266176,T+RL+RW+LL+LW
338,2024-03-21 07:12:47.612916231,2024-03-21 07:12:50.812283754,RL+LL+LW+T


In [10]:
df_merged_intervals.value_counts('Limbs')

Limbs
LW               25
LL               19
RL               18
RL+LL+LW+T+RW    12
LW+RW            11
                 ..
RL+LW             1
RL+LL+RW+T        1
RL+LL+RW+LW+T     1
LW+T+LL+RL        1
T+RW+RL+LL+LW     1
Name: count, Length: 122, dtype: int64

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### Functions to detect bursts in acceleration signal ####

def hl_envelopes_idx(s, dmin=1, dmax=1, split=False):
    """
    Compute high and low envelopes of a signal s
    Parameters
    ----------
    s: 1d-array, data signal from which to extract high and low envelopes
    dmin, dmax: int, optional, size of chunks, use this if the size of the input signal is too big
    split: bool, optional, if True, split the signal in half along its mean, might help to generate the envelope in some cases

    Returns
    -------
    lmin,lmax : high/low envelope idx of input signal s
    """

    # locals min      
    lmin = (np.diff(np.sign(np.diff(s))) > 0).nonzero()[0] + 1 
    # locals max
    lmax = (np.diff(np.sign(np.diff(s))) < 0).nonzero()[0] + 1 
    
    if split:
        # s_mid is zero if s centered around x-axis or more generally mean of signal
        s_mid = np.mean(s) 
        # pre-sorting of locals min based on relative position with respect to s_mid 
        lmin = lmin[s[lmin]<s_mid]
        # pre-sorting of local max based on relative position with respect to s_mid 
        lmax = lmax[s[lmax]>s_mid]

    # global min of dmin-chunks of locals min 
    lmin = lmin[[i+np.argmin(s[lmin[i:i+dmin]]) for i in range(0,len(lmin),dmin)]]
    # global max of dmax-chunks of locals max 
    lmax = lmax[[i+np.argmax(s[lmax[i:i+dmax]]) for i in range(0,len(lmax),dmax)]]
    
    return lmin,lmax

def detect_bursts(acc, envelope = True, plot = False, alfa = 15):
    """
    Detect bursts in acceleration signal

    Parameters
    ----------
    std_acc : pd.Series
        Standard deviation of acceleration signal with a 1 s resolution
    envelope : bool, optional
        If True, detect bursts based on the envelope of the signal
        If False, detect bursts based on the std of the signal

    Returns
    -------
    bursts : pd.Series
        pd.DataFrame with burst start times, end times, and duration
    """

    if envelope:
        lmin, lmax = hl_envelopes_idx(acc.values, dmin=9, dmax=9)
        # adjust shapes
        if len(lmin) > len(lmax):
            lmin = lmin[:-1]
        if len(lmax) > len(lmin):
            lmax = lmax[1:]
        th = np.percentile(acc.values[lmax] - acc.values[lmin], 10) * alfa
        std_acc = pd.Series(acc.values[lmax] - acc.values[lmin], index = acc.index[lmax])
    else:
        std_acc = acc.resample("1 s").std()
        std_acc.index.round("1 s")
        th = np.percentile(std_acc, 10) * alfa

    if plot:
        plt.figure()
        plt.plot(std_acc, color = 'b')
        plt.axhline(th, color = 'r')

    bursts1 = (std_acc > th).astype(int)
    start_burst = bursts1.where(bursts1.diff()==1).dropna()
    end_burst = bursts1.where(bursts1.diff()==-1).dropna()
    if bursts1.iloc[0] == 1:
            start_burst = pd.concat([pd.Series(0, index = [bursts1.index[0]]), start_burst])
    if bursts1.iloc[-1] == 1:
        end_burst = pd.concat([end_burst, pd.Series(0, index = [bursts1.index[-1]])])
    bursts_df = pd.DataFrame({"duration": end_burst.index - start_burst.index}, index = start_burst.index)

    start = bursts_df.index
    end = pd.to_datetime((bursts_df.index + bursts_df["duration"]).values)

    end = end.to_series().reset_index(drop = True)
    start = start.to_series().reset_index(drop = True)

    duration_between_bursts = (start.iloc[1:].values - end.iloc[:-1].values)

    for i in range(len(start)-1):
        if duration_between_bursts[i] < pd.Timedelta("5 s"):
            end[i] = np.nan
            start[i+1] = np.nan
    end.dropna(inplace = True)
    start.dropna(inplace = True)

    # extract amplitude of the bursts
    bursts = pd.DataFrame({"start": start.reset_index(drop = True), "end": end.reset_index(drop = True)})
    burst_amplitude1 = []
    burst_amplitude2 = []
    for i in range(len(bursts)):
        # peak-to-peak amplitude of bp acceleration
        burst_amplitude1.append(acc.loc[bursts["start"].iloc[i]:bursts["end"].iloc[i]].max() - acc.loc[bursts["start"].iloc[i]:bursts["end"].iloc[i]].min())
        # AUC of std_acc
        burst_amplitude2.append(np.trapz(std_acc.loc[bursts["start"].iloc[i]:bursts["end"].iloc[i]]))
    bursts["duration"] = bursts["end"] - bursts["start"]
    bursts["peak-to-peak"] = burst_amplitude1
    bursts["AUC"] = burst_amplitude2
    return bursts

#### Functions to filter bursts that are too close to each other ####

def filter_bursts(data):
    """
    Filter bursts that are neither preceded nor followed by another movement for at least 30 seconds.

    Parameters:
    - data (pd.DataFrame): DataFrame containing 'start', 'end', and 'duration' columns.

    Returns:
    - pd.DataFrame: Filtered DataFrame.
    """
    
    # Calculate the time difference between movements
    data['next_start_diff'] = data['start'].shift(-1) - data['end']
    data['prev_end_diff'] = data['start'] - data['end'].shift(1)
    
    # Convert differences to total seconds for comparison
    data['next_start_diff_seconds'] = data['next_start_diff'].dt.total_seconds()
    data['prev_end_diff_seconds'] = data['prev_end_diff'].dt.total_seconds()
    
    # Filter movements with at least 30 seconds separation from both previous and next movements
    filtered_data = data[(data['next_start_diff_seconds'] > 30) & (data['prev_end_diff_seconds'] > 30)]

    data.drop(columns=['next_start_diff', 'prev_end_diff', 'next_start_diff_seconds', 'prev_end_diff_seconds'], inplace=True)
    
    # Return the filtered data, dropping the temporary columns used for filtering
    return filtered_data.drop(columns=['next_start_diff', 'prev_end_diff', 'next_start_diff_seconds', 'prev_end_diff_seconds'])

#### Functions to find combination of bursts happening at different limbs ####

# For now, implemented for 
# - all 5 limbs together
# - every combination?


def is_isolated(start, end, df):
    # Check if the start or end of an interval falls within any interval in the dataframe
    overlap = df[(df['start'] <= end) & (df['end'] >= start)]
    return overlap.empty

def merge_excluding(current_df):
    df_list = [bursts_ll, bursts_rl, bursts_lw, bursts_rw, bursts_trunk]  # TODO: make this a function argument...
    combined_df = pd.concat([df for df in df_list if not df.equals(current_df)], ignore_index=True)
    return combined_df

def find_isolated_combination(dfs_to_combine, dfs_to_isolate):
    # Merge dataframes that should be combined
    combined_df = pd.concat(dfs_to_combine, ignore_index=True).sort_values(by='start')
    # Merge dataframes from which isolation is required
    isolate_df = pd.concat(dfs_to_isolate, ignore_index=True).sort_values(by='start')

    # Finding overlaps within combined_df
    overlaps = []
    for i, row in combined_df.iterrows():
        overlapping_rows = combined_df[
            (combined_df['start'] <= row['end']) &
            (combined_df['end'] >= row['start']) &
            (combined_df.index != i)
        ]
        if not overlapping_rows.empty:
            # Check isolation from other dataframes
            if is_isolated(row['start'], row['end'], isolate_df):
                overlaps.append(row)

    return pd.DataFrame(overlaps)

def find_combined_movements_all_limbs(dfs):
    # Merging all limb dataframes
    merged_df = pd.concat(dfs, ignore_index=True)
    # Sorting by start time
    merged_df.sort_values(by='start', inplace=True)
    
    # Finding overlapping intervals for all limbs
    overlaps = []
    current_overlap = None
    for index, row in merged_df.iterrows():
        if current_overlap is None:
            current_overlap = {
                'start': row['start'],
                'end': row['end'],
                'limbs_involved': {row['limb']}
            }
        else:
            # Check if the current row overlaps with the current overlapping period
            if row['start'] <= current_overlap['end']:
                current_overlap['limbs_involved'].add(row['limb'])
                # Update the end time to the latest end time
                if row['end'] > current_overlap['end']:
                    current_overlap['end'] = row['end']
            else:
                # Check if the previous overlap involved all limbs
                if current_overlap['limbs_involved'] == {'lw', 'rw', 'll', 'rl', 'trunk'}:
                    overlaps.append(current_overlap)
                # Start a new overlap
                current_overlap = {
                    'start': row['start'],
                    'end': row['end'],
                    'limbs_involved': {row['limb']}
                }
    
    # Final check at the end of the loop
    if current_overlap and current_overlap['limbs_involved'] == {'lw', 'rw', 'll', 'rl', 'trunk'}:
        overlaps.append(current_overlap)
    
    return pd.DataFrame(overlaps)

In [3]:
# Description: This script detects bursts in the accelerometer data of the trunk and limbs of the subjects, and save the results in a pickle file. 
# The bursts are detected using the Hilbert envelope method, and the isolated movements are extracted for each limb. 
# The script also computes the area under the curve of the Hilbert envelope for each burst, and detects posture changes from the trunk accelerometer data. 
# The results are saved in a dictionary with keys for each combination of limbs.

import numpy as np
import pandas as pd
import neurokit2 as nk
import pickle

from functions.acc_utils import compute_acc_norm
from functions.posture import compute_spherical_coordinates, detect_posture_changes


diary_SPT = {    
    "158": [pd.Timestamp('2024-02-28 23:00:00'), pd.Timestamp('2024-02-29 07:15:00')], # 158 OK
    "633": [pd.Timestamp('2024-03-07 00:05:00'), pd.Timestamp('2024-03-07 06:36:00')], # 633 OK
    "906": [pd.Timestamp('2024-03-07 00:30:00'), pd.Timestamp('2024-03-07 07:30:00')], # 906 OK
    "958": [pd.Timestamp('2024-03-13 22:00:00'), pd.Timestamp('2024-03-14 06:00:00')], # 958 OK
    "127": [pd.Timestamp('2024-03-13 23:15:00'), pd.Timestamp('2024-03-14 06:50:00')], # 127 OK
    "098": [pd.Timestamp('2024-03-16 02:01:00'), pd.Timestamp('2024-03-16 09:50:00')], # 098 OK
    "547": [pd.Timestamp('2024-03-16 01:04:00'), pd.Timestamp('2024-03-16 07:40:00')], # 547 OK
    "815": [pd.Timestamp('2024-03-20 23:00:00'), pd.Timestamp('2024-03-21 07:30:00')], # 815 OK
    "914": [pd.Timestamp('2024-03-20 21:50:00'), pd.Timestamp('2024-03-21 05:50:00')], # 914 OK
    "971": [pd.Timestamp('2024-03-20 23:50:00'), pd.Timestamp('2024-03-21 07:50:00')], # 971 OK
    "279": [pd.Timestamp('2024-03-28 00:10:00'), pd.Timestamp('2024-03-28 07:27:00')], # 279 OK
    "965": [pd.Timestamp('2024-03-28 01:25:00'), pd.Timestamp('2024-03-28 09:20:00')], # 965 OK
}

diary_TIB = {
    "158": [pd.Timestamp('2024-02-28 22:15:00'), pd.Timestamp('2024-02-29 07:45:00')], # 158 OK
    "633": [pd.Timestamp('2024-03-06 23:39:00'), pd.Timestamp('2024-03-07 08:00:00')], # 633 OK 
    "906": [pd.Timestamp('2024-03-07 00:15:00'), pd.Timestamp('2024-03-07 07:35:00')], # 906 OK
    "958": [pd.Timestamp('2024-03-13 21:30:00'), pd.Timestamp('2024-03-14 06:30:00')], # 958 OK
    "127": [pd.Timestamp('2024-03-13 22:00:00'), pd.Timestamp('2024-03-14 07:10:00')], # 127 OK 
    "098": [pd.Timestamp('2024-03-16 01:49:00'), pd.Timestamp('2024-03-16 09:52:00')], # 098 OK 
    "547": [pd.Timestamp('2024-03-16 00:26:00'), pd.Timestamp('2024-03-16 08:20:00')], # 547 OK 
    "815": [pd.Timestamp('2024-03-20 22:00:00'), pd.Timestamp('2024-03-21 07:30:00')], # 815 OK 
    "914": [pd.Timestamp('2024-03-20 21:30:00'), pd.Timestamp('2024-03-21 06:20:00')], # 914 OK 
    "971": [pd.Timestamp('2024-03-20 23:30:00'), pd.Timestamp('2024-03-21 08:08:00')], # 971 OK 
    "279": [pd.Timestamp('2024-03-28 00:04:00'), pd.Timestamp('2024-03-28 07:41:00')], # 279 OK
    "965": [pd.Timestamp('2024-03-28 01:22:00'), pd.Timestamp('2024-03-28 09:22:00')], # 965 OK
}

subjects = ["158", "098", "633", "279", "906", "547", "971", "958", "815"]

for i, sub in enumerate(subjects):

    print(sub)

    with open(f'/Volumes/Untitled/rehab/data/{sub}/ax_data.pkl', 'rb') as f:
        ax_data = pickle.load(f)

    print("Loaded ax_data!")

    trunk_df = pd.Series(compute_acc_norm(ax_data["trunk"][["x", "y", "z"]].values), index = pd.to_datetime(ax_data["trunk"]["time"], unit = "s") + pd.Timedelta(hours = 1))
    ll_df = pd.Series(compute_acc_norm(ax_data["la"][["x", "y", "z"]].values), index = pd.to_datetime(ax_data["la"]["time"], unit = "s") + pd.Timedelta(hours = 1))
    rl_df = pd.Series(compute_acc_norm(ax_data["ra"][["x", "y", "z"]].values), index = pd.to_datetime(ax_data["ra"]["time"], unit = "s") + pd.Timedelta(hours = 1))
    lw_df = pd.Series(compute_acc_norm(ax_data["lw"][["x", "y", "z"]].values), index = pd.to_datetime(ax_data["lw"]["time"], unit = "s") + pd.Timedelta(hours = 1))
    rw_df = pd.Series(compute_acc_norm(ax_data["rw"][["x", "y", "z"]].values), index = pd.to_datetime(ax_data["rw"]["time"], unit = "s") + pd.Timedelta(hours = 1))

    start_sleep, end_sleep = diary_SPT[sub]

    trunk_df = trunk_df.loc[start_sleep:end_sleep]
    ll_df = ll_df.loc[start_sleep:end_sleep]
    rl_df = rl_df.loc[start_sleep:end_sleep]
    lw_df = lw_df.loc[start_sleep:end_sleep]
    rw_df = rw_df.loc[start_sleep:end_sleep]

    # TODO: Modify sampling rate to 100 Hz

    lw_df_bp = pd.Series(nk.signal_filter(lw_df.values, sampling_rate = 50, lowcut=0.1, highcut=5, method='butterworth', order=8), index = lw_df.index)
    rw_df_bp = pd.Series(nk.signal_filter(rw_df.values, sampling_rate = 50, lowcut=0.1, highcut=5, method='butterworth', order=8), index = rw_df.index)
    ll_df_bp = pd.Series(nk.signal_filter(ll_df.values, sampling_rate = 50, lowcut=0.1, highcut=5, method='butterworth', order=8), index = ll_df.index)
    rl_df_bp = pd.Series(nk.signal_filter(rl_df.values, sampling_rate = 50, lowcut=0.1, highcut=5, method='butterworth', order=8), index = rl_df.index)
    trunk_df_bp = pd.Series(nk.signal_filter(trunk_df.values, sampling_rate = 50, lowcut=0.1, highcut=5, method='butterworth', order=8), index = trunk_df.index)
    bursts_lw = detect_bursts(lw_df_bp, plot = False, alfa = 7)
    bursts_rw = detect_bursts(rw_df_bp, plot = False, alfa = 7)
    bursts_ll = detect_bursts(ll_df_bp, plot = False, alfa = 6)
    bursts_rl = detect_bursts(rl_df_bp, plot = False, alfa = 6)
    bursts_trunk = detect_bursts(trunk_df_bp, plot = False, alfa = 5)

    break

    # Isolation checks
    bursts_ll['isolated'] = bursts_ll.apply(lambda x: is_isolated(x['start'], x['end'], merge_excluding(bursts_ll)), axis=1)
    bursts_rl['isolated'] = bursts_rl.apply(lambda x: is_isolated(x['start'], x['end'], merge_excluding(bursts_rl)), axis=1)
    bursts_lw['isolated'] = bursts_lw.apply(lambda x: is_isolated(x['start'], x['end'], merge_excluding(bursts_lw)), axis=1)
    bursts_rw['isolated'] = bursts_rw.apply(lambda x: is_isolated(x['start'], x['end'], merge_excluding(bursts_rw)), axis=1)
    bursts_trunk['isolated'] = bursts_trunk.apply(lambda x: is_isolated(x['start'], x['end'], merge_excluding(bursts_trunk)), axis=1)

    # Extract isolated movements for each limb
    bursts_ll_isolated = bursts_ll[bursts_ll['isolated']]
    bursts_rl_isolated = bursts_rl[bursts_rl['isolated']]
    bursts_lw_isolated = bursts_lw[bursts_lw['isolated']]
    bursts_rw_isolated = bursts_rw[bursts_rw['isolated']]
    bursts_trunk_isolated = bursts_trunk[bursts_trunk['isolated']]

    bursts_wrists_isolated = pd.concat([bursts_lw_isolated, bursts_rw_isolated], ignore_index=True)
    bursts_legs_isolated = pd.concat([bursts_ll_isolated, bursts_rl_isolated], ignore_index=True)

    bursts_both_wrists = find_isolated_combination([bursts_lw, bursts_rw], [bursts_ll, bursts_rl, bursts_trunk]).iloc[::2].reset_index(drop=True)

    # Finding isolated movements for both legs alone (no wrists or trunk)
    bursts_both_legs = find_isolated_combination([bursts_ll, bursts_rl], [bursts_lw, bursts_rw, bursts_trunk]).iloc[::2].reset_index(drop=True)

    bursts_lw["limb"] = "lw"
    bursts_rw["limb"] = "rw"
    bursts_ll["limb"] = "ll"
    bursts_rl["limb"] = "rl"
    bursts_trunk["limb"] = "trunk"
    bursts_all_limbs_combined = find_combined_movements_all_limbs([bursts_lw, bursts_rw, bursts_ll, bursts_rl, bursts_trunk])

    bursts_all_limbs_combined["AUC"] = np.nan

    lmin, lmax = hl_envelopes_idx(lw_df_bp.values, dmin=9, dmax=9)
    if len(lmin) > len(lmax):
        lmin = lmin[:-1]
    if len(lmax) > len(lmin):
        lmax = lmax[1:]
    env_diff_lw = pd.Series(lw_df_bp.values[lmax] - lw_df_bp.values[lmin], index = lw_df_bp.index[lmax])

    lmin, lmax = hl_envelopes_idx(rw_df_bp.values, dmin=9, dmax=9)
    if len(lmin) > len(lmax):
        lmin = lmin[:-1]
    if len(lmax) > len(lmin):
        lmax = lmax[1:]
    env_diff_rw = pd.Series(rw_df_bp.values[lmax] - rw_df_bp.values[lmin], index = rw_df_bp.index[lmax])

    lmin, lmax = hl_envelopes_idx(ll_df_bp.values, dmin=9, dmax=9)
    if len(lmin) > len(lmax):
        lmin = lmin[:-1]
    if len(lmax) > len(lmin):
        lmax = lmax[1:]
    env_diff_ll = pd.Series(ll_df_bp.values[lmax] - ll_df_bp.values[lmin], index = ll_df_bp.index[lmax])

    lmin, lmax = hl_envelopes_idx(rl_df_bp.values, dmin=9, dmax=9)
    if len(lmin) > len(lmax):
        lmin = lmin[:-1]
    if len(lmax) > len(lmin):
        lmax = lmax[1:]
    env_diff_rl = pd.Series(rl_df_bp.values[lmax] - rl_df_bp.values[lmin], index = rl_df_bp.index[lmax])

    lmin, lmax = hl_envelopes_idx(trunk_df_bp.values, dmin=9, dmax=9)
    if len(lmin) > len(lmax):
        lmin = lmin[:-1]
    if len(lmax) > len(lmin):
        lmax = lmax[1:]
    env_diff_trunk = pd.Series(trunk_df_bp.values[lmax] - trunk_df_bp.values[lmin], index = trunk_df_bp.index[lmax])

    for i, b in enumerate(range(len(bursts_all_limbs_combined))):
        bursts_all_limbs_combined.loc[i, "AUC"] = np.trapz(env_diff_lw.loc[bursts_all_limbs_combined["start"].iloc[i]:bursts_all_limbs_combined["end"].iloc[i]]) 
        + np.trapz(env_diff_rw.loc[bursts_all_limbs_combined["start"].iloc[i]:bursts_all_limbs_combined["end"].iloc[i]]) 
        + np.trapz(env_diff_ll.loc[bursts_all_limbs_combined["start"].iloc[i]:bursts_all_limbs_combined["end"].iloc[i]]) 
        + np.trapz(env_diff_rl.loc[bursts_all_limbs_combined["start"].iloc[i]:bursts_all_limbs_combined["end"].iloc[i]]) 
        + np.trapz(env_diff_trunk.loc[bursts_all_limbs_combined["start"].iloc[i]:bursts_all_limbs_combined["end"].iloc[i]]) 

    bursts_both_wrists["AUC"] = np.nan
    for i, b in enumerate(range(len(bursts_both_wrists))):
        bursts_both_wrists.loc[i, "AUC"] = np.trapz(env_diff_lw.loc[bursts_both_wrists["start"].iloc[i]:bursts_both_wrists["end"].iloc[i]]) 
        + np.trapz(env_diff_rw.loc[bursts_both_wrists["start"].iloc[i]:bursts_both_wrists["end"].iloc[i]])

    bursts_both_legs["AUC"] = np.nan
    for i, b in enumerate(range(len(bursts_both_legs))):
        bursts_both_legs.loc[i, "AUC"] = np.trapz(env_diff_ll.loc[bursts_both_legs["start"].iloc[i]:bursts_both_legs["end"].iloc[i]]) 
        + np.trapz(env_diff_rl.loc[bursts_both_legs["start"].iloc[i]:bursts_both_legs["end"].iloc[i]])

    # Trunk - I need xyz
    ax_data['trunk'].index = pd.to_datetime(ax_data['trunk']['time'], unit='s') + pd.Timedelta(hours = 1)
    ax_data['trunk'].drop(columns=['time'], inplace=True)
    trunk_acc_df = ax_data['trunk'].loc[start_sleep:end_sleep]
    del ax_data

    phi, theta = compute_spherical_coordinates(trunk_acc_df.resample('10s').median())
    trunk_acc_sph = pd.DataFrame({"phi": phi * 180 / np.pi, "theta": theta * 180 / np.pi}, index=trunk_acc_df.resample('10s').median().index)
    updated_df = detect_posture_changes(trunk_acc_sph.copy())
    time_posture_change30 = updated_df[updated_df['posture_change30']].index
    time_posture_change10 = updated_df[updated_df['posture_change10']].index
    # join bursts from all limbs and posture changes

    bursts_all_limbs_combined["posture_change"] = np.nan

    for time in time_posture_change10:
        for i in range(len(bursts_all_limbs_combined)):
                if time > bursts_all_limbs_combined["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_all_limbs_combined["end"].iloc[i]+pd.Timedelta(seconds = 5):
                    bursts_all_limbs_combined["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees10"]
    # join bursts and posture changes

    bursts_lw["posture_change"] = np.nan
    bursts_rw["posture_change"] = np.nan
    bursts_ll["posture_change"] = np.nan
    bursts_rl["posture_change"] = np.nan
    bursts_trunk["posture_change"] = np.nan

    for time in time_posture_change30:
        for i in range(len(bursts_lw)):
            if time > bursts_lw["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_lw["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_lw["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees30"]
        for i in range(len(bursts_rw)):
            if time > bursts_rw["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_rw["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_rw["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees30"]
        for i in range(len(bursts_ll)):
            if time > bursts_ll["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_ll["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_ll["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees30"]
        for i in range(len(bursts_rl)):
            if time > bursts_rl["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_rl["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_rl["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees30"]
        for i in range(len(bursts_trunk)):
            if time > bursts_trunk["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_trunk["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_trunk["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees30"]

    for time in time_posture_change10:
        for i in range(len(bursts_lw)):
            if time > bursts_lw["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_lw["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_lw["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees10"]
        for i in range(len(bursts_rw)):
            if time > bursts_rw["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_rw["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_rw["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees10"]
        for i in range(len(bursts_ll)):
            if time > bursts_ll["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_ll["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_ll["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees10"]
        for i in range(len(bursts_rl)):
            if time > bursts_rl["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_rl["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_rl["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees10"]
        for i in range(len(bursts_trunk)):
            if time > bursts_trunk["start"].iloc[i]-pd.Timedelta(seconds = 5) and time < bursts_trunk["end"].iloc[i]+pd.Timedelta(seconds = 5):
                bursts_trunk["posture_change"].iloc[i] = updated_df.loc[time, "posture_change_degrees10"]

    # summarize all the bursts in a dict, with a key for each combination of limbs

    bursts = {
        "lw": bursts_lw,
        "rw": bursts_rw,
        "ll": bursts_ll,
        "rl": bursts_rl,
        "trunk": bursts_trunk,
        "wrists": bursts_wrists_isolated,
        "legs": bursts_legs_isolated,
        "trunk_isolated": bursts_trunk_isolated,
        "both_wrists": bursts_both_wrists,
        "both_legs": bursts_both_legs,
        "all_limbs": bursts_all_limbs_combined
    }

    # SAVE
    with open(f'/Volumes/Untitled/rehab/data/{sub}/bursts_TIB.pkl', 'wb') as f:
        pickle.dump(bursts, f)

158
Loaded ax_data!


In [11]:
plt.figure(figsize=(19, 12))
plt.subplot(5, 1, 1)
plt.plot(lw_df)
for i in range(len(bursts_lw)):
    plt.axvspan(bursts_lw["start"].iloc[i], bursts_lw["end"].iloc[i], color = 'b', alpha = 0.3)
plt.ylabel("ACC (g)", fontsize = 16)
plt.xticks(fontsize = 16)
plt.yticks(fontsize = 16)
plt.legend(["LW ACC", "Movement"], loc = "upper right", fontsize = 16)

plt.subplot(5, 1, 2, sharex = plt.subplot(5, 1, 1), sharey = plt.subplot(5, 1, 1))
plt.plot(rw_df)
for i in range(len(bursts_rw)):
    plt.axvspan(bursts_rw["start"].iloc[i], bursts_rw["end"].iloc[i], color = 'b', alpha = 0.3)
plt.ylabel("ACC (g)", fontsize = 16)
plt.xticks(fontsize = 16)
plt.yticks(fontsize = 16)
plt.legend(["RW ACC", "Movement"], loc = "upper right", fontsize = 16)

plt.subplot(5, 1, 3, sharex = plt.subplot(5, 1, 1), sharey = plt.subplot(5, 1, 1))
plt.plot(ll_df)
for i in range(len(bursts_ll)):
    plt.axvspan(bursts_ll["start"].iloc[i], bursts_ll["end"].iloc[i], color = 'b', alpha = 0.3)
plt.ylabel("ACC (g)", fontsize = 16)
plt.xticks(fontsize = 16)
plt.yticks(fontsize = 16)
plt.legend(["LL ACC", "Movement"], loc = "upper right", fontsize = 16)

plt.subplot(5, 1, 4, sharex = plt.subplot(5, 1, 1), sharey = plt.subplot(5, 1, 1))
plt.plot(rl_df)
for i in range(len(bursts_rl)):
    plt.axvspan(bursts_rl["start"].iloc[i], bursts_rl["end"].iloc[i], color = 'b', alpha = 0.3)
plt.ylabel("ACC (g)", fontsize = 16)
plt.xticks(fontsize = 16)
plt.yticks(fontsize = 16)
plt.legend(["RL ACC", "Movement"], loc = "upper right", fontsize = 16)

plt.subplot(5, 1, 5, sharex = plt.subplot(5, 1, 1), sharey = plt.subplot(5, 1, 1))
plt.plot(trunk_df)
for i in range(len(bursts_trunk)):
    plt.axvspan(bursts_trunk["start"].iloc[i], bursts_trunk["end"].iloc[i], color = 'b', alpha = 0.3)
plt.ylabel("ACC (g)", fontsize = 16)
plt.xticks(fontsize = 16)
plt.yticks(fontsize = 16)
plt.legend(["Trunk ACC", "Movement"], loc = "upper right", fontsize = 16)

<matplotlib.legend.Legend at 0x7fb1e4edbca0>

In [4]:
# Combine all intervals into a list along with limb identifiers
intervals = []
intervals.extend((row['start'], row['end'], 'LL') for index, row in bursts_ll.iterrows())
intervals.extend((row['start'], row['end'], 'LW') for index, row in bursts_lw.iterrows())
intervals.extend((row['start'], row['end'], 'RL') for index, row in bursts_rl.iterrows())
intervals.extend((row['start'], row['end'], 'RW') for index, row in bursts_rw.iterrows())
intervals.extend((row['start'], row['end'], 'T') for index, row in bursts_trunk.iterrows())

# Sort intervals by start time
intervals.sort(key=lambda x: x[0])

In [5]:
intervals[0], intervals[1], intervals[2], intervals[3], intervals[4]

((Timestamp('2024-02-28 23:00:01.590290070'),
  Timestamp('2024-02-28 23:00:02.890290022'),
  'LL'),
 (Timestamp('2024-02-28 23:00:01.781500101'),
  Timestamp('2024-02-28 23:00:03.341500044'),
  'RL'),
 (Timestamp('2024-02-28 23:00:01.804869890'),
  Timestamp('2024-02-28 23:00:04.064870119'),
  'T'),
 (Timestamp('2024-02-28 23:00:30.990289927'),
  Timestamp('2024-02-28 23:00:31.560290098'),
  'LL'),
 (Timestamp('2024-02-28 23:00:43.584870100'),
  Timestamp('2024-02-28 23:00:44.704869986'),
  'T'))

In [7]:
# Merge overlapping intervals and label them
merged_intervals = []
current_start, current_end, current_limb = intervals[0]
# current_limb = current_limb
# print(current_limb)

for start, end, limb in intervals[1:]:
    if start <= current_end:  # There is an overlap
        current_end = max(current_end, end)
        if limb not in current_limb:
            # print(current_limb)
            # current_limb.add(limb)
            current_limb += '+' + limb
    else:
        merged_intervals.append((current_start, current_end, current_limb))
        current_start, current_end, current_limb = start, end, limb

# Append the last interval
merged_intervals.append((current_start, current_end, current_limb))
merged_intervals = [(start, end, set(limbs_str.split('+'))) for start, end, limbs_str in merged_intervals]

# Create a DataFrame for a cleaner view of the merged intervals
df_merged_intervals = pd.DataFrame(merged_intervals, columns=['Start', 'End', 'Limbs'])
df_merged_intervals

Unnamed: 0,Start,End,Limbs
0,2024-02-28 23:00:01.590290070,2024-02-28 23:00:04.064870119,"{T, RL, LL}"
1,2024-02-28 23:00:30.990289927,2024-02-28 23:00:31.560290098,{LL}
2,2024-02-28 23:00:43.584870100,2024-02-28 23:00:44.704869986,{T}
3,2024-02-28 23:02:29.704869986,2024-02-28 23:02:31.704869986,"{T, LW}"
4,2024-02-28 23:03:31.105629921,2024-02-28 23:03:32.335629940,"{T, LW}"
...,...,...,...
143,2024-02-29 06:54:00.270289898,2024-02-29 06:54:01.020289898,{LL}
144,2024-02-29 07:00:06.070290089,2024-02-29 07:00:28.191499949,"{T, LL, RL, RW, LW}"
145,2024-02-29 07:03:13.620290041,2024-02-29 07:03:16.340290070,{LL}
146,2024-02-29 07:04:31.660290003,2024-02-29 07:04:32.970289946,{LL}


## All subjects

In [None]:
from collections import Counter

In [27]:
bursts_lw = {sub: 0 for sub in subjects}
bursts_rw = {sub: 0 for sub in subjects}
bursts_ll = {sub: 0 for sub in subjects}
bursts_rl = {sub: 0 for sub in subjects}
bursts_trunk = {sub: 0 for sub in subjects}
SIB = {sub: 0 for sub in subjects}
limbs_combinations = {sub: 0 for sub in subjects}
for i, sub in enumerate(subjects):
    with open(f'/Volumes/Untitled/rehab/data/{sub}/bursts.pkl', 'rb') as f:
        bursts = pickle.load(f)
    bursts_lw[sub] = bursts["lw"]
    bursts_rw[sub] = bursts["rw"]
    bursts_ll[sub] = bursts["ll"]
    bursts_rl[sub] = bursts["rl"]
    bursts_trunk[sub] = bursts["trunk"]

    # Combine all intervals into a list along with limb identifiers
    intervals = []
    intervals.extend((row['start'], row['end'], 'LL') for index, row in bursts_ll[sub].iterrows())
    intervals.extend((row['start'], row['end'], 'LW') for index, row in bursts_lw[sub].iterrows())
    intervals.extend((row['start'], row['end'], 'RL') for index, row in bursts_rl[sub].iterrows())
    intervals.extend((row['start'], row['end'], 'RW') for index, row in bursts_rw[sub].iterrows())
    intervals.extend((row['start'], row['end'], 'T') for index, row in bursts_trunk[sub].iterrows())

    # Sort intervals by start time
    intervals.sort(key=lambda x: x[0])

    # Merge overlapping intervals and label them
    merged_intervals = []
    current_start, current_end, current_limb = intervals[0]
    # current_limb = current_limb
    # print(current_limb)

    for start, end, limb in intervals[1:]:
        if start <= current_end:  # There is an overlap
            current_end = max(current_end, end)
            if limb not in current_limb:
                current_limb += '+' + limb
        else:
            merged_intervals.append((current_start, current_end, current_limb))
            current_start, current_end, current_limb = start, end, limb

    # Append the last interval
    merged_intervals.append((current_start, current_end, current_limb))
    merged_intervals = [(start, end, set(limbs_str.split('+'))) for start, end, limbs_str in merged_intervals]

    # Create a DataFrame for a cleaner view of the merged intervals
    df_merged_intervals = pd.DataFrame(merged_intervals, columns=['Start', 'End', 'Limbs'])

    limbs_comb = Counter(tuple(sorted(limbs)) for _, _, limbs in merged_intervals)
    limbs_combinations_df = pd.DataFrame(limbs_comb.items(), columns=['Limbs', 'Count']).sort_values(by='Count', ascending=False).reset_index(drop=True)
    limbs_combinations_df["sub_ID"] = sub
    limbs_combinations[sub] = limbs_combinations_df

In [53]:
limbs_combinations_df_ALL = pd.concat(limbs_combinations.values(), ignore_index=True)

In [74]:
limbs_comb_groupby = limbs_combinations_df_ALL.groupby(["Limbs", "sub_ID"]).sum().sort_values(by = "Count", ascending = False)

# Plot this information
plt.figure(figsize=(12, 6))
sns.barplot(data = limbs_comb_groupby.reset_index(), x = "sub_ID", y = "Count", hue = "Limbs")

<Axes: xlabel='sub_ID', ylabel='Count'>

In [79]:
plt.figure()
plt.pie(limbs_combinations_df_ALL.groupby('Limbs').sum().sort_values(by='Count', ascending=False)['Count'], autopct='%1.1f%%');#, labels = limbs_combinations_df_ALL.groupby('Limbs').sum().sort_values(by='Count', ascending=False).index, autopct='%1.1f%%')


Total number of possible combinations
- 1 limb (size 1): 5 combinations, one for each element.
- 2 limbs (size 2): 10 combinations, each pair of elements.
- 3 limbs (size 3): 10 combinations, each trio of elements.
- 4 limbs (size 4): 5 combinations, each group excluding one element.
- 5 limbs (size 5): 1 combination, all elements together.

Total of 5+10+10+5+1 = 31 combinations