In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

import pickle

import seaborn as sns

%matplotlib qt
mpl.rcParams['lines.linewidth'] = 0.91
plt.style.use('seaborn-v0_8-whitegrid')

In [2]:
diary_SPT = {    
    "158": [pd.Timestamp('2024-02-28 23:00:00'), pd.Timestamp('2024-02-29 07:15:00')], # 158 OK
    "633": [pd.Timestamp('2024-03-07 00:05:00'), pd.Timestamp('2024-03-07 06:36:00')], # 633 OK
    "906": [pd.Timestamp('2024-03-07 00:30:00'), pd.Timestamp('2024-03-07 07:30:00')], # 906 OK
    "958": [pd.Timestamp('2024-03-13 22:00:00'), pd.Timestamp('2024-03-14 06:00:00')], # 958 OK
    "127": [pd.Timestamp('2024-03-13 23:15:00'), pd.Timestamp('2024-03-14 06:50:00')], # 127 OK
    "098": [pd.Timestamp('2024-03-16 02:01:00'), pd.Timestamp('2024-03-16 09:50:00')], # 098 OK
    "547": [pd.Timestamp('2024-03-16 01:04:00'), pd.Timestamp('2024-03-16 07:40:00')], # 547 OK
    "815": [pd.Timestamp('2024-03-20 23:00:00'), pd.Timestamp('2024-03-21 07:30:00')], # 815 OK
    "914": [pd.Timestamp('2024-03-20 21:50:00'), pd.Timestamp('2024-03-21 05:50:00')], # 914 OK
    "971": [pd.Timestamp('2024-03-20 23:50:00'), pd.Timestamp('2024-03-21 07:50:00')], # 971 OK
    "279": [pd.Timestamp('2024-03-28 00:10:00'), pd.Timestamp('2024-03-28 07:27:00')], # 279 OK
    "965": [pd.Timestamp('2024-03-28 01:25:00'), pd.Timestamp('2024-03-28 09:20:00')], # 965 OK
}

comb_location = {
    "158": ["la", "trunk", "rw"],
    "633": ["trunk", "ra", "lw"],
    "906": ["rw", "la", "trunk"],
    "958": ["ra", "trunk", "lw"],
    "127": ["la", "trunk", "rw"],
    "098": ["trunk", "lw", "ra"],
    "547": ["la", "lw", "trunk"],
    "815": ["trunk", "ra", "lw"],
    "914": ["ra", "trunk", "lw"],
    "971": ["la", "trunk", "rw"],
    "279": ["trunk", "la", "rw"],
    "965": ["rw", "trunk", "la"]
}

## Confronto con l'algoritmo

In [4]:
path_marcello = "/Users/marcellosicbaldi/Library/CloudStorage/OneDrive-AlmaMaterStudiorumUniversitàdiBologna/General - LG-MIAR (rehab)/SCORING_bursts"

with open(f"{path_marcello}/final_database/wrist_5_40_2.5.pkl", "rb") as f:
    wrist = pickle.load(f)
with open(f"{path_marcello}/final_database/ankle_5_40_2.5.pkl", "rb") as f:
    ankle = pickle.load(f)
with open(f"{path_marcello}/final_database/trunk_5_40_2.5.pkl", "rb") as f:
    trunk = pickle.load(f)

In [5]:
# calculate sensitivity, specificity, PPV, F1

subjects = ["158", "098", "633", "906", "279", "547", "971", "958", "815", "127", "914", "965"]

wrist_results = {}
ankle_results = {}
trunk_results = {}

alphas = np.arange(5,41,2.5)

for alpha in alphas:
    wrist_results[alpha] = {"acc": [], "sens": [], "spec": [], "ppv": [], "f1": []}
    ankle_results[alpha] = {"acc": [], "sens": [], "spec": [], "ppv": [], "f1": []}
    trunk_results[alpha] = {"acc": [], "sens": [], "spec": [], "ppv": [], "f1": []}

for alpha in alphas:
    for sub in subjects:
        TP_wrist = wrist[sub][alpha]["TP"]
        FP_wrist = wrist[sub][alpha]["FP"]
        FN_wrist = wrist[sub][alpha]["FN"]
        TP_ankle = ankle[sub][alpha]["TP"]
        FP_ankle = ankle[sub][alpha]["FP"]
        FN_ankle = ankle[sub][alpha]["FN"]
        TP_trunk = trunk[sub][alpha]["TP"]
        FP_trunk = trunk[sub][alpha]["FP"]
        FN_trunk = trunk[sub][alpha]["FN"]

        acc_wrist = (TP_wrist + FN_wrist) / (TP_wrist + FP_wrist + FN_wrist)
        sens_wrist = TP_wrist / (TP_wrist + FN_wrist)
        spec_wrist = TP_wrist / (TP_wrist + FP_wrist)
        ppv_wrist = TP_wrist / (TP_wrist + FP_wrist)
        f1_wrist = 2 * (ppv_wrist * sens_wrist) / (ppv_wrist + sens_wrist)

        acc_ankle = (TP_ankle + FN_ankle) / (TP_ankle + FP_ankle + FN_ankle)
        sens_ankle = TP_ankle / (TP_ankle + FN_ankle)
        spec_ankle = TP_ankle / (TP_ankle + FP_ankle)
        ppv_ankle = TP_ankle / (TP_ankle + FP_ankle)
        f1_ankle = 2 * (ppv_ankle * sens_ankle) / (ppv_ankle + sens_ankle)

        acc_trunk = (TP_trunk + FN_trunk) / (TP_trunk + FP_trunk + FN_trunk)
        sens_trunk = TP_trunk / (TP_trunk + FN_trunk)
        spec_trunk = TP_trunk / (TP_trunk + FP_trunk)
        ppv_trunk = TP_trunk / (TP_trunk + FP_trunk)
        f1_trunk = 2 * (ppv_trunk * sens_trunk) / (ppv_trunk + sens_trunk)

        wrist_results[alpha]["acc"].append(acc_wrist)
        wrist_results[alpha]["sens"].append(sens_wrist)
        wrist_results[alpha]["spec"].append(spec_wrist)
        wrist_results[alpha]["ppv"].append(ppv_wrist)
        wrist_results[alpha]["f1"].append(f1_wrist)

        ankle_results[alpha]["acc"].append(acc_ankle)
        ankle_results[alpha]["sens"].append(sens_ankle)
        ankle_results[alpha]["spec"].append(spec_ankle)
        ankle_results[alpha]["ppv"].append(ppv_ankle)
        ankle_results[alpha]["f1"].append(f1_ankle)

        trunk_results[alpha]["acc"].append(acc_trunk)
        trunk_results[alpha]["sens"].append(sens_trunk)
        trunk_results[alpha]["spec"].append(spec_trunk)
        trunk_results[alpha]["ppv"].append(ppv_trunk)
        trunk_results[alpha]["f1"].append(f1_trunk)

In [6]:
plt.style.use("ggplot")
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_context("talk")

In [24]:
# plot results (f1)
fig, ax = plt.subplots(1, 3, figsize=(19, 5))
for alpha in alphas:
    ax[0].boxplot(wrist_results[alpha]["f1"], positions = [alpha], widths = 1.2, showmeans = True, patch_artist = False)
    ax[1].boxplot(ankle_results[alpha]["f1"], positions = [alpha], widths = 1.2, showmeans = True, patch_artist = False)
    ax[2].boxplot(trunk_results[alpha]["f1"], positions = [alpha], widths = 1.2, showmeans = True, patch_artist = False)
for i in range(3):
    ax[i].set_xticks([alpha for alpha in alphas[::2]])
    ax[i].set_xticklabels([str(alpha) for alpha in alphas[::2]])
    ax[i].set_xlabel("Threshold (mg)")
ax[0].set_title("Wrist")
ax[1].set_title("Ankle")
ax[2].set_title("Trunk")
plt.suptitle("F1 score")
plt.tight_layout()
plt.savefig(path_marcello + "/final_database/figures/f1_score.png", dpi = 300, bbox_inches = "tight")

# plot results (senstivity)
fig, ax = plt.subplots(1, 3, figsize=(19, 5))
for alpha in alphas:
    ax[0].boxplot(wrist_results[alpha]["sens"], positions = [alpha], widths = 1.2, showmeans = True, patch_artist = False)
    ax[1].boxplot(ankle_results[alpha]["sens"], positions = [alpha], widths = 1.2, showmeans = True, patch_artist = False)
    ax[2].boxplot(trunk_results[alpha]["sens"], positions = [alpha], widths = 1.2, showmeans = True, patch_artist = False)
for i in range(3):
    ax[i].set_xticks([alpha for alpha in alphas[::2]])
    ax[i].set_xticklabels([str(alpha) for alpha in alphas[::2]])
    ax[i].set_xlabel("Threshold (mg)")
ax[0].set_title("Wrist")
ax[1].set_title("Ankle")
ax[2].set_title("Trunk")
plt.suptitle("Sensitivity")
plt.tight_layout()
plt.savefig(path_marcello + "/final_database/figures/sensitivity.png", dpi = 300, bbox_inches = "tight")

# plot results (specificity)
fig, ax = plt.subplots(1, 3, figsize=(19, 5))
for alpha in alphas:
    ax[0].boxplot(wrist_results[alpha]["spec"], positions = [alpha], widths = 1.2, showmeans = True, patch_artist = False)
    ax[1].boxplot(ankle_results[alpha]["spec"], positions = [alpha], widths = 1.2, showmeans = True, patch_artist = False)
    ax[2].boxplot(trunk_results[alpha]["spec"], positions = [alpha], widths = 1.2, showmeans = True, patch_artist = False)
for i in range(3):
    ax[i].set_xticks([alpha for alpha in alphas[::2]])
    ax[i].set_xticklabels([str(alpha) for alpha in alphas[::2]])
    ax[i].set_xlabel("Threshold (mg)")

ax[0].set_title("Wrist")
ax[1].set_title("Ankle")
ax[2].set_title("Trunk")

plt.suptitle("Precision")
plt.tight_layout()

plt.savefig(path_marcello + "/final_database/figures/precision.png", dpi = 300, bbox_inches = "tight")

In [21]:
np.mean(wrist_results[20]["f1"])*100, np.std(wrist_results[20]["f1"])*100, np.mean(wrist_results[20]["sens"])*100, np.std(wrist_results[20]["sens"])*100, np.mean(wrist_results[20]["spec"])*100, np.std(wrist_results[20]["spec"])*100

(90.58082910391124,
 3.0818482636946984,
 93.48262576795184,
 6.727717894402637,
 88.81935275273631,
 7.84669695913838)

In [22]:
np.mean(ankle_results[15]["f1"])*100, np.std(ankle_results[15]["f1"])*100, np.mean(ankle_results[15]["sens"])*100, np.std(ankle_results[15]["sens"])*100, np.mean(ankle_results[15]["spec"])*100, np.std(ankle_results[15]["spec"])*100


(93.65845382414116,
 6.683084598630604,
 94.66274881497915,
 7.183698432911247,
 93.16591856203871,
 8.654082508892845)

In [23]:
np.mean(trunk_results[15]["f1"])*100, np.std(trunk_results[15]["f1"])*100, np.mean(trunk_results[15]["sens"])*100, np.std(trunk_results[15]["sens"])*100, np.mean(trunk_results[15]["spec"])*100, np.std(trunk_results[15]["spec"])*100

(94.40889112982632,
 4.5255544249623565,
 94.56866184300262,
 5.928426645027572,
 95.10005899563274,
 8.397945489664641)

In [None]:
from scipy.interpolate import interp1d

In [1]:
def hl_envelopes_idx(s, dmin=1, dmax=1, split=False, plot = True):
    """
    Compute high and low envelopes of a signal s
    Parameters
    ----------
    s: 1d-array, data signal from which to extract high and low envelopes
    dmin, dmax: int, optional, size of chunks, use this if the size of the input signal is too big
    split: bool, optional, if True, split the signal in half along its mean, might help to generate the envelope in some cases
    resample: bool, optional, if True, resample the signal to the original size

    Returns
    -------
    lmin,lmax : high/low envelope idx of input signal s
    """

    # locals min      
    lmin = (np.diff(np.sign(np.diff(s))) > 0).nonzero()[0] + 1 
    # locals max
    lmax = (np.diff(np.sign(np.diff(s))) < 0).nonzero()[0] + 1 
    
    if split:
        # s_mid is zero if s centered around x-axis or more generally mean of signal
        s_mid = np.mean(s) 
        # pre-sorting of locals min based on relative position with respect to s_mid 
        lmin = lmin[s[lmin]<s_mid]
        # pre-sorting of local max based on relative position with respect to s_mid 
        lmax = lmax[s[lmax]>s_mid]

    # global min of dmin-chunks of locals min 
    lmin = lmin[[i+np.argmin(s[lmin[i:i+dmin]]) for i in range(0,len(lmin),dmin)]]
    # global max of dmax-chunks of locals max 
    lmax = lmax[[i+np.argmax(s[lmax[i:i+dmax]]) for i in range(0,len(lmax),dmax)]]
    
    return lmin,lmax

def detect_bursts(acc, resample_envelope = False, plot = False, alfa = 15):
    """
    Detect bursts in acceleration signal

    Parameters
    ----------
    acc : pd.Series
        # Signal magnitude of raw acceleration 

    Returns
    -------
    bursts : pd.Series
        pd.DataFrame with burst start times, end times, and duration
    """

    if envelope:
        lmin, lmax = hl_envelopes_idx(acc.values, dmin=10, dmax=10)
        # adjust shapes
        if len(lmin) > len(lmax):
            lmin = lmin[:-1]
        if len(lmax) > len(lmin):
            lmax = lmax[1:]
        upper_envelope = acc.values[lmax]
        lower_envelope = acc.values[lmin]
        # resample the envelopes to the original size
        if resample_envelope:
            upper_envelope_res = np.interp(np.arange(len(acc)), lmax, upper_envelope)
            lower_envelope_res = np.interp(np.arange(len(acc)), lmin, lower_envelope)
            env_diff = pd.Series(upper_envelope_res - lower_envelope_res, index = acc.index)
        else:
            env_diff = pd.Series(upper_envelope - lower_envelope, index = acc.index[lmax])
        print(len(env_diff))
        th = np.percentile(env_diff.values, 10) * alfa
    else:
        std_acc = acc.resample("1 s").std()
        std_acc.index.round("1 s")
        th = np.percentile(std_acc, 10) * alfa
        env_diff = std_acc
        
    if plot:
        plt.figure()
        plt.subplot(2,1,1)
        plt.plot(acc.values, color = 'k')
        if resample_envelope:
            plt.plot(lower_envelope_res, '-o')
            plt.plot(upper_envelope_res, '-o')
        else:
            plt.plot(lmin, acc.values[lmin], '-o')
            plt.plot(lmax, acc.values[lmax], '-o')
        plt.subplot(2,1,2, sharex = plt.subplot(2,1,1))
        plt.plot(env_diff.values, color = 'b')
        plt.axhline(th, color = 'r')

    bursts1 = (env_diff > th).astype(int)
    start_burst = bursts1.where(bursts1.diff()==1).dropna()
    end_burst = bursts1.where(bursts1.diff()==-1).dropna()
    if bursts1.iloc[0] == 1:
            start_burst = pd.concat([pd.Series(0, index = [bursts1.index[0]]), start_burst])
    if bursts1.iloc[-1] == 1:
        end_burst = pd.concat([end_burst, pd.Series(0, index = [bursts1.index[-1]])])
    bursts_df = pd.DataFrame({"duration": end_burst.index - start_burst.index}, index = start_burst.index)

    start = bursts_df.index
    end = pd.to_datetime((bursts_df.index + bursts_df["duration"]).values)

    end = end.to_series().reset_index(drop = True)
    start = start.to_series().reset_index(drop = True)

    duration_between_bursts = (start.iloc[1:].values - end.iloc[:-1].values)

    # If two bursts are too close to each other (5s), consider them as one burst
    for i in range(len(start)-1):
        if duration_between_bursts[i] < pd.Timedelta("5 s"):
            end[i] = np.nan
            start[i+1] = np.nan
    end.dropna(inplace = True)
    start.dropna(inplace = True)

    # extract amplitude of the bursts
    bursts = pd.DataFrame({"Start": start.reset_index(drop = True), "End": end.reset_index(drop = True)})
    burst_amplitude1 = []
    burst_amplitude2 = []
    for i in range(len(bursts)):
        # peak-to-peak amplitude of bp acceleration
        burst_amplitude1.append(acc.loc[bursts["Start"].iloc[i]:bursts["End"].iloc[i]].max() - acc.loc[bursts["Start"].iloc[i]:bursts["End"].iloc[i]].min())
        # AUC of env_diff
        burst_amplitude2.append(np.trapz(env_diff.loc[bursts["Start"].iloc[i]:bursts["End"].iloc[i]]))
    bursts["duration"] = bursts["End"] - bursts["Start"]
    bursts["peak-to-peak"] = burst_amplitude1
    bursts["AUC"] = burst_amplitude2
    return bursts

In [None]:
subjects = ["158"]

for i, sub in enumerate(subjects):
    locations = comb_location[sub]
    print(sub)

    save_path = "/Users/marcellosicbaldi/Library/CloudStorage/OneDrive-AlmaMaterStudiorumUniversitàdiBologna/General - LG-MIAR (rehab)/SCORING_bursts"

    acc_norm_raw = pd.read_pickle(save_path+ "/" + sub + "/" + locations[1] + "/" + locations[1] + ".pkl")
    acc_norm_raw1 = pd.Series(nk.signal_filter(acc_norm_raw.values, sampling_rate = 50, lowcut=0.1, highcut=5, method='butterworth', order=8), index = acc_norm_raw.index)
    acc_norm_raw2 = pd.Series(nk.signal_filter(acc_norm_raw1.values, sampling_rate = 100, lowcut=0.1, highcut=10, method='butterworth', order=8), index = acc_norm_raw1.index)
    start_sleep, end_sleep = diary_SPT[sub]

    # Split the data according to the sleep midpoint
    sleep_midPoint = start_sleep + (end_sleep - start_sleep) / 2

    # First location
    # loc1_df_1 = acc_norm_raw.loc[sleep_midPoint - pd.Timedelta(hours = 1):sleep_midPoint]
    # loc1_df_2 = acc_norm_raw.loc[sleep_midPoint:sleep_midPoint + pd.Timedelta(hours = 1)]

    # # Second location
    loc1_df_1 = acc_norm_raw.loc[sleep_midPoint - pd.Timedelta(hours = 2):sleep_midPoint - pd.Timedelta(hours = 1)]
    loc1_df_2 = acc_norm_raw.loc[sleep_midPoint + pd.Timedelta(hours = 1):sleep_midPoint + pd.Timedelta(hours = 2)]

    # # Third location
    # loc1_df_1 = acc_norm_raw.loc[sleep_midPoint - pd.Timedelta(hours = 3):sleep_midPoint - pd.Timedelta(hours = 2)]
    # loc1_df_2 = acc_norm_raw.loc[sleep_midPoint + pd.Timedelta(hours = 2):sleep_midPoint + pd.Timedelta(hours = 3)]

    # concatenate the two dataframes
    current_acc_1 = pd.concat([loc1_df_1, loc1_df_2])

    annot_marcello1 = pd.read_csv(f"{path_marcello}/{sub}/{locations[0]}/bursts_ANNOT.csv")
    annot_marcello2 = pd.read_csv(f"{path_marcello}/{sub}/{locations[1]}/bursts_ANNOT.csv")
    annot_marcello3 = pd.read_csv(f"{path_marcello}/{sub}/{locations[2]}/bursts_ANNOT.csv")

    bursts = detect_bursts(current_acc_1, resample_envelope = False, plot = False, alfa = 6)

    # # plot annot1 as axvspans
    # plt.figure(figsize=(20, 12))
    # plt.plot(current_acc_1)
    # for i, row in annot_marcello2.iterrows():
    #     plt.axvspan(pd.to_datetime(row["Start"]), pd.to_datetime(row["End"]), alpha=0.5, color='blue')
    # for i, row in bursts.iterrows():
    #     plt.axvspan(row["start"], row["end"], alpha=0.5, color='red')

    agreement, disagreement = compare_annotations(annot_marcello2, bursts)

158
18200


TypeError: '<=' not supported between instances of 'str' and 'Timestamp'

In [None]:
bursts

Unnamed: 0,start,end,duration,peak-to-peak,AUC
0,2024-02-29 01:15:54.014869928,2024-02-29 01:15:54.444869995,0 days 00:00:00.430000067,0.028314,0.025932
1,2024-02-29 01:18:00.644870043,2024-02-29 01:18:09.494869947,0 days 00:00:08.849999904,0.129128,0.396791
2,2024-02-29 01:18:15.114870071,2024-02-29 01:18:16.464869976,0 days 00:00:01.349999905,0.063694,0.09085
3,2024-02-29 01:18:24.174870014,2024-02-29 01:18:37.784869909,0 days 00:00:13.609999895,0.181678,0.97312
4,2024-02-29 01:21:48.164870024,2024-02-29 01:21:48.814870119,0 days 00:00:00.650000095,0.027165,0.025596
5,2024-02-29 01:24:32.034869909,2024-02-29 01:24:36.734869957,0 days 00:00:04.700000048,0.087386,0.439976
6,2024-02-29 01:29:52.954869986,2024-02-29 01:29:54.684870005,0 days 00:00:01.730000019,0.055484,0.063021
7,2024-02-29 01:30:12.324870110,2024-02-29 01:30:36.414870024,0 days 00:00:24.089999914,0.875546,3.982242
8,2024-02-29 01:32:16.654870033,2024-02-29 01:32:19.424870014,0 days 00:00:02.769999981,0.047771,0.169452
9,2024-02-29 01:40:29.964869976,2024-02-29 01:40:30.724869967,0 days 00:00:00.759999991,0.033871,0.020757


In [None]:
plt.figure()
plt.plot(acc_norm_raw)
plt.plot(acc_norm_raw1)
plt.plot(acc_norm_raw2)

[<matplotlib.lines.Line2D at 0x7f98107acf70>]

In [3]:
diary_SPT = {    
    "158": [pd.Timestamp('2024-02-28 23:00:00'), pd.Timestamp('2024-02-29 07:15:00')], # 158 OK
    "633": [pd.Timestamp('2024-03-07 00:05:00'), pd.Timestamp('2024-03-07 06:36:00')], # 633 OK
    "906": [pd.Timestamp('2024-03-07 00:30:00'), pd.Timestamp('2024-03-07 07:30:00')], # 906 OK
    "958": [pd.Timestamp('2024-03-13 22:00:00'), pd.Timestamp('2024-03-14 06:00:00')], # 958 OK
    "127": [pd.Timestamp('2024-03-13 23:15:00'), pd.Timestamp('2024-03-14 06:50:00')], # 127 OK
    "098": [pd.Timestamp('2024-03-16 02:01:00'), pd.Timestamp('2024-03-16 09:50:00')], # 098 OK
    "547": [pd.Timestamp('2024-03-16 01:04:00'), pd.Timestamp('2024-03-16 07:40:00')], # 547 OK
    "815": [pd.Timestamp('2024-03-20 23:00:00'), pd.Timestamp('2024-03-21 06:25:00')], # 815 OK
    "914": [pd.Timestamp('2024-03-20 21:50:00'), pd.Timestamp('2024-03-21 05:50:00')], # 914 OK
    "971": [pd.Timestamp('2024-03-20 23:50:00'), pd.Timestamp('2024-03-21 07:50:00')], # 971 OK
    "279": [pd.Timestamp('2024-03-28 00:10:00'), pd.Timestamp('2024-03-28 07:27:00')], # 279 OK
    "965": [pd.Timestamp('2024-03-28 01:25:00'), pd.Timestamp('2024-03-28 09:20:00')], # 965 OK
}

diary_TIB = {
    "158": [pd.Timestamp('2024-02-28 22:15:00'), pd.Timestamp('2024-02-29 07:45:00')], # 158 OK
    "633": [pd.Timestamp('2024-03-06 23:39:00'), pd.Timestamp('2024-03-07 08:00:00')], # 633 OK 
    "906": [pd.Timestamp('2024-03-07 00:15:00'), pd.Timestamp('2024-03-07 07:35:00')], # 906 OK
    "958": [pd.Timestamp('2024-03-13 21:30:00'), pd.Timestamp('2024-03-14 06:30:00')], # 958 OK
    "127": [pd.Timestamp('2024-03-13 22:00:00'), pd.Timestamp('2024-03-14 07:10:00')], # 127 OK 
    "098": [pd.Timestamp('2024-03-16 01:49:00'), pd.Timestamp('2024-03-16 09:52:00')], # 098 OK 
    "547": [pd.Timestamp('2024-03-16 00:26:00'), pd.Timestamp('2024-03-16 08:20:00')], # 547 OK 
    "815": [pd.Timestamp('2024-03-20 22:00:00'), pd.Timestamp('2024-03-21 07:30:00')], # 815 OK 
    "914": [pd.Timestamp('2024-03-20 21:30:00'), pd.Timestamp('2024-03-21 06:20:00')], # 914 OK 
    "971": [pd.Timestamp('2024-03-20 23:30:00'), pd.Timestamp('2024-03-21 08:08:00')], # 971 OK 
    "279": [pd.Timestamp('2024-03-28 00:04:00'), pd.Timestamp('2024-03-28 07:41:00')], # 279 OK
    "965": [pd.Timestamp('2024-03-28 01:22:00'), pd.Timestamp('2024-03-28 09:22:00')], # 965 OK
}

In [None]:
annot_paola2['Start'] = pd.to_datetime(annot_paola2['Start'])
annot_paola2['End'] = pd.to_datetime(annot_paola2['End'])
bursts['Start'] = pd.to_datetime(bursts['start'])
bursts['End'] = pd.to_datetime(bursts['end'])

# Function to check if two intervals overlap
def is_overlap(start1, end1, start2, end2):
    return (start1 <= end2) and (start2 <= end1)

# Initialize counters for agreement and disagreement
agreement_count = 0
disagreement_count = 0

# Check each burst in algo data against all bursts in marcello rater's data
for i, row_algo in bursts.iterrows():
    overlap_found = False
    for j, row_other in annot_paola2.iterrows():
        if is_overlap(row_algo['Start'], row_algo['End'], row_other['Start'].tz_localize(None), row_other['End'].tz_localize(None)):
            agreement_count += 1
            overlap_found = True
            break
    if not overlap_found:
        disagreement_count += 1

# Check each burst in the marcello rater's data against all bursts in algo data
for j, row_other in annot_paola2.iterrows():
    overlap_found = False
    for i, row_algo in bursts.iterrows():
        if is_overlap(row_algo['Start'], row_algo['End'], row_other['Start'].tz_localize(None), row_other['End'].tz_localize(None)):
            overlap_found = True
            break
    if not overlap_found:
        disagreement_count += 1


agreement_count, disagreement_count

(43, 3)

In [None]:
# plot annot1 as axvspans
fig, ax = plt.subplots()
plt.plot(current_acc_1)
for i, row in annot_paola2.iterrows():
    ax.axvspan(pd.to_datetime(row["Start"].tz_localize(None)), pd.to_datetime(row["End"].tz_localize(None)), alpha=0.5, color='red')
for i, row in bursts.iterrows():
    ax.axvspan(pd.to_datetime(row["Start"]), pd.to_datetime(row["End"]), alpha=0.5, color='blue')

In [None]:
row_algo["Start"], row_other['Start']

(Timestamp('2024-03-07 00:29:03.959481001'),
 Timestamp('2024-03-07 03:09:37.218347+0000', tz='UTC'))