In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

import pickle

import neurokit2 as nk

import seaborn as sns

%matplotlib qt
mpl.rcParams['lines.linewidth'] = 0.91
plt.style.use('ggplot')
plt.style.use('seaborn-v0_8-whitegrid')

# sns.set_context("talk")
sns.set_palette("Set1")

In [3]:
import pyreadr
from functions.bursts import characterize_bursts, filter_bursts, compute_envelope
from sleep_diary import diary_SPT, diary_TIB

subjects = ["158", "098", "633", "279", "906", "547", "971", "958", "815"]

In [5]:
part3_outputFolder = "/Volumes/Untitled/rehab/GGIR/GGIR_output_lw_TIB/output_lw_data/meta/ms3.out/"
SIB_GGIR = {sub: pyreadr.read_r(part3_outputFolder + "LW_" + sub + ".CWA.RData")['sib.cla.sum'][["sib.onset.time", "sib.end.time"]] for sub in subjects}

SIB = {sub: 0 for sub in subjects}

bursts_df = pd.DataFrame()

for i, sub in enumerate(subjects):
    SIB_GGIR[sub]["sib.onset.time"] = pd.to_datetime(SIB_GGIR[sub]["sib.onset.time"].values).tz_localize(None)
    SIB_GGIR[sub]["sib.end.time"] = pd.to_datetime(SIB_GGIR[sub]["sib.end.time"].values).tz_localize(None)
    SIB_GGIR[sub]["sib.duration"] = SIB_GGIR[sub]["sib.end.time"] - SIB_GGIR[sub]["sib.onset.time"]

    with open(f'/Volumes/Untitled/rehab/data/{sub}/bursts_FINAL_envInterp_p2p.pkl', 'rb') as f:
        bursts = pickle.load(f)

    df_merged_intervals = characterize_bursts(bursts)
    spt_start = diary_SPT[sub][0] - pd.Timedelta('10 min')
    spt_end = diary_TIB[sub][1] + pd.Timedelta('5 min')

    SIB[sub] = SIB_GGIR[sub][(SIB_GGIR[sub]["sib.onset.time"] >= spt_start) & (SIB_GGIR[sub]["sib.end.time"] <= spt_end)].reset_index(drop=True)
    SIB[sub] = SIB_GGIR[sub][(SIB_GGIR[sub]["sib.onset.time"] >= spt_start) & (SIB_GGIR[sub]["sib.end.time"] <= spt_end)].reset_index(drop=True)

    # Take df_merged_intervals between spt_start and spt_end
    df_merged_intervals = df_merged_intervals[(df_merged_intervals["Start"] >= spt_start) & (df_merged_intervals["End"] <= spt_end)].reset_index(drop=True) 

    SIB[sub]["awake.duration"] = SIB[sub]["sib.onset.time"].shift(-1) - SIB[sub]["sib.end.time"]
    SIB[sub]["sub_ID"] = sub

    df_merged_intervals["SIB"] = 0
    for i, row in SIB[sub].iterrows():
        df_merged_intervals.loc[(df_merged_intervals["Start"] >= row["sib.onset.time"] + pd.Timedelta("5s")) & (df_merged_intervals["End"] <= row["sib.end.time"] - pd.Timedelta("5s")), "SIB"] = 1

    df_merged_intervals["sub_ID"] = sub

    start_sleep = diary_SPT[sub][0]
    end_sleep = diary_SPT[sub][1]

    df_merged_intervals = df_merged_intervals.loc[(df_merged_intervals["Start"] >= start_sleep) & (df_merged_intervals["End"] <= end_sleep)]

    bursts_df = pd.concat([bursts_df, df_merged_intervals])

bursts_df.reset_index(drop=True, inplace=True)

In [6]:
bursts_df.head()

Unnamed: 0,Start,End,AUC,p2p,PC,transition,Limbs,SIB,sub_ID
0,2024-02-28 23:00:01.360290051,2024-02-28 23:00:02.460289955,4478.029974,59.446242,0.0,,"{LL, T, RL}",1,158
1,2024-02-28 23:00:30.130290031,2024-02-28 23:00:32.030289888,5565.732278,48.247713,0.0,,{LL},1,158
2,2024-02-28 23:00:43.494869947,2024-02-28 23:00:43.874870062,593.812924,16.318746,0.0,,{T},1,158
3,2024-02-28 23:02:28.765630007,2024-02-28 23:02:31.105629921,6965.555964,44.916305,0.0,,"{LW, T}",1,158
4,2024-02-28 23:03:30.215630054,2024-02-28 23:03:32.105629921,6534.732075,50.164798,0.0,,"{LW, T}",1,158


In [7]:
def is_overlap(start1, end1, start2, end2):
    return (start1 <= end2) and (start2 <= end1)

In [6]:
artifacts_discarded = {}
artifacts_interpolated = {}

for i, sub in enumerate(subjects):

    print(sub)

    start_sleep, end_sleep = diary_SPT[sub]
    
    ## ECG processing ##
    ecg_df = pd.read_pickle(f'/Volumes/Untitled/rehab/data/{sub}/polar_processed/ecg.pkl')

    ecg_df = ecg_df.loc[start_sleep:end_sleep]
    ecg_filtered = nk.ecg_clean(ecg_df.values, sampling_rate=130)

    # Extract peaks
    _, results = nk.ecg_peaks(ecg_filtered, sampling_rate=130, method = 'neurokit')
    rpeaks = results["ECG_R_Peaks"]
    _, rpeaks_corrected = nk.signal_fixpeaks(rpeaks, sampling_rate=130, iterative=True, method="Kubios")

    t_rpeaks = ecg_df.index.to_series().values[rpeaks]
    t_rpeaks_corrected = ecg_df.index.to_series().values[rpeaks_corrected]
    rr = np.diff(t_rpeaks).astype('timedelta64[ns]').astype('float64') / 1000000000
    rr_corrected = np.diff(t_rpeaks_corrected).astype('timedelta64[ns]').astype('float64') / 1000000000
    hr_ecg = 60/rr
    hr_ecg_corrected = 60/rr_corrected
    hr_df = pd.Series(hr_ecg_corrected, index = t_rpeaks_corrected[1:]).resample("1 s").mean()#.rolling('10s', min_periods=1, center=True).mean()
    hr_df = hr_df.interpolate(method = 'cubic')
    hr_df_noncorrected = pd.Series(hr_ecg, index = t_rpeaks[1:]).resample("1 s").mean()
    hr_df_noncorrected = hr_df_noncorrected.interpolate(method = 'linear')

    artifacts_ecg = pd.read_csv(f'/Volumes/Untitled/rehab/data/{sub}/polar_processed/artifacts_ecg.csv')
    artifacts_ecg['Start'] = pd.to_datetime(artifacts_ecg['Start']).apply(lambda x: x.replace(tzinfo=None))
    artifacts_ecg['End'] = pd.to_datetime(artifacts_ecg['End']).apply(lambda x: x.replace(tzinfo=None))

    for i in range(len(artifacts_ecg)):
        hr_df.loc[artifacts_ecg["Start"].iloc[i]:artifacts_ecg["End"].iloc[i]] = np.nan

    # hr_df.interpolate(method = 'cubic', inplace = True)

    # for i in range(len(artifacts_ecg)):
    #     if artifacts_ecg["End"].iloc[i] - artifacts_ecg["Start"].iloc[i] > pd.Timedelta("10 s"):
    #         hr_df.loc[artifacts_ecg["Start"].iloc[i]:artifacts_ecg["End"].iloc[i]] = np.nan

    bursts_df_filtered = filter_bursts(bursts_df.loc[bursts_df["sub_ID"] == sub].reset_index(drop=True)).reset_index(drop=True)

    artifacts_interpolated[sub] = []
    artifacts_discarded[sub] = []
    bursts_discarded_due_to_artifacts = 0
    bursts_interpolated_due_to_artifacts = 0

    for _, row_burst in bursts_df_filtered.iterrows():
        for _, row_artifact in artifacts_ecg.iterrows():
            if is_overlap(row_burst["Start"]-pd.Timedelta("19 s"), row_burst["Start"]+pd.Timedelta("50 s"), row_artifact["Start"], row_artifact["End"]):
                hr_df_loc = hr_df.loc[row_burst["Start"]-pd.Timedelta("19 s"):row_burst["Start"]+pd.Timedelta("50 s")]
                if row_artifact["End"] - row_artifact["Start"] > pd.Timedelta("10 s"):
                    hr_df_loc.loc[row_artifact["Start"]:row_artifact["End"]] = np.nan
                    print("here!")
                else:
                    hr_df_loc.loc[row_artifact["Start"]:row_artifact["End"]] = np.inf
                    print("here2!")
                artifacts_discarded[sub].append(hr_df_loc.isna().sum())
                artifacts_interpolated[sub].append(np.isinf(hr_df_loc).sum())

158
here2!
098
here2!
here2!
633
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here2!
here!
here!
279
here!
here!
here!
here!
here!
here2!
here2!
here!
here2!
here2!
906
here2!
here!
here2!
here2!
here!
here!
here2!
here2!
here!
here!
547
here2!
here!
here!
here!
971
here2!
here2!
here2!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here2!
here2!
here2!
here2!
here2!
here2!
here2!
here2!
here!
here!
here!
here2!
here2!
958
here2!
here2!
here2!
here!
here!
here!
here2!
here2!
here!
here2!
here!
here2!
here2!
here!
here!
here!
here!
here!
here!
here!
here!
here!
here2!
here!
here!
here2!
here!
here!
here!
here!
here!
here2!
here2!
here!
here2!
here2!
here2!
here!
here!
here!
here2!
here2!
here!
here!
here!
here!
here!
here2!
here!
here!
here2!
here2!
here2!
here2!
here2!
here2!
815
here!
here!
here!
here!
here!
here!
here!
here2!
here2!
here2

In [10]:
artifacts_discarded["633"].sum()

AttributeError: 'list' object has no attribute 'sum'

In [7]:
total_seconds = 0
perc_discarded = []
perc_interpolated = []
for i, sub in enumerate(subjects):
    bursts_df_filtered = filter_bursts(bursts_df.loc[bursts_df["sub_ID"] == sub].reset_index(drop=True)).reset_index(drop=True)
    total_seconds += 70*len(bursts_df_filtered)
    perc_discarded.append(sum(artifacts_discarded[sub])/total_seconds)
    perc_interpolated.append(sum(artifacts_interpolated[sub])/total_seconds)


In [8]:
np.mean(perc_discarded)*100, np.std(perc_discarded)*100, np.mean(perc_interpolated)*100, np.std(perc_interpolated)*100

(1.5160706700412319,
 1.7858203387195137,
 0.13197462515033362,
 0.10374187492330982)

0.0024152106885919834

In [39]:
total_seconds

58380