In [1]:
#system imports
import os
import sys

# data science
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import seaborn as sns

# signal processing
from scipy import signal
from scipy.ndimage import label
from scipy.stats import zscore
from scipy.interpolate import interp1d
from scipy.integrate import trapz


# misc
import warnings

import glob

##
import pytz
import datetime as dt
import math
import seaborn as sns


import pickle

In [2]:

import biosppy
from biosppy import storage
from biosppy.signals import ecg


# misc
import warnings

#signal processing
from scipy import signal
from scipy.ndimage import label
from scipy.stats import zscore
from scipy.interpolate import interp1d
from scipy.integrate import trapz

## Read and manipulate Psychopy dataframe

In [3]:
def read_manipulate_psychopy(psychopy_path):
    
    psychopy_df = pd.read_csv(psychopy_path)
    col_list = [col for col in psychopy_df.columns if col.endswith('_ts')]
    #col_list.insert(0, "Reference_time")
    psychopy_df_selected = psychopy_df[col_list]
    
    return psychopy_df_selected


def get_nonNan_list_psychopy(psychopy_df, col_name):
    
    
    selected_vals=[val for val in psychopy_df[col_name].to_list() if not(math.isnan(val))]
    
    return selected_vals

## Read, manipulate and slice Shimmer dataframe

In [4]:
#######---------------------------------------------------
###### ------------------Shimmer--------------------------
###----------------------------------------------------------


def read_shimmer_sensor(sensor_file_path):
    
    shimmer_df = pd.read_csv(sensor_file_path, sep='\t', low_memory=False)
    shimmer_df = shimmer_df.reset_index()
    shimmer_df.columns = shimmer_df.iloc[0]
    shimmer_df.drop([0, 1], axis=0, inplace=True)
    shimmer_df=shimmer_df.reset_index(drop=True)
    
    return shimmer_df

def standardize_timestamps_shimmer(shimmer_df, timestamps_col_name):
    
    timesstamps_list = shimmer_df[timestamps_col_name].to_list()
    new_timestamps_list = [float(val)/1000 for val in  timesstamps_list]
    
    shimmer_df[timestamps_col_name] = new_timestamps_list
    
    return shimmer_df



def get_offset_timestamp(timestamp, offset_mins):
    
    time_zone = 'Europe/Berlin'
    tz = pytz.timezone(time_zone)
    local_time = dt.datetime.fromtimestamp(timestamp, tz)
    time_change = dt.timedelta(minutes=offset_mins)
    new_time = local_time + time_change
    new_timestamp =  dt.datetime.timestamp(new_time)
    return new_timestamp

def get_list_timestamp_interest(starting_timestamp, list_offset_mins):
    
    starting_timestamp_list=[]
    
    for offset_min in list_offset_mins:
        starting_timestamp_list.append(starting_timestamp)
        timestamp_offset = get_offset_timestamp(starting_timestamp, offset_min)
        starting_timestamp = timestamp_offset  
     
    
    #starting_timestamp_list = sorted(starting_timestamp_list, key = lambda x:float(x))
    return starting_timestamp_list
        
        

def slice_df_wrt_timestamps(df, start_timestamp, end_timestamp, timestamps_col):
    
    sliced_df=df[(df[timestamps_col]>= start_timestamp) & (df[timestamps_col] <= end_timestamp)]
    
    return sliced_df


def from_str_to_float(str_list):
    #float(recovery_onset_str_timestamps[0][1:-1])
    
    float_array =[float(val[1:-1]) for val in str_list]
    
    return float_array

def col_from_str_float (df, col_name):
    
    str_list = df[col_name].values
    
    float_array =[float(val) for val in str_list]
    
    df[col_name] = float_array
    
    return df

## Time Domain HRV

In [5]:
def detect_peaks(ecg_signal, threshold=0.3, qrs_filter=None):
    '''
    Peak detection algorithm using cross corrrelation and threshold 
    '''
    if qrs_filter is None:
        # create default qrs filter, which is just a part of the sine function
        t = np.linspace(1.5 * np.pi, 3.5 * np.pi, 15)
        qrs_filter = np.sin(t)
    
    # normalize data
    ecg_signal = (ecg_signal - ecg_signal.mean()) / ecg_signal.std()

    # calculate cross correlation
    similarity = np.correlate(ecg_signal, qrs_filter, mode="same")
    similarity = similarity / np.max(similarity)

    # return peaks (values in ms) using threshold
    return ecg_signal[similarity > threshold].index, similarity



def group_peaks(p, threshold=5):
    '''
    The peak detection algorithm finds multiple peaks for each QRS complex. 
    Here we group collections of peaks that are very near (within threshold) and we take the median index 
    '''
    # initialize output
    output = np.empty(0)

    # label groups of sample that belong to the same peak
    peak_groups, num_groups = label(np.diff(p) < threshold)

    # iterate through groups and take the mean as peak index
    for i in np.unique(peak_groups)[1:]:
        peak_group = p[np.where(peak_groups == i)]
        output = np.append(output, int(np.median(peak_group)))
        
        
    #output = int(output)
    return output

def from_rr_ind_2_msec(r_peaks_ind, sampling_freq = 256):
    
    # RR-intervals are the differences between successive peaks
    r_peaks_sec = r_peaks_ind*1/sampling_freq
    r_peaks_msec = r_peaks_sec*1000

    r_peaks_diff_msec=np.diff(r_peaks_msec)
    
    return r_peaks_diff_msec
    
    
    
    
    

def remove_outliers_rri(r_peaks_diff_msec, sampling_freq = 256, outlier_std = 2):
    
    # RR-intervals are the differences between successive peaks
    #r_peaks_sec = r_peaks*1/sampling_freq
    #r_peaks_msec = r_peaks_sec*1000

    #r_peaks_diff=np.diff(r_peaks_msec)
    
    rr_corrected = r_peaks_diff_msec.copy()

    rr_corrected[np.abs(zscore(r_peaks_diff_msec)) > outlier_std] = np.median(r_peaks_diff_msec)
    
    return rr_corrected
    


def get_plot_ranges(start=10, end=20, n=5):
    '''
    Make an iterator that divides into n or n+1 ranges. 
    - if end-start is divisible by steps, return n ranges
    - if end-start is not divisible by steps, return n+1 ranges, where the last range is smaller and ends at n
    
    # Example:
    >> list(get_plot_ranges())
    >> [(0.0, 3.0), (3.0, 6.0), (6.0, 9.0)]

    '''
    distance = end - start
    for i in np.arange(start, end, np.floor(distance/n)):
        yield (int(i), int(np.minimum(end, np.floor(distance/n) + i)))
        
        
        
def timedomain(rr):
    results = {}

    hr = 60000/rr
    
    results['Mean RR (ms)'] = np.mean(rr)
    results['STD RR/SDNN (ms)'] = np.std(rr)
    #results['Mean HR (Kubios\' style) (beats/min)'] = 60000/np.mean(rr)
    results['Mean HR (beats/min)'] = np.mean(hr)
    results['STD HR (beats/min)'] = np.std(hr)
    results['Min HR (beats/min)'] = np.min(hr)
    results['Max HR (beats/min)'] = np.max(hr)
    results['RMSSD (ms)'] = np.sqrt(np.mean(np.square(np.diff(rr))))
    results['NNxx'] = np.sum(np.abs(np.diff(rr)) > 50)*1
    results['pNNxx (%)'] = 100 * np.sum((np.abs(np.diff(rr)) > 50)*1) / len(rr)
    return results

## Plot raw and filtered ECG

In [39]:
def plot_ecg_wf(sliced_ecg_df,r_peaks, time_offset, raw_data_col_name="raw_ecg", filtered_data_col_name="filtered", nr_plots=3):    
    starting_index_slcide_df= sliced_ecg_df.index[0]
    sampfrom = starting_index_slcide_df
    sampto = starting_index_slcide_df+256*time_offset*60
    
    rr_interval_cumm = []
    for start, stop in get_plot_ranges(sampfrom, sampto, nr_plots):
        print(start)
        print(stop)
        # get sliced data of ECG
        #cond_slice = (sliced_ecg_df.index>= start) & (sliced_ecg_df.index <= stop)
        #sliced_ecg_hrv =sliced_ecg_df.filtered[cond_slice]
        
        sliced_ecg_hrv = sliced_ecg_df[filtered_data_col_name][(sliced_ecg_df.index >= start) & (sliced_ecg_df.index<= stop)]
        
        sliced_ecg_raw = sliced_ecg_df[raw_data_col_name][(sliced_ecg_df.index >= start) & (sliced_ecg_df.index<= stop)]
        
        
        r_peaks_selected = [r_p for r_p in r_peaks if r_p >= start and r_p <= stop]
        
        plt.figure(figsize=(20, 15))

        plt.subplot(211)
        plt.title("Raw ECG")
        plt.plot(sliced_ecg_raw, label="ECG", color="#51A6D8", linewidth=1)
        #plt.plot(sliced_ecg_hrv.index[start: stop], sliced_ecg_hrv, label="ECG", color="#51A6D8", linewidth=1)
        #plt.plot(r_peaks_selected, np.repeat(0.3, len(r_peaks_selected)), label="peaks", color="orange", marker="o", linestyle="None")
        plt.legend(loc="upper right")
        plt.xlabel("Time (milliseconds)")
        plt.ylabel("Amplitude (arbitrary unit)")

        plt.subplot(212)
        plt.title('Filtered ECG')
        
        #plt.plot(ecg_slice.index, similarity, label="Similarity with QRS filter", color="olive", linewidth=1)
        plt.plot(sliced_ecg_hrv, label="Filtered ECG", color="olive", linewidth=1)
        plt.plot(r_peaks_selected, np.repeat(0.5, len(r_peaks_selected)), label="peaks", color="orange", marker="o", linestyle="None")
        plt.legend(loc="upper right")
        plt.xlabel("Time (milliseconds)")
        plt.ylabel("Similarity (normalized)")
        
        

## --------------psyschopy related--------

In [40]:
events_timestamps_path = "/home/muhammad/Desktop/Datasets/dataset_sony_sam_2024/WP2/WP2_01_IGFW/preAnalysis/psychopy_events_triggers_WP2_01_IGFW.csv"
event_timestamps_df = pd.read_csv(events_timestamps_path)
psychopy_path = "/home/muhammad/Desktop/Datasets/dataset_sony_sam_2024/WP2/WP2_01_IGFW/Psychopy/WP2_01_IGFW_SAM_Experiment_2023-09-18_11h14.57.361.csv"
psychopy_df = pd.read_csv(psychopy_path)
block_1_type = psychopy_df["block1"][0]
block_2_type = psychopy_df["block2"][0]


## ---- Extract Timestamps---------

In [41]:
baseline_onset = event_timestamps_df["baseline_onset"].values
baseline_onset_timestamps = baseline_onset[1:]
task_onset =event_timestamps_df["task_onset"].values
task_onset_str_timestamps = task_onset[1:]
task_onset_float_timestamp = from_str_to_float(task_onset_str_timestamps)
recovery_onset =event_timestamps_df["recovery_onset"].values
recovery_onset_str_timestamps = recovery_onset[1:]
recovery_onset_list_timestamps=from_str_to_float(recovery_onset_str_timestamps)


## ECG data load and manipulate

In [42]:
sampling_freq_shimmer = 256
shimmer_file_path = "/home/muhammad/Desktop/Datasets/dataset_sony_sam_2024/WP2/WP2_01_IGFW/Shimmer/2023-09-18_08.34.52_WP2_01_IGFW_SD_Session1/WP2_01_IGFW_Session1_Shimmer_8943_Calibrated_SD.csv"
shimmer_df=read_shimmer_sensor(shimmer_file_path)
shimmer_timestamp_col_name = "Shimmer_8943_Timestamp_Unix_CAL"
ecg_col_name = "Shimmer_8943_ECG_LA-RA_24BIT_CAL"

In [None]:
#shimmer_df

In [81]:
selected_ecg_df = shimmer_df[[shimmer_timestamp_col_name, ecg_col_name]]
selected_ecg_df = selected_ecg_df.copy()
selected_ecg_df=standardize_timestamps_shimmer(selected_ecg_df, shimmer_timestamp_col_name)
selected_ecg_df['ecg'] = selected_ecg_df[ecg_col_name]
selected_ecg_df=col_from_str_float(selected_ecg_df, "ecg")

In [46]:
#ecg_array = selected_ecg_df["ecg"].values
#out = ecg.ecg(signal=ecg_array, sampling_rate=sampling_freq_shimmer, show=False, interactive=False)
#selected_ecg_df["filtered"] = out["filtered"]
#r_peaks_ind = out["rpeaks"]
#rr_diff_msec= from_rr_ind_2_msec(r_peaks_ind)

In [70]:
#r_peaks_ind = out["rpeaks"]
#rr_diff_msec= from_rr_ind_2_msec(r_peaks_ind)
#rr_corrected_msec=remove_outliers_rri(rr_diff_msec)
###---------------------------------------------------------
#ecg_df= pd.DataFrame()
#ecg_df["raw_ecg"] = ecg_array
#ecg_df["filtered"] = out["filtered"]
# rr inervals in msec 
#rr_corrected_msec_clean_array = rr_corrected_msec
#plot_ecg_wf(ecg_df,r_peaks_ind, 30, nr_plots=40)

In [61]:
baseline_onset_timestamps

array([1.69502872e+09, 1.69502967e+09, 1.69503100e+09])

In [84]:
time_zone = 'Europe/Berlin'
tz = pytz.timezone(time_zone)

for ind_timestamp, sel_baseline in enumerate(baseline_onset_timestamps):
    
    start_timestamp_baseline = sel_baseline
    stop_timestamp_baseline = get_offset_timestamp(start_timestamp_baseline, 5)
    
    
    sliced_ecg_df=slice_df_wrt_timestamps(selected_ecg_df, start_timestamp_baseline, stop_timestamp_baseline, shimmer_timestamp_col_name)
    #sliced_ecg_df=sliced_ecg_df.reset_index(drop=True)
    
    ####------------------for debugging purposes---------------------------
    start_time = dt.datetime.fromtimestamp(start_timestamp_baseline, tz)
    end_time  = dt.datetime.fromtimestamp(stop_timestamp_baseline, tz)

    print("baseline start time: "+str(start_time))
    print("baseline stop time: "+str(end_time))
    
    time_stamps_shimmer= sliced_ecg_df[shimmer_timestamp_col_name].values
    
    start_timestamp_shimmer = time_stamps_shimmer[0]
    end_timestamp_shimmer =  time_stamps_shimmer[-1]
    
    tart_time = dt.datetime.fromtimestamp(start_timestamp_shimmer, tz)
    end_time  = dt.datetime.fromtimestamp(end_timestamp_shimmer, tz)

    print("shimmer_baseline start time: "+str(start_time))
    print("shimmer_baseline stop time: "+str(end_time))
    
    
    
    
    
    
    
    
    start_timestamp_task= task_onset_float_timestamp[ind_timestamp]
    stop_timestamp_task = get_offset_timestamp(start_timestamp_task, 5)
    
    start_time = dt.datetime.fromtimestamp(start_timestamp_task, tz)
    end_time  = dt.datetime.fromtimestamp(stop_timestamp_task, tz)

    print("task start time: "+str(start_time))
    print("task stop time: "+str(end_time))
    
    start_timestamp_recovery= recovery_onset_list_timestamps[ind_timestamp]
    stop_timestamp_recovery =get_offset_timestamp(start_timestamp_recovery, 5)
    
    start_time = dt.datetime.fromtimestamp(start_timestamp_recovery, tz)
    end_time  = dt.datetime.fromtimestamp(stop_timestamp_recovery, tz)

    print("recovery start time: "+str(start_time))
    print("recovery stop time: "+str(end_time))
    

baseline start time: 2023-09-18 11:18:38.760779+02:00
baseline stop time: 2023-09-18 11:23:38.760779+02:00
shimmer_baseline start time: 2023-09-18 11:18:38.760779+02:00
shimmer_baseline stop time: 2023-09-18 11:23:38.757721+02:00
task start time: 2023-09-18 11:23:39.040433+02:00
task stop time: 2023-09-18 11:28:39.040433+02:00
recovery start time: 2023-09-18 11:28:40.057839+02:00
recovery stop time: 2023-09-18 11:33:40.057839+02:00
baseline start time: 2023-09-18 11:34:25.806835+02:00
baseline stop time: 2023-09-18 11:39:25.806835+02:00
shimmer_baseline start time: 2023-09-18 11:34:25.806835+02:00
shimmer_baseline stop time: 2023-09-18 11:39:25.804596+02:00
task start time: 2023-09-18 11:39:26.194674+02:00
task stop time: 2023-09-18 11:44:26.194674+02:00
recovery start time: 2023-09-18 11:44:28.633173+02:00
recovery stop time: 2023-09-18 11:49:28.633173+02:00
baseline start time: 2023-09-18 11:56:44.448840+02:00
baseline stop time: 2023-09-18 12:01:44.448840+02:00
shimmer_baseline star

In [83]:
sliced_ecg_df[""]

Unnamed: 0,Shimmer_8943_Timestamp_Unix_CAL,Shimmer_8943_ECG_LA-RA_24BIT_CAL,ecg
705411,1.695031e+09,-3.2906214345242306,-3.290621
705412,1.695031e+09,-3.288097177517078,-3.288097
705413,1.695031e+09,-3.292568718501177,-3.292569
705414,1.695031e+09,-3.3536557380742718,-3.353656
705415,1.695031e+09,-3.3591369818612318,-3.359137
...,...,...,...
782091,1.695031e+09,-3.5852382880733358,-3.585238
782092,1.695031e+09,-3.5847334366719052,-3.584733
782093,1.695031e+09,-3.5912243832617263,-3.591224
782094,1.695031e+09,-3.5716073002347115,-3.571607


In [78]:
shimmer_timestamps=selected_ecg_df["Shimmer_8943_Timestamp_Unix_CAL"].values

In [79]:
shimmer_timestamps

array(['1.6950282459061584E12', '1.695028245913971E12',
       '1.6950282459178772E12', ..., '1.6950322083475647E12',
       '1.695032208351471E12', '1.6950322083553772E12'], dtype=object)