In [1]:
# system imports
import os
import sys

# data science
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import seaborn as sns

# signal processing
from scipy import signal
from scipy.ndimage import label
from scipy.stats import zscore
from scipy.interpolate import interp1d
from scipy.integrate import trapz


# misc
import warnings



##
import pytz
import datetime as dt
import math
import seaborn as sns


In [2]:
# Plot style: seaborn 'whitegrid' theme with the axes background overridden to '#EFF2F7'.
sns.set(style='whitegrid', rc={'axes.facecolor': '#EFF2F7'})

## PsychoPy and Shimmer helper functions

In [3]:
def read_manipulate_psychopy(psychopy_path):
    '''
    Load a PsychoPy CSV and keep only its timestamp columns.

    Parameters
    ----------
    psychopy_path : path or file-like object accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The "Reference_time" column followed by every column whose name
        ends with '_ts', in their original order.

    Raises
    ------
    KeyError
        If the file has no "Reference_time" column.
    '''
    raw = pd.read_csv(psychopy_path)

    wanted = ["Reference_time"]
    wanted.extend(name for name in raw.columns if name.endswith('_ts'))

    return raw[wanted]


def get_nonNan_list_psychopy(psychopy_df, col_name):
    '''
    Return the non-missing values of one column as a plain Python list.

    Missing entries (NaN / None / pd.NA) are removed with ``Series.dropna``;
    the remaining values keep their original order. Unlike a per-element
    ``math.isnan`` check, this also works for object-dtype columns that
    contain ``None`` or strings, where ``math.isnan`` raises TypeError.

    Parameters
    ----------
    psychopy_df : pandas.DataFrame
    col_name : label of the column to read.

    Returns
    -------
    list
        Non-missing values of ``psychopy_df[col_name]``.
    '''
    return psychopy_df[col_name].dropna().to_list()



#######---------------------------------------------------
###### ------------------Shimmer--------------------------
###----------------------------------------------------------


def read_shimmer_sensor(sensor_file_path):
    '''
    Load a tab-separated Shimmer export and rebuild its header.

    The file is parsed with ``sep='\\t'``, the parsed index is moved into
    the columns via ``reset_index()``, the first parsed row is promoted to
    column names, and the first two parsed rows are then discarded
    (presumably the signal-name row and the units row of the Shimmer
    format - TODO confirm against a sample file).

    Parameters
    ----------
    sensor_file_path : path or file-like object accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Data rows re-indexed from 0. Values are left exactly as parsed;
        no numeric conversion is applied here.
    '''
    raw = pd.read_csv(sensor_file_path, sep='\t', low_memory=False).reset_index()

    # The first parsed row carries the real column names.
    raw.columns = raw.iloc[0]

    data_rows = raw.drop(index=[0, 1])
    return data_rows.reset_index(drop=True)

def standardize_timestamps_shimmer(shimmer_df, timestamps_col_name):
    '''
    Convert a timestamp column to float and divide every value by 1000.

    The column is overwritten in place on ``shimmer_df`` and the same
    DataFrame object is returned.

    Parameters
    ----------
    shimmer_df : pandas.DataFrame
    timestamps_col_name : label of the column to rescale; each value must
        be accepted by ``float()``.

    Returns
    -------
    pandas.DataFrame
        ``shimmer_df`` itself, with the rescaled column.
    '''
    raw_values = shimmer_df[timestamps_col_name].to_list()

    shimmer_df[timestamps_col_name] = list(map(lambda raw: float(raw) / 1000, raw_values))

    return shimmer_df



def get_offset_timestamp(timestamp, offset_mins):
    '''
    Shift a POSIX timestamp by a number of minutes.

    The timestamp is converted to an aware datetime in the 'Europe/Berlin'
    zone, ``offset_mins`` minutes are added (negative values shift
    backwards), and the result is converted back to a POSIX timestamp.

    Parameters
    ----------
    timestamp : float, seconds since the epoch as accepted by
        ``datetime.fromtimestamp``.
    offset_mins : number of minutes to add.

    Returns
    -------
    float
        The shifted POSIX timestamp.
    '''
    berlin = pytz.timezone('Europe/Berlin')
    shifted = dt.datetime.fromtimestamp(timestamp, berlin) + dt.timedelta(minutes=offset_mins)
    return shifted.timestamp()

def get_list_timestamp_interest(starting_timestamp, list_offset_mins):
    '''
    Build the start timestamp of each consecutive window.

    The first window starts at ``starting_timestamp``; every following
    window starts where the previous one ended, i.e. the previous start
    shifted by the previous offset via ``get_offset_timestamp``.

    Parameters
    ----------
    starting_timestamp : POSIX timestamp of the first window start.
    list_offset_mins : iterable of window lengths in minutes.

    Returns
    -------
    list
        One start timestamp per entry of ``list_offset_mins``. The end of
        the last window is computed but not included.
    '''
    window_starts = []
    cursor = starting_timestamp

    for minutes in list_offset_mins:
        window_starts.append(cursor)
        cursor = get_offset_timestamp(cursor, minutes)

    return window_starts
        
        

def slice_df_wrt_timestamps(df, start_timestamp, end_timestamp, timestamps_col):
    '''
    Select the rows whose timestamp lies in [start_timestamp, end_timestamp].

    Both bounds are inclusive. The original index labels are kept.

    Parameters
    ----------
    df : pandas.DataFrame
    start_timestamp, end_timestamp : bounds compared against the column.
    timestamps_col : label of the timestamp column.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df``.
    '''
    in_window = df[timestamps_col].between(start_timestamp, end_timestamp)
    return df.loc[in_window]


def from_str_to_float(str_list):
    '''
    Convert every element of an iterable to ``float``.

    Returns a new list; raises whatever ``float()`` raises for an element
    it cannot convert.
    '''
    return list(map(float, str_list))

def col_from_str_float (df, col_name):
    '''
    Convert one DataFrame column to floats, in place.

    Every value of ``df[col_name]`` is passed through ``float()`` and the
    column is overwritten on ``df``; the same DataFrame object is returned.
    '''
    df[col_name] = list(map(float, df[col_name].values))

    return df
    

## HRV-related functions

In [84]:
def detect_peaks(ecg_signal, threshold=0.3, qrs_filter=None):
    '''
    Candidate R-peak detection by cross-correlation with a QRS template.

    The signal is standardized (zero mean, unit standard deviation),
    cross-correlated with ``qrs_filter`` and the result is scaled so that
    its maximum is 1. Every sample whose scaled similarity exceeds
    ``threshold`` is reported.

    Parameters
    ----------
    ecg_signal : pandas.Series
        The ECG samples; the ``.index`` of the Series is what gets returned.
    threshold : float
        Lower bound (exclusive) on the scaled similarity.
    qrs_filter : array-like or None
        Template to correlate with. Defaults to 15 samples of one sine
        period running from 1.5*pi to 3.5*pi.

    Returns
    -------
    (pandas.Index, numpy.ndarray)
        Index labels of the samples above threshold (labels, not times),
        and the scaled similarity for every sample.
    '''
    if qrs_filter is None:
        # default template: a single sine period, trough -> crest -> trough
        qrs_filter = np.sin(np.linspace(1.5 * np.pi, 3.5 * np.pi, 15))

    standardized = (ecg_signal - ecg_signal.mean()) / ecg_signal.std()

    similarity = np.correlate(standardized, qrs_filter, mode="same")
    similarity = similarity / np.max(similarity)

    above_threshold = similarity > threshold
    return standardized[above_threshold].index, similarity



def group_peaks(p, threshold=5):
    '''
    Collapse clusters of nearby peak detections into one value per cluster.

    The peak detector reports several neighbouring samples for each QRS
    complex. Consecutive detections whose spacing is below ``threshold``
    are labelled as one cluster and the median of the cluster is returned.

    Parameters
    ----------
    p : array-like of peak positions, in ascending order.
    threshold : maximum spacing (exclusive) between consecutive detections
        for them to belong to the same cluster.

    Returns
    -------
    numpy.ndarray (float)
        One median per cluster, in order of appearance.

    Notes
    -----
    * Clusters are enumerated from the label count returned by
      ``scipy.ndimage.label``. Taking ``np.unique(labels)[1:]`` instead
      silently drops the first cluster whenever no gap reaches the
      threshold (label 0 is then absent).
    * NOTE(review): the labels live on ``np.diff(p)``, which is one element
      shorter than ``p``, so each cluster's median is taken over all of its
      detections except the last one, and a detection with no close
      neighbour forms no cluster and is dropped. This matches the previous
      behaviour and is kept so existing results do not shift - confirm
      whether it is intended.
    '''
    p = np.asarray(p)

    # label runs of consecutive "close" gaps; 0 marks gaps >= threshold
    peak_groups, num_groups = label(np.diff(p) < threshold)

    medians = []
    for group_id in range(1, num_groups + 1):
        members = p[np.flatnonzero(peak_groups == group_id)]
        medians.append(np.median(members))

    return np.array(medians, dtype=float)

def group_peaks_from_ind_to_msec(grouped_peaks_ind, sampling_freq):
    '''
    Convert peak positions from sample indices to milliseconds.

    Parameters
    ----------
    grouped_peaks_ind : numpy.ndarray (or scalar) of sample indices.
    sampling_freq : samples per second.

    Returns
    -------
    Same shape as the input: ``index / sampling_freq`` seconds, expressed
    in milliseconds.
    '''
    seconds_per_sample = 1 / sampling_freq
    return (grouped_peaks_ind * seconds_per_sample) * 1000
    


def timedomain(rr):
    '''
    Time-domain HRV summary of a sequence of RR intervals.

    Parameters
    ----------
    rr : list, tuple or array of RR intervals. The result keys label the
        values as milliseconds; this function does not convert units.

    Returns
    -------
    dict with the keys
        'Mean RR (ms)'      - mean of ``rr``
        'STD RR/SDNN (ms)'  - ``np.std`` of ``rr``
        'RMSSD (ms)'        - root mean square of successive differences
        'NNxx'              - number of successive differences whose
                              absolute value exceeds 100
        'pNNxx (%)'         - 100 * NNxx / len(rr)

    Notes
    -----
    * Plain lists and tuples (e.g. the output of ``filtered_rr``) are
      accepted; they are converted to an array first.
    * Heart-rate derived metrics are not computed.
    * NOTE(review): 'pNNxx (%)' is normalised by ``len(rr)`` rather than by
      the number of successive differences (``len(rr) - 1``) - confirm
      which convention is wanted.
    '''
    if isinstance(rr, (list, tuple)):
        rr = np.asarray(rr)

    successive_diffs = np.diff(rr)
    large_jumps = np.sum(np.abs(successive_diffs) > 100)

    results = {}
    results['Mean RR (ms)'] = np.mean(rr)
    results['STD RR/SDNN (ms)'] = np.std(rr)
    results['RMSSD (ms)'] = np.sqrt(np.mean(np.square(successive_diffs)))
    results['NNxx'] = large_jumps
    results['pNNxx (%)'] = 100 * large_jumps / len(rr)
    return results


def remove_outliers(grouped_peaks, std_thre):
    '''
    Flag the successive peak differences that lie close to their mean.

    Parameters
    ----------
    grouped_peaks : array-like of peak positions.
    std_thre : half-width of the accepted band, in standard deviations.

    Returns
    -------
    (numpy.ndarray, list of int)
        All successive differences ``np.diff(grouped_peaks)``, and the
        positions of those strictly inside
        ``mean - std_thre*std < diff < mean + std_thre*std``.
        Nothing is removed here; the positions are meant to be applied to
        the differences afterwards.
    '''
    rr_diff = np.diff(grouped_peaks)

    centre = np.mean(rr_diff)
    spread = np.std(rr_diff)
    lower = centre - std_thre * spread
    upper = centre + std_thre * spread

    ind_interest = [pos for pos, gap in enumerate(rr_diff) if lower < gap < upper]

    return rr_diff, ind_interest

def filtered_rr(rr_diff, ind_interest):
    '''
    Pick the entries of ``rr_diff`` at the given positions.

    Returns a new plain list, ordered as in ``ind_interest``.
    '''
    return [rr_diff[position] for position in ind_interest]
        
        
    


def get_plot_ranges(start=10, end=20, n=5):
    '''
    Yield consecutive (lo, hi) integer ranges that cover [start, end).

    The step is ``floor((end - start) / n)``:
    - if end-start is divisible by n, exactly n ranges are produced;
    - otherwise extra ranges are produced, and the last range is shorter
      and ends at ``end``.
    If the step would floor to 0 (fewer than n units between start and
    end), a step of 1 is used instead, so the generator yields unit-width
    ranges rather than failing inside ``np.arange``.

    # Example:
    >> list(get_plot_ranges())
    >> [(10, 12), (12, 14), (14, 16), (16, 18), (18, 20)]
    >> list(get_plot_ranges(0, 10, 3))
    >> [(0, 3), (3, 6), (6, 9), (9, 10)]

    '''
    distance = end - start
    step = np.floor(distance / n)
    if step == 0:
        # np.arange cannot iterate with a zero step
        step = 1

    for i in np.arange(start, end, step):
        yield (int(i), int(np.minimum(end, step + i)))

## Paths and inputs

In [124]:
# PsychoPy log (CSV); read by read_manipulate_psychopy, which keeps its '*_ts' columns.
# NOTE(review): absolute, machine-specific path - adjust before running on another machine.
psychopy_file_path = "/home/muhammad/Desktop/Datasets/data_sony_digiRelax/wp3/VP007_091123/Psychopy_data/VP007_091123_DigiRelax_Experiment_2023-11-09_14h19.56.411.csv"
# PsychoPy column whose first non-NaN value is used as the start of the analysis window.
col_interest_psychopy = "tsst_pres_ts"
# Window lengths in minutes (passed to timedelta(minutes=...) in get_offset_timestamp).
list_offset_mins = [5]
# Position of the window to analyse within list_offset_mins / timestamps_interest_list.
selec_ind = 0
# ECG sampling rate used to derive the nominal window duration (presumably Hz - TODO confirm).
sampling_frequency = 256
# Tab-separated Shimmer export read by read_shimmer_sensor (same path caveat as above).
ecg_file_path = "/home/muhammad/Desktop/Datasets/data_sony_digiRelax/wp3/VP007_091123/Shimmer_data/2023-11-09_12.38.31_VP007_091123_SD_Session1/VP007_091123_Session1_Shimmer_6B1E_Calibrated_SD.csv"
# Shimmer column holding the ECG signal that is analysed.
ecg_col_name = "Shimmer_6B1E_ECG_LL-LA_24BIT_CAL"
# Shimmer column holding the timestamps (divided by 1000 in standardize_timestamps_shimmer).
timestamp_shimmer_col_name = "Shimmer_6B1E_Timestamp_Unix_CAL"

In [125]:

# ---- Extract PsychoPy information ----
# Keep only the timestamp columns, then take the first non-NaN entry of the event column of interest.
psychopy_df = read_manipulate_psychopy(psychopy_file_path)
timestamp_ineterest = get_nonNan_list_psychopy(psychopy_df, col_interest_psychopy)[0]

# ---- Extract the ECG signal from the Shimmer recording ----
ecg_df = read_shimmer_sensor(ecg_file_path)
selected_ecg_df = ecg_df[[timestamp_shimmer_col_name, ecg_col_name]]
# Explicit copy so the column assignments below work on an independent frame.
selected_ecg_df = selected_ecg_df.copy()
# Divides the timestamp column by 1000 (presumably ms -> s to match the PsychoPy timestamps - TODO confirm).
selected_ecg_df = standardize_timestamps_shimmer(selected_ecg_df, timestamp_shimmer_col_name)
# NOTE(review): despite its name, 'heartrate' is a float copy of the ECG column, not a heart-rate series.
selected_ecg_df['heartrate'] = selected_ecg_df[ecg_col_name]
selected_ecg_df=col_from_str_float (selected_ecg_df, "heartrate")

In [126]:
# Start timestamp of each consecutive window, beginning at the PsychoPy event time.
timestamps_interest_list = get_list_timestamp_interest(timestamp_ineterest, list_offset_mins)    

In [127]:
# Pick the window to analyse and compute its other boundary.
timestamp_ineterest = timestamps_interest_list[selec_ind]
offset_min = list_offset_mins[selec_ind]
timestamp_offset = get_offset_timestamp(timestamp_ineterest, offset_min)
# min/max keeps start <= end even when the offset is negative (window before the event).
start_end_time_list = [timestamp_ineterest, timestamp_offset]
timestamp_start_slice = min(start_end_time_list)
timestamp_end_slice = max(start_end_time_list)

# Cut the ECG to the window (both bounds inclusive) and renumber the rows from 0,
# so the index labels used later are row positions within the window.
ecg_df_ii=slice_df_wrt_timestamps(selected_ecg_df, timestamp_start_slice, timestamp_end_slice, timestamp_shimmer_col_name)
ecg_df_ii=ecg_df_ii.reset_index(drop=True)


# Nominal window size: `stop` in samples, `duration` = 60 * offset_min.
# These are derived from the configuration, not from the number of rows actually sliced.
start = 0
stop =sampling_frequency*60*offset_min
duration = (stop-start) / sampling_frequency



In [128]:
# Nominal window length (60 * offset_min).
duration

300.0

In [129]:
# ecg_df_ii is already limited to [start, end]; this mask additionally drops rows exactly at the end timestamp.
cond_slice = (ecg_df_ii[timestamp_shimmer_col_name] >= timestamp_start_slice) & (ecg_df_ii[timestamp_shimmer_col_name] < timestamp_end_slice)
ecg_slice = ecg_df_ii.heartrate[cond_slice] 
# Detect candidate peaks: `peaks` holds index labels of ecg_slice (row positions after the reset_index above).
peaks, similarity = detect_peaks(ecg_slice, threshold=0.3)
# Collapse each cluster of neighbouring detections into a single median position.
grouped_peaks = group_peaks(peaks)


In [130]:
# Keep only successive peak differences strictly within mean +/- std_thre * std of all differences.
std_thre=0.5
rr_vals, ind_interest = remove_outliers(grouped_peaks, std_thre)

# Plain list of the retained differences.
selected_rr_vals = filtered_rr(rr_vals, ind_interest)

In [131]:
# NOTE(review): grouped_peaks are row positions, so this mean is in samples, not milliseconds;
# group_peaks_from_ind_to_msec is not applied in the cells shown here - confirm the intended unit.
np.mean(selected_rr_vals)

197.2719298245614