This notebook assumes access to detrended, denoised signals and calculates various parameters from these signals. The parameters estimated within this notebook are split into three broad categories: time domain, frequency domain and non-linear domain. They have been chosen because of their success in the literature on ECG classification, as well as their use as heart rate variability (HRV) metrics:

Time domain:
 - mean RR intervals
 - std RR intervals 
 - mean amplitude
 - RMSSD of differences between successive RR intervals 
 - pNN50 (%): NN50 divided by the total number of RR intervals (NN50 is the number of successive RR intervals that differ by more than 50 ms)
 - mean QRS duration
 - std QRS duration
 - moments of the signal (physiological measures)
 
 
Frequency Domain:
- absolute power of LF (0.04 - 0.15 Hz) band
- absolute power of HF (0.15 to 0.4Hz) band
- LF/HF ratio
- Total power (0-0.4 Hz)

Non-linear:
- Fractal dimension of dynamic attractor of signal
- std of poincare
- Shannon entropy
- sample entropy


In [3]:
%run "Data_PreProcessing.ipynb" #allowing access to the filtered database with preprocessed signals

Filtering Database


100%|██████████| 549/549 [01:00<00:00,  9.02it/s]


221 remaining out of 290
normalising and preproccessing signals using DWT


100%|██████████| 221/221 [00:01<00:00, 129.99it/s]


6


In [4]:
from scipy.signal import correlate #for autocorrelation for signal embedding
from scipy.stats import skew, kurtosis #for signal measures
from scipy.signal import welch #for frequency band
from scipy.interpolate import interp1d #for interpolation of signal
import neurokit2 as nk #for peak finding, and other measures
from pyhrv.hrv import hrv #think neurokit might use this, needed otherwise issues

In [5]:
def parameter_averages(parameter, health_state):
    """
    summarises a parameter separately for the healthy and the unhealthy group

    parameter: np.array of values, indexed here with a boolean mask
    health_state: labels aligned with parameter; entries equal to 'Unhealthy'
                  form the unhealthy group, every other label counts as healthy
    returns np.array containing: healthy mean, healthy std, unhealthy mean, unhealthy std
    """
    # boolean mask selecting the unhealthy entries
    is_unhealthy = np.array([label == 'Unhealthy' for label in health_state])

    unhealthy_values = parameter[is_unhealthy]
    healthy_values = parameter[~is_unhealthy]

    return np.array([
        np.mean(healthy_values),
        np.std(healthy_values),
        np.mean(unhealthy_values),
        np.std(unhealthy_values),
    ])

def print_averages(parameter, parameter_name, nan_indices):
    """
    prints the healthy and unhealthy mean and std of a parameter

    parameter: np.array of parameter values, one per selected signal
    parameter_name: label used in the printed lines
    nan_indices: index/mask applied to the diagnoses of allowed_patients so that
                 they line up with parameter
    """
    all_diagnoses = np.array(allowed_patients.get_diagnoses())
    selection = np.array(nan_indices)

    # means holds: healthy mean, healthy std, unhealthy mean, unhealthy std
    means = parameter_averages(parameter, all_diagnoses[selection])

    print(f"Unhealthy {parameter_name}: mean:{means[2]}, std: {means[3]}")
    print(f"Healthy {parameter_name}: mean:{means[0]}, std:{means[1]}")

In [6]:

def outliers_indices_z_score(data, threshold=2):
    """
    finds the entries of data whose z-score lies strictly inside the threshold region

    data: array-like of numeric values
    threshold: largest absolute z-score (exclusive) for a value to be kept
    return the indices of the data to be kept, in np.argwhere layout
    (one row per kept entry, i.e. shape (k, 1) for 1-D data)
    """
    values = np.asarray(data, dtype=float)
    std = np.std(values)

    # identical values have no spread and therefore no outliers: keep all of
    # them instead of dividing by zero and throwing every entry away
    if std == 0:
        return np.argwhere(np.ones(values.shape, dtype=bool))

    z_scores = (values - np.mean(values)) / std
    return np.argwhere(np.abs(z_scores) < threshold)

def get_peaks(sig):
    """
    locates the R peaks of a signal with neurokit and measures their mean amplitude

    sig: signal passed to nk.ecg_peaks (sampling_rate=1000) and indexed with the peaks
    returns the peak locations (info['ECG_R_Peaks']) and the mean peak amplitude,
    measured relative to the median of the signal
    """
    # only the info dictionary is needed, the first return value is discarded
    _, peak_info = nk.ecg_peaks(sig, sampling_rate=1000)
    r_peaks = peak_info['ECG_R_Peaks']

    # the median of the whole signal acts as the baseline
    baseline = np.median(sig)
    mean_amplitude = np.mean(sig[r_peaks] - baseline)
    return r_peaks, mean_amplitude
        

def get_rri(sig, remove_outliers=False):
    """
    calculates the rr intervals and the mean r peak amplitude of a signal

    sig: signal handed to get_peaks
    remove_outliers: when True, intervals rejected by outliers_indices_z_score are dropped
    returns the rr intervals (differences of successive peak locations) and the
    mean amplitude reported by get_peaks
    """
    r_peaks, mean_amplitude = get_peaks(sig)

    # distance between successive peaks
    intervals = np.diff(r_peaks)

    if not remove_outliers:
        return intervals, mean_amplitude

    # the helper returns an argwhere-style column of indices, flatten before indexing
    kept = outliers_indices_z_score(intervals).reshape(-1)
    return intervals[kept], mean_amplitude

As the denoised signal array contains all-NaN signals, we have to find the location of these so that they don't interfere with the parameter calculations. The locations are different for each channel, as each channel keeps a different set of signals based on their signal quality.

In [7]:
#pre-calculation of the diagnoses and of the usable-signal mask for every channel
health_state = np.array(allowed_patients.get_diagnoses())

#nan_indices[j][i] is True when signal i of channel j holds real samples
#and False when the signal is entirely nan
nan_indices = np.array([
    [not np.isnan(signal).all() for signal in denoised_signals[:, j]]
    for j in range(0, no_channels)
])

## Time Domain

In [8]:
# progress message marking the start of the time domain feature cells
print('Calculating Time Domain Features')

Calculating Time Domain Features


### RR signal analysis
- RR means: mean of the rr intervals
- RR stds: standard deviation of the rr intervals
- R amplitudes: amplitudes of the r peaks
- RMSSD: the root mean square of successive differences of rr intervals
- pNN50s: the percentage of successive rr intervals that differ by more than 50ms (NN50 count divided by the number of rr intervals, times 100)

In [9]:
def RR_analysis(signal, remove_outliers=False):
    """
    derives the time domain measures from the RR intervals of a signal

    signal: signal handed to get_rri
    remove_outliers: forwarded to get_rri
    returns mean of RR intervals, std of RR intervals, RMSSD, pNN50, amplitude of r peaks

    the RR intervals are differences of peak locations, so the 50 used for NN50 is
    in samples; with the sampling rate of 1000 used in get_peaks this equals 50 ms
    """
    rr, mean_amp = get_rri(signal, remove_outliers=remove_outliers)

    # differences between successive RR intervals, shared by RMSSD and pNN50
    successive_diffs = np.diff(rr)

    # RMSSD: root mean square of the successive differences
    rmssd = np.sqrt(np.mean(successive_diffs**2))

    # pNN50: share of successive differences above 50, relative to the number
    # of RR intervals, as a percentage
    nn50_count = np.sum(np.abs(successive_diffs) > 50)
    pnn50 = (nn50_count / len(rr)) * 100

    return np.mean(rr), np.std(rr), rmssd, pnn50, mean_amp


In [10]:
#RR based parameters for each signal in each channel
#entry j of every list holds one value per usable signal of channel j
rr_means_list = []
rr_stds_list = []
rr_RMSSD_list = []
rr_pNN50s_list = []
rr_amps_list = []

for j in range(no_channels):
    #number of signals of this channel selected by the mask
    n_valid = len(health_state[nan_indices[j]])

    rr_means = np.zeros(shape=n_valid)
    rr_stds = np.zeros(shape=n_valid)
    rr_RMSSD = np.zeros(shape=n_valid)
    rr_pNN50s = np.zeros(shape=n_valid)
    rr_amps = np.zeros(shape=n_valid)

    #i counts the usable signals seen so far and indexes the output arrays
    i = 0
    for signal in denoised_signals[:, j]:
        #signals that are entirely nan are skipped
        if np.isnan(signal).all():
            continue

        features = RR_analysis(signal, remove_outliers=True)
        rr_means[i], rr_stds[i], rr_RMSSD[i], rr_pNN50s[i], rr_amps[i] = features
        i += 1

    rr_means_list.append(rr_means)
    rr_stds_list.append(rr_stds)
    rr_RMSSD_list.append(rr_RMSSD)
    rr_pNN50s_list.append(rr_pNN50s)
    rr_amps_list.append(rr_amps)



### QRS Complex
QRS complex represents the duration of the main contraction of the heart. Here I have calculated both the mean and standard deviation of it. This has been done using neurokit2's ```ecg_delineate``` function.

In [11]:
def QRS_complex(signal):
    """
    estimates the mean and standard deviation of the QRS duration of a signal

    the signal is cleaned with nk.ecg_clean and then delineated with
    nk.ecg_delineate; each duration is the S peak location minus the Q peak
    location, so the result is expressed in samples

    signal: ECG signal (a sampling rate of 1000 is passed to neurokit)
    returns qrs_mean, qrs_std, both computed while ignoring nan durations
    """
    sampling_rate = 1000

    #delineation is meant to run on the neurokit cleaned signal
    cleaned_signal = nk.ecg_clean(signal, sampling_rate=sampling_rate)

    #the second return value is the one indexed by the wave names below
    #NOTE(review): in neurokit2 this should be the dictionary of wave locations - confirm
    _, waves = nk.ecg_delineate(cleaned_signal, sampling_rate=sampling_rate)

    q_peaks = np.array(waves['ECG_Q_Peaks'])
    s_peaks = np.array(waves['ECG_S_Peaks'])

    #duration of each QRS complex
    qrs = s_peaks - q_peaks

    #nan-aware statistics, presumably nan marks beats that could not be delineated
    qrs_mean = np.nanmean(qrs)
    qrs_std = np.nanstd(qrs)

    return qrs_mean, qrs_std


In [12]:
# # #takes a long time so ignored for now
# QRS_means_list = []
# QRS_stds_list = []

# for j in range(0, no_channels):
#     QRS_means = np.zeros(shape = len(health_state[nan_indices[j]]))
#     QRS_stds = np.zeros(shape = len(health_state[nan_indices[j]]))
#     i = 0
#     for signal in denoised_signals[:, j]:
#         if not np.isnan(signal).all():
#             QRS_mean, QRS_std = QRS_complex(signal)

#             QRS_means[i] = QRS_mean
#             QRS_stds[i] = QRS_std

#             i+=1
    
#     QRS_means_list.append(QRS_means)
#     QRS_stds_list.append(QRS_stds)   

KeyboardInterrupt: 

### Physiological measures

In [13]:
def get_moments(signal):
    """
    computes the first four moment based statistics of a signal

    signal: array of samples
    returns mean, standard deviation, skewness and kurtosis, the last two from
    scipy.stats.skew and scipy.stats.kurtosis with their default settings
    """
    return np.mean(signal), np.std(signal), skew(signal), kurtosis(signal)

In [14]:
#moments of every usable signal, collected per channel
means_list = []
stds_list = []
skews_list = []
kurtosiss_list = []

for j in range(no_channels):
    #number of signals of this channel selected by the mask
    n_valid = len(health_state[nan_indices[j]])

    means = np.zeros(shape=n_valid)
    stds = np.zeros(shape=n_valid)
    skews = np.zeros(shape=n_valid)
    kurtosiss = np.zeros(shape=n_valid)

    #i indexes the output arrays and only advances for usable signals
    i = 0
    for signal in denoised_signals[:, j]:
        #signals that are entirely nan are skipped
        if np.isnan(signal).all():
            continue

        means[i], stds[i], skews[i], kurtosiss[i] = get_moments(signal)
        i += 1

    means_list.append(means)
    stds_list.append(stds)
    skews_list.append(skews)
    kurtosiss_list.append(kurtosiss)


## Frequency Domain

In [15]:
# progress message marking the start of the frequency domain feature cells
print('Calculating Frequency Domain Features')

Calculating Frequency Domain Features


### Power Bands

In [16]:
def power_bands(signal):
    """
    estimates the low and high frequency power of the RR interval series

    the RR intervals are resampled onto a uniform grid with cubic interpolation and
    their power spectral density is estimated with the welch method

    signal: signal handed to get_peaks
    returns lf power (0.04 - 0.15 Hz), hf power (0.15 - 0.40 Hz), the lf/hf ratio
    and the total power, integrated over every frequency returned by welch
    """
    sampling_rate = 1000  # Hz, sampling frequency of the signal
    resample_rate = 3  # Hz, rate of the uniform grid, 3 is typically used

    # frequency bands, based on typical values used in HRV
    lf_low, lf_high = 0.04, 0.15
    hf_low, hf_high = 0.15, 0.40

    # RR intervals in seconds
    r_peaks, _ = get_peaks(signal)
    rr_seconds = np.diff(r_peaks) / sampling_rate

    # every RR interval is stamped with its start time, counted from the first peak
    start_times = np.insert(np.cumsum(rr_seconds), 0, 0)[:-1]

    # uniform grid up to (excluding) the start of the last interval
    grid = np.arange(0, start_times[-1], 1 / resample_rate)
    resampled_rr = interp1d(start_times, rr_seconds, kind='cubic')(grid)

    # welch spectrum
    freqs, psd = welch(resampled_rr, fs=resample_rate, nperseg=128)

    def band_power(low, high):
        # integrates the spectrum over the closed band [low, high]
        in_band = (freqs >= low) & (freqs <= high)
        return np.trapz(psd[in_band], freqs[in_band])

    lf_power = band_power(lf_low, lf_high)
    hf_power = band_power(hf_low, hf_high)

    # total power, integrated over the whole returned spectrum
    total_power = np.trapz(psd, freqs)

    return lf_power, hf_power, lf_power / hf_power, total_power
    

In [17]:
#frequency band powers of every usable signal, collected per channel
lfs_list = []
hfs_list = []
ratios_list = []
tps_list = []

for j in range(no_channels):
    #number of signals of this channel selected by the mask
    n_valid = len(health_state[nan_indices[j]])

    lfs = np.zeros(shape=n_valid)
    hfs = np.zeros(shape=n_valid)
    ratios = np.zeros(shape=n_valid)
    tps = np.zeros(shape=n_valid)

    #i indexes the output arrays and only advances for usable signals
    i = 0
    for signal in denoised_signals[:, j]:
        #signals that are entirely nan are skipped
        if np.isnan(signal).all():
            continue

        lfs[i], hfs[i], ratios[i], tps[i] = power_bands(signal)
        i += 1

    lfs_list.append(lfs)
    hfs_list.append(hfs)
    ratios_list.append(ratios)
    tps_list.append(tps)

## Non-Linear

In [18]:
# progress message marking the start of the non-linear feature cells
print('Calculating Non-Linear Domain Features')

Calculating Non-Linear Domain Features


### Poincare

In [19]:
def calculate_poincare_sd(sig, remove_outliers=False, use_nk=False):
    """
    calculates the Poincare descriptors SD1 and SD2 of the RR intervals of a signal

    sig: signal handed to get_rri (or to neurokit when use_nk is True)
    remove_outliers: when True the RR intervals are z-score filtered first and three
                     extra counts are returned
    use_nk: when True the values are read from nk.hrv_nonlinear instead

    returns
    - use_nk=True: the 'HRV_SD1', 'HRV_SD2' and 'HRV_SD1SD2' entries of nk.hrv_nonlinear
    - remove_outliers=False: sd1, sd2, sd2/sd1
    - remove_outliers=True: sd1, sd2, sd2/sd1, number of pairs with |difference|/sqrt(2) > sd1,
      number of pairs with |sum|/sqrt(2) > sd2, and the total of the two counts

    NOTE(review): the manual ratio is sd2/sd1 while the neurokit key is named SD1SD2,
    which looks like the opposite orientation - confirm before mixing the two
    """
    if use_nk:
        #neurokit works from the peaks directly, so no RR intervals are needed here
        peaks, info = nk.ecg_peaks(sig, sampling_rate=1000)
        nl_indices = nk.hrv_nonlinear(peaks, sampling_rate=1000, show=False)
        return nl_indices['HRV_SD1'], nl_indices['HRV_SD2'], nl_indices['HRV_SD1SD2']

    #get_rri applies the same z-score filter and hands back a flat 1-D array
    rr_intervals = get_rri(sig, remove_outliers=remove_outliers)[0]

    #separating into subsequent coordinates for Poincaré plot
    rr_n = rr_intervals[:-1]
    rr_n1 = rr_intervals[1:]

    #SD1, spread perpendicular to y=x
    diff_rr = np.array(rr_n) - np.array(rr_n1)
    sd1 = np.sqrt(np.var(diff_rr / np.sqrt(2)))

    #SD2, spread along y=x
    sum_rr = rr_n + rr_n1
    sd2 = np.sqrt(np.var(sum_rr / np.sqrt(2)))

    sd_ratio = sd2 / sd1

    if not remove_outliers:
        return sd1, sd2, sd_ratio

    #counting pairs whose (not mean-centred) coordinate exceeds SD1 / SD2
    count_outside_sd1 = np.sum(np.abs(diff_rr / np.sqrt(2)) > sd1)
    count_outside_sd2 = np.sum(np.abs(sum_rr / np.sqrt(2)) > sd2)

    out = count_outside_sd1 + count_outside_sd2
    return sd1, sd2, sd_ratio, count_outside_sd1, count_outside_sd2, out

In [20]:
# sd1s_outliers_removed_list = []
# sd2s_outliers_removed_list = []
# sd_ratios_outliers_removed_list = []
# out_sd1s_list = []
# out_sd2s_list = []
# n_out_list = []
# for j in range(0, no_channels):
#     sd1s = np.zeros(shape = len(health_state[nan_indices[j]]))
#     sd2s = np.zeros(shape = len(health_state[nan_indices[j]]))
#     sd_ratios = np.zeros(shape = len(health_state[nan_indices[j]]))
#     out_sd1s = np.zeros(shape = len(health_state[nan_indices[j]]))
#     out_sd2s = np.zeros(shape = len(health_state[nan_indices[j]]))
#     n_out = np.zeros(shape = len(health_state[nan_indices[j]]))

#     i = 0
#     for signal in denoised_signals[:, j]:
#         if not np.isnan(signal).all():
#             sd1, sd2, sd_ratio, out_sd1, out_sd2, out = calculate_poincare_sd(signal, remove_outliers=True)
#             sd1s[i] = sd1
#             sd2s[i] = sd2
#             sd_ratios[i] = sd_ratio
#             out_sd1s[i] = out_sd1
#             out_sd2s[i] = out_sd2
#             n_out[i] = out

#             i+=1
        
#     sd1s_outliers_removed_list.append(sd1s)
#     sd2s_outliers_removed_list.append(sd2s)
#     sd_ratios_outliers_removed_list.append(sd_ratios)
#     out_sd1s_list.append(out_sd1s)
#     out_sd2s_list.append(out_sd2s)
#     n_out_list.append(n_out)

In [21]:
#Poincare descriptors of every usable signal, collected per channel
sd1s_list = []
sd2s_list = []
sd_ratios_list = []

for j in range(no_channels):
    #number of signals of this channel selected by the mask
    n_valid = len(health_state[nan_indices[j]])

    sd1s = np.zeros(shape=n_valid)
    sd2s = np.zeros(shape=n_valid)
    sd_ratios = np.zeros(shape=n_valid)

    #i indexes the output arrays and only advances for usable signals
    i = 0
    for signal in denoised_signals[:, j]:
        #signals that are entirely nan are skipped
        if np.isnan(signal).all():
            continue

        sd1s[i], sd2s[i], sd_ratios[i] = calculate_poincare_sd(signal, remove_outliers=False)
        i += 1

    sd1s_list.append(sd1s)
    sd2s_list.append(sd2s)
    sd_ratios_list.append(sd_ratios)


In [22]:
# #takes a while
# nk_sd1s_list = []
# nk_sd2s_list = []
# nk_sd_ratios_list = []

# for j in range(0, no_channels):
#     nk_sd1s = np.zeros(shape = len(health_state[nan_indices[j]]))
#     nk_sd2s = np.zeros(shape = len(health_state[nan_indices[j]]))
#     nk_sd_ratios = np.zeros(shape = len(health_state[nan_indices[j]]))

#     i = 0
#     for signal in denoised_signals[:, j]:
#         if not np.isnan(signal).all():
#             sd1, sd2, sd_ratio = calculate_poincare_sd(signal, remove_outliers=False, use_nk=True)
#             nk_sd1s[i] = sd1
#             nk_sd2s[i] = sd2
#             nk_sd_ratios[i] = sd_ratio

#             i+=1
        
#     nk_sd1s_list.append(nk_sd1s)
#     nk_sd2s_list.append(nk_sd2s)
#     nk_sd_ratios_list.append(nk_sd_ratios)


### Shannon Entropy

In [23]:
def calculate_shannon_entropy(signal, num_bins=500, use_nk=False):
    """
    calculates the Shannon entropy (in bits) of the RR interval distribution of a signal

    signal: signal handed to get_rri (or to neurokit when use_nk is True)
    num_bins: number of equal width histogram bins used to discretise the RR intervals
    use_nk: when True the 'HRV_ShanEn' entry of nk.hrv_nonlinear is returned instead
    returns the entropy of the binned RR intervals
    """
    if use_nk:
        #neurokit works from the peaks directly, so no RR intervals are needed here
        peaks, info = nk.ecg_peaks(signal, sampling_rate=1000)
        nl_indices = nk.hrv_nonlinear(peaks, sampling_rate=1000, show=False)
        return nl_indices['HRV_ShanEn']

    rr_intervals = get_rri(signal)[0]

    # discretise the RR intervals; the bin probabilities are the relative counts
    counts, _ = np.histogram(rr_intervals, bins=num_bins)
    probabilities = counts / np.sum(counts)

    # the small constant keeps log2 finite for empty bins, which contribute
    # nothing because their probability is 0
    return -np.sum(probabilities * np.log2(probabilities + 1e-12))

In [24]:
#Shannon entropy of every usable signal, collected per channel
#calculate_shannon_entropy also offers a neurokit option (use_nk=True)
shannon_ens_list = []

for j in range(no_channels):
    shannon_ens = np.zeros(shape=len(health_state[nan_indices[j]]))

    #i indexes the output array and only advances for usable signals
    i = 0
    for signal in denoised_signals[:, j]:
        #signals that are entirely nan are skipped
        if np.isnan(signal).all():
            continue

        shannon_ens[i] = calculate_shannon_entropy(signal)
        i += 1

    shannon_ens_list.append(shannon_ens)


### Sample Entropy

In [25]:
def calculate_sample_entropy(signal, m=2, r=0.2):
    """
    calculates a sample entropy estimate of the RR intervals of a signal

    signal: signal handed to get_rri
    m: template length
    r: tolerance as a fraction of the standard deviation of the RR intervals
    returns -log(rate(m + 1) / rate(m)), where rate(length) is the number of template
    pairs within the tolerance (self matches excluded) divided by the number of templates

    NOTE(review): the m and m + 1 rates use N - m + 1 and N - m templates respectively,
    check against the sample entropy definition you want to follow
    """
    rr_intervals = get_rri(signal)[0]
    N = len(rr_intervals)
    tolerance = r * np.std(rr_intervals)

    def _match_rate(length):
        n_templates = N - length + 1
        templates = np.array([rr_intervals[start:start + length] for start in range(n_templates)])

        # Chebyshev distance between every pair of templates
        distances = np.max(np.abs(templates[:, None] - templates[None, :]), axis=2)

        # matches per template, minus the match of the template with itself
        matches = np.sum(distances <= tolerance, axis=0) - 1
        return np.sum(matches) / n_templates

    return -np.log(_match_rate(m + 1) / _match_rate(m))

In [26]:
# samp_ens_list = []
# for j in range(0, no_channels):
#     samp_ens = np.zeros(shape = len(health_state[nan_indices[j]]))
#     i=0
#     for signal in denoised_signals[:, j]:
#         if not np.isnan(signal).all():
#             samp_en = calculate_sample_entropy(signal)
#             if samp_en == np.inf:
#                 samp_en = np.nan
#             samp_ens[i] = samp_en
            
#             i+=1
#     samp_ens_list.append(samp_ens)

## Higuchi FD 

In [27]:
def calculate_higuchi_fd(time_series, k_max = 50):
    """
    Calculate the fractal dimension of a time series using Higuchi's algorithm.

    For every delay k and every offset m < k the sub-series
    x[m], x[m + k], x[m + 2k], ... is formed; its curve length is the sum of the
    absolute increments, rescaled by (N - 1) / (number of increments * k) and
    divided by k. The mean over the offsets gives L(k), and the fractal dimension
    is minus the slope of log L(k) against log k.

    Parameters:
    - time_series: The input time series as a 1D array-like.
    - k_max: The maximum value of k (the parameter that controls segment length),
      would want a value of k_max roughly 10% of signal length.

    Returns:
    - The estimated fractal dimension.
    - The coefficients of the linear fit (slope, intercept).
    - log k for k = 1..k_max.
    - log L(k) for k = 1..k_max.

    Raises:
    - ValueError if the series has fewer than 2 * k_max samples, since some
      sub-series would then contain no increment at all.
    """
    series = np.asarray(time_series, dtype=float)
    N = len(series)
    if N < 2 * k_max:
        raise ValueError("Time series is too short for the chosen k_max.")

    curve_lengths = np.zeros(k_max)

    for k in range(1, k_max + 1):
        lengths_for_k = np.zeros(k)

        for m in range(k):
            # every increment of the sub-series is used, including the last one,
            # so a straight line yields a dimension of exactly 1
            increments = np.abs(np.diff(series[m::k]))
            n_increments = increments.size
            lengths_for_k[m] = increments.sum() * (N - 1) / (n_increments * k) / k

        curve_lengths[k - 1] = lengths_for_k.mean()

    # Perform linear fit in log-log scale
    ln_k = np.log(np.arange(1, k_max + 1))
    ln_L = np.log(curve_lengths)
    coeffs = np.polyfit(ln_k, ln_L, 1)

    # The fractal dimension is minus the slope of the line
    fractal_dimension = -coeffs[0]

    return fractal_dimension, coeffs, ln_k, ln_L

In [32]:
# #this takes roughly 40 mins so include at your discretion, even with a far from optimal value of kmax
# from tqdm import tqdm
# higuchi_fds_list = []

# for j in tqdm(range(0, no_channels)):
#     higuchi_fds = np.zeros(shape = len(health_state[nan_indices[j]]))
#     i=0
#     for signal in denoised_signals[:, j]:
#         if not np.isnan(signal).all():
#             higuchi_fds[i] = calculate_higuchi_fd(signal)[0]

#             i+=1
        
#     higuchi_fds_list.append(higuchi_fds)


100%|██████████| 6/6 [42:43<00:00, 427.24s/it]


### Fractal Dimension

In [8]:
#calculating autocorrelation and time delay
def autocorrelation(signal):
    """
    calculates the normalised autocorrelation of a signal for the lags 0 .. len(signal) - 1

    the mean is removed first and the result is divided by variance * length,
    so the value at lag 0 is 1
    """
    n_samples = len(signal)
    centred = signal - np.mean(signal)

    #scipy correlate is a lot faster; the second half of the 'full' output
    #holds the non-negative lags
    full_corr = correlate(centred, centred, mode='full')
    non_negative_lags = full_corr[n_samples - 1:]

    return non_negative_lags / (np.var(signal) * n_samples)

def find_time_delay(signal):
    """
    finds the time delay used for embedding the signal

    returns the first lag, in samples, at which the autocorrelation is at or
    below 1/e; np.argmax yields 0 when no lag qualifies, so 0 is also returned
    when the autocorrelation never drops that far
    """
    acf = autocorrelation(signal)
    below_threshold = acf <= 1 / np.exp(1)

    #argmax of a boolean array is the position of its first True
    return np.argmax(below_threshold)



In [9]:
#time delay of every usable signal, collected per channel
tau_list = []

for j in range(no_channels):
    tau = np.zeros(shape=len(health_state[nan_indices[j]]), dtype=int)

    #i indexes the output array and only advances for usable signals
    i = 0
    for signal in denoised_signals[:, j]:
        #signals that are entirely nan are skipped
        if np.isnan(signal).all():
            continue

        tau[i] = find_time_delay(signal)
        i += 1

    tau_list.append(tau)

In [10]:
#embedding time series
def embed_time_series(signal, tau, m):
    """
    builds the time delay embedding of a 1-D signal

    row i of the result is [signal[i], signal[i + tau], ..., signal[i + (m - 1) * tau]]

    signal: 1-D array-like
    tau: delay between successive coordinates in samples, at least 1
    m: embedding dimension, at least 1
    returns array of shape (len(signal) - (m - 1) * tau, m)
    raises ValueError when tau or m is below 1, or when the signal is too short
    to form a single embedded vector
    """
    signal = np.asarray(signal)

    if tau < 1 or m < 1:
        raise ValueError("tau and the embedding dimension must both be at least 1.")

    # at least one full vector is needed, i.e. len(signal) > (m - 1) * tau
    n_vectors = len(signal) - (m - 1) * tau
    if n_vectors < 1:
        raise ValueError("Time series is too short for the chosen tau and embedding dimension.")

    # index[i, d] = i + d * tau, which gathers all delayed coordinates in one step
    index = np.arange(n_vectors)[:, None] + np.arange(m) * tau
    return signal[index]

# embedding dimension M
M = 4

#list of embedded arrays per channel, i.e. no_channels * no_usable_signals * len(embedded) * M
embedded_signals_list = []
for j in range(no_channels):
    #usable signals of this channel, in their original order
    valid_signals = (s for s in denoised_signals[:, j] if not np.isnan(s).all())

    #tau_list[j][i] is the delay found for the i-th usable signal of channel j
    embedded_signals_list.append(
        [embed_time_series(s, tau_list[j][i], M) for i, s in enumerate(valid_signals)]
    )

In [11]:
#calculating fractal dimension through box counting
def box_counting(data, box_sizes):
    """
    counts the occupied boxes of a point cloud for several box sizes

    data: array with one point per row
    box_sizes: iterable of box edge lengths
    returns a list with the number of distinct occupied boxes for every box size
    """
    def occupied_boxes(size):
        #every point is mapped to the integer grid cell that contains it
        cells = np.ceil(data / size).astype(int)
        #distinct rows correspond to distinct occupied boxes
        return len(np.unique(cells, axis=0))

    return [occupied_boxes(size) for size in box_sizes]

def fractal_dimension(data, box_sizes):
    """
    estimates the box counting dimension of a point cloud

    data: array with one point per row
    box_sizes: box edge lengths handed to box_counting
    returns the negative slope of log(count) against log(box size)
    """
    occupied = box_counting(data, box_sizes)

    #straight line fit of the log-log plot, only the slope is needed
    slope, _ = np.polyfit(np.log(box_sizes), np.log(occupied), 1)
    return -slope

#check this whole code

In [12]:
def calc_fractal_dim(signal):
    """
    estimates the fractal dimension of an embedded signal through box counting

    signal: 2-D array handed to StandardScaler().fit_transform
            (presumably an embedded signal from embed_time_series - TODO confirm)
    returns the box counting dimension of the first three principal components

    NOTE(review): StandardScaler and svd are not imported in the visible cells,
    they are assumed to come from the preprocessing notebook
    """
    standardised = StandardScaler().fit_transform(signal)

    # SVD of the standardised data, the principal components are U * S
    left_vectors, singular_values, _ = svd(standardised, full_matrices=False)
    components = left_vectors @ np.diag(singular_values)

    # ten box sizes, logarithmically spaced between 0.1 and 10
    sizes = np.logspace(-1, 1, num=10)  # research/try this out

    # only the first three principal components are used, maybe use more??
    return fractal_dimension(components[:, :3], sizes)

In [13]:
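# #takes a long time so left out for now
# #NOTE(review): the loop below stores find_time_delay(signal) in fd; calc_fractal_dim(signal) looks like the intended call - confirm before re-enabling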
# fd_list = []
# for j in range(0, no_channels):
#     fd = np.zeros(shape = len(health_state[nan_indices[j]]))
#     i=0
#     for signal in embedded_signals_list[j]:
#         fd[i] = find_time_delay(signal)
            
#         i+=1
#     fd_list.append(fd)

### Covariates
- have access to age and gender, age is the only one that makes sense to investigate

In [None]:
#age of every patient, in the order used by allowed_patients
ages = np.zeros(no_patients)
for patient_index in range(no_patients):
    patient = allowed_patients.get_patients(patient_index)
    ages[patient_index] = patient.get_age()

In [None]:
#ages restricted, per channel, to the signals selected by the mask
ages_list = [ages[nan_indices[j]] for j in range(0, no_channels)]

## Creating Parameter Dictionary

In [None]:

#dictionary mapping every parameter name to its per-channel list of arrays
params = {
    #time components
    'RR mean': rr_means_list,
    'RR std': rr_stds_list,
    'RR amps': rr_amps_list,
    'RMSSD': rr_RMSSD_list,
    'pNN50': rr_pNN50s_list,

    #'QRS_mean': QRS_means_list, #contains nan so need to remove those so can use the filtering methods
    #'QRS_std': QRS_stds_list,

    'mean': means_list,
    'std': stds_list,
    'skews': skews_list,
    'kurtosis': kurtosiss_list,

    #frequency components
    'hf': hfs_list,
    'lf': lfs_list,
    'power ratio': ratios_list,
    'total power': tps_list,

    #nonlinear components
    'shannon en': shannon_ens_list,
    #'sample_en': samp_ens_list, need to do imputing if want to include it, due to some nan values
    'sd ratio': sd_ratios_list,
    'sd1': sd1s_list,
    'sd2': sd2s_list,
    #'nk_sd_ratio': nk_sd_ratios_list,
    #'sd_ratio_outliers_removed': sd_ratios_outliers_removed_list,
    #'multi fd': fd_list,
    #'higuchi fd': higuchi_fds_list,

    #covariates
    'age': ages_list,
}

