In [63]:
import numpy as np
import pandas as pd
from scipy.signal import welch, hilbert
from scipy.stats import entropy
import numpy as np
import pandas as pd
import xgboost as xgb
import neurokit2 as nk
import polars as pl
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                             make_scorer, precision_score, recall_score)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier as DTC
import os, glob
import statistics
import joblib, math
import scipy.signal as signal


In [66]:
def calculate_hrv(ecg_90s):
    try:
        ecg_clean = nk.ecg_clean(ecg_90s, sampling_rate=200)
        ecg_peaks = nk.ecg_findpeaks(ecg_clean, sampling_rate=200)
        info, r_peaks_corrected = nk.signal_fixpeaks(ecg_peaks, sampling_rate=200, iterative=False, show=False, method="Kubios")
        hrv = nk.hrv(r_peaks_corrected,sampling_rate=200, show=False)
        mean = hrv["HRV_MeanNN"][0]
        median = hrv["HRV_MedianNN"][0]
        sdnn = hrv["HRV_SDNN"][0]
        min = hrv["HRV_MinNN"][0]
        max = hrv["HRV_MaxNN"][0]
        vhf = hrv["HRV_VHF"][0]
        lf = hrv["HRV_LF"][0]
        hf = hrv["HRV_HF"][0]
        lf_hf = hrv["HRV_LFHF"][0]


        return mean, median, sdnn, min, max, vhf, lf, hf, lf_hf
    except:
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

def next_power_of_2(x):
    return 1 if x == 0 else 2 ** (x - 1).bit_length()

def spectrum(signal, sampling_rate):
    m = len(signal)
    n = next_power_of_2(m)
    y = np.fft.fft(signal, n)
    yh = y[0:int(n / 2 - 1)]
    fh = (sampling_rate / n) * np.arange(0, n / 2 - 1, 1)
    power = np.real(yh * np.conj(yh) / n)

    return fh, power


def frequency_ratio(frequency, power):
    power_low = power[(frequency >= 1) & (frequency <= 30)]  # Low range: 1-30 Hz
    power_high = power[(frequency > 30) & (frequency <= 100)]  # High range: 30-100 Hz (up to Nyquist)

    ULC = np.sum(power_low)
    UHC = np.sum(power_high)

    # Avoid division by zero in case the high-frequency power is 0
    if UHC == 0:
        return np.nan
    else:
        return ULC / UHC

def mean_freq(frequency, power):
    num = 0
    den = 0
    for i in range(int(len(power) / 2)):
        num += frequency[i] * power[i]
        den += power[i]
    
    if den == 0:
        return np.nan
    else:
        return num / den


def median_freq(frequency, power):
    power_total = np.sum(power) / 2
    temp = 0
    tol = 0.01
    errel = 1
    i = 0
    try:
        while abs(errel) > tol:
            temp += power[i]
            errel = (power_total - temp) / power_total
            i += 1
            if errel < 0:
                errel = 0
                i -= 1

        return frequency[i]
    except:
        return np.nan
    
def peak_freq(frequency, power):
    try:
        return frequency[power.argmax()]
    
    except:
        return np.nan


def get_rri(ecg, sampling_rate=200):
    ecg_clean = nk.ecg_clean(ecg, sampling_rate=sampling_rate)
    ecg_peaks = nk.ecg_findpeaks(ecg_clean, sampling_rate=sampling_rate)
    info, r_peaks_corrected = nk.signal_fixpeaks(ecg_peaks, sampling_rate=sampling_rate, iterative=False, show=False, method="Kubios")

    # Calculate RR intervals
    rr_intervals = np.diff(r_peaks_corrected) / sampling_rate * 1000

    # Insert fake data point
    rr_intervals_adjusted = np.insert(rr_intervals, 0, rr_intervals[0])

    # Calculate time axis
    time_r_peaks = r_peaks_corrected[1:] / sampling_rate
    time_r_peaks_adjusted = np.insert(time_r_peaks, 0, 0)
    print(time_r_peaks[0])

    rri_adj_df = pd.DataFrame(data={'RRI': rr_intervals_adjusted, 'RRI_t': time_r_peaks_adjusted})

    return rri_adj_df

# Function to populate the RRI column in df_sliding
def extend_df_with_rri(df_sliding, df_rr):
    df_sliding['RRI'] = np.nan
    
    rri_index = 0
    total_rr_rows = len(df_rr)

    for idx in range(len(df_sliding)):
        # Get the current start time of the sliding window
        window_start = df_sliding.at[idx, 'start_time']
        
        while rri_index < total_rr_rows and df_rr.at[rri_index, 'RRI_t'] <= window_start:
            df_sliding.at[idx, 'RRI'] = df_rr.at[rri_index, 'RRI']
            rri_index += 1
        
        if rri_index >= total_rr_rows:
            break
    
    df_sliding['RRI'].fillna(method='ffill', inplace=True)
    
    return df_sliding


In [67]:
def extract_features(sensor_data, events_data, patient_id, window_size_emg_s=1, overlap_emg_s=0.5, window_size_ecg_s=90, overlap_ecg_s=45, sampling_rate=200):
    window_size_emg = int(window_size_emg_s * sampling_rate)
    window_size_ecg = int(window_size_ecg_s * sampling_rate)
    overlap_emg = int(overlap_emg_s * sampling_rate)
    overlap_ecg = int(overlap_ecg_s * sampling_rate)

    features_1s = []
    labels = []
    features_90s = []

    # Prepare sensor data
    sensor_data.columns = ["ECG", "MR", "ML"]

    # Prepare events data
    event_intervals = events_data[['Time in Sec', 'Event Duration']].values[1:]
    event_intervals = [(start, start + duration) for start, duration in event_intervals]

    scaler_mr = MinMaxScaler(feature_range=(0,100))
    scaler_ml = MinMaxScaler(feature_range=(0,100))

    ecg_data = sensor_data['ECG'].values
    mr_data = pd.DataFrame(scaler_mr.fit_transform(sensor_data[["MR"]]), columns=["MR"])['MR'].values
    ml_data = pd.DataFrame(scaler_ml.fit_transform(sensor_data[["ML"]]), columns=["ML"])['ML'].values

    mr_threshold = np.mean(mr_data) + 3 * np.std(mr_data)
    ml_threshold = np.mean(ml_data) + 3 * np.std(ml_data)

    for i in range(window_size_emg, len(sensor_data), overlap_emg):

        # Extract the 1-second window
        mr_window = mr_data[i-window_size_emg:i]
        ml_window = ml_data[i-window_size_emg:i]

        # Standard Deviation
        std_mr = np.std(mr_window)
        std_ml = np.std(ml_window)

        # Variance
        var_mr = np.var(mr_window)
        var_ml = np.var(ml_window)

        # RMS
        rms_mr = np.sqrt(np.mean(mr_window ** 2))
        rms_ml = np.sqrt(np.mean(ml_window ** 2))

        # Mean Absolute Value
        mav_mr = np.mean(np.abs(mr_window))
        mav_ml = np.mean(np.abs(ml_window))

        # Log detector: handle potetial log of zero
        log_det_mr = np.mean(np.log(np.maximum(np.absolute(mr_window), 1e-10)))  # Replace 0 with a small value
        log_det_ml = np.mean(np.log(np.maximum(np.absolute(ml_window), 1e-10)))  # Same here


        # Wavelength
        wl_mr = np.sum(abs(np.diff(mr_window)))
        wl_ml = np.sum(abs(np.diff(ml_window)))

        # Average Amplitude Change
        aac_mr = np.mean(np.abs(np.diff(mr_window)))
        aac_ml = np.mean(np.abs(np.diff(ml_window)))

        # Difference absolute standard deviation value
        dasdv_mr = math.sqrt((1 / (window_size_emg - 1)) * np.sum((np.diff(mr_window)) ** 2))
        dasdv_ml = math.sqrt((1 / (window_size_emg - 1)) * np.sum((np.diff(ml_window)) ** 2))

        # Willison Amplitude
        wamp_mr = np.sum(np.abs(np.diff(mr_window)) > mr_threshold)
        wamp_ml = np.sum(np.abs(np.diff(ml_window)) > ml_threshold)

        frequency_mr, power_mr = spectrum(mr_window, sampling_rate)
        frequency_ml, power_ml = spectrum(ml_window, sampling_rate)
        
        # Frequency power
        fr_mr =frequency_ratio(frequency_mr, power_mr) 
        fr_ml =frequency_ratio(frequency_mr, power_mr)

        # Mean power
        mnp_mr = np.sum(power_mr) / len(power_mr)
        mnp_ml = np.sum(power_ml) / len(power_ml)

        
        # Total power
        tot_mr = np.sum(power_mr)
        tot_ml = np.sum(power_ml)

        #Mean Frequency
        mnf_mr = mean_freq(frequency_mr, power_mr)
        mnf_ml = mean_freq(frequency_ml, power_ml)

        # Median frequency
        mdf_mr = median_freq(frequency_mr, power_mr)
        mdf_ml = median_freq(frequency_ml, power_ml)

        # Peak frequency
        pkf_mr = peak_freq(frequency_mr, power_mr)
        pkf_ml = peak_freq(frequency_ml, power_ml)

        start_time = (i-window_size_emg) / sampling_rate
        end_time = i / sampling_rate
        
        current_features = [start_time, end_time, std_mr, std_ml, var_mr, var_ml, rms_mr, rms_ml, mav_mr, mav_ml, log_det_mr, log_det_ml, wl_mr, wl_ml, aac_mr, aac_ml, dasdv_mr, dasdv_ml, wamp_mr, wamp_ml, fr_mr, fr_ml, mnp_mr, mnp_ml, tot_mr, tot_ml, mnf_mr, mnf_ml, mdf_mr, mdf_ml, pkf_mr, pkf_ml]
        features_1s.append(current_features)

        # Assign label based on event timing from the Excel file
        current_time = (i-window_size_emg) / sampling_rate  # Convert start index to time in seconds
        label = 0  # Default label is 0 (no event)
        
        # Loop through the events and check if current time falls within any event interval
        for event_start, event_end in event_intervals:
            if event_start <= current_time < event_end:
                label = 1  # Mark as event present
                break

        labels.append(label)

        if i % 200000 == 0:
            print(f"i: {i}")

    for i in range(window_size_ecg, len(ecg_data), overlap_ecg):
        window_90s = ecg_data[i - window_size_ecg:i]  # 90-second window

        mean, median, sdnn, min, max, vhf, lf, hf, lf_hf = calculate_hrv(window_90s)

        num_1s_windows_in_90s = window_size_ecg // window_size_emg
        for _ in range(num_1s_windows_in_90s):
            features_90s.append([mean, median, sdnn, min, max, vhf, lf, hf, lf_hf])


    combined_features = [f1 + f41 for f1, f41 in zip(features_1s, features_90s[:len(features_1s)])]

    columns = ["start_time", "end_time", "std_mr", "std_ml", "var_mr", "var_ml", "rms_mr",
               "rms_ml", "mav_mr", "mav_ml", "log_det_mr", "log_det_ml", "wl_mr", "wl_ml",
               "aac_mr", "aac_ml", "dasdv_mr", "dasdv_ml", "wamp_mr",
               "wamp_ml", "fr_mr", "fr_ml", "mnp_mr", "mnp_ml", "tot_mr", "tot_ml", "mnf_mr",
               "mnf_ml", "mdf_mr", "mdf_ml", "pkf_mr", "pkf_ml", "HRV_mean", "HRV_median", 
               "HRV_sdnn", "HRV_min", "HRV_max", "HRV_vhf", "HRV_lf", "HRV_hf", "HRV_lf_hf"]
    
    features = pd.DataFrame(combined_features, columns=columns)

    rri = get_rri(ecg_data)
    features = extend_df_with_rri(features, rri)

    if len(labels) != len(features):
        last_labels = len(labels) - len(features)
        for i in range(last_labels):
            del labels[-1]

    features["y"] = labels

    features.iloc[:, 18:41] = features.iloc[:, 18:41].ffill()

    features.to_csv(f"C:/Users/eleon/Desktop/SDAP/backend/src/data_brazil/Patient {patient_id}/p{patient_id}_features.csv")

    return features




In [68]:
path = 'C:/Users/eleon/Desktop/SDAP/backend/src/data_brazil/'
patients_dirs = [x[0] for x in os.walk(path)]
del patients_dirs[0]

# Leave-One-Patient-Out Cross-Validation
for i, dir_path in enumerate(patients_dirs):
    # Leave one patient out (this will be the test set)
    patient_nr = dir_path[-1]
    norm_csv_files = [file for file in os.listdir(dir_path) if file.endswith('norm.csv')]

    print(f"Patient {patient_nr}: {norm_csv_files[0]}")

    patient = pd.read_csv(f"{dir_path}/{norm_csv_files[0]}", usecols=[' HR 1 Frequenz [200 Hz]','EMG MR Frequenz [200 Hz]', 'EMG ML Frequenz [200 Hz]'])
    patient.columns = ["ECG", "MR", "ML"]

    events_data = pd.read_excel(f"{dir_path}/Patient_{patient_nr}_episodes.xlsx")

    features = extract_features(patient, events_data, patient_nr)

    

Patient 1: 00000714-A5BS17960_norm.csv
i: 200000
i: 400000
i: 600000
i: 800000
i: 1000000
i: 1200000
i: 1400000
i: 1600000
i: 1800000
i: 2000000
i: 2200000
i: 2400000
i: 2600000
i: 2800000
i: 3000000
i: 3200000
i: 3400000
i: 3600000
i: 3800000
i: 4000000
i: 4200000
i: 4400000
i: 4600000
i: 4800000
i: 5000000
i: 5200000


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


0.93


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_sliding['RRI'].fillna(method='ffill', inplace=True)
  df_sliding['RRI'].fillna(method='ffill', inplace=True)


Patient 2: 00000718-A5BS17960_norm.csv
i: 200000
i: 400000
i: 600000
i: 800000
i: 1000000
i: 1200000
i: 1400000
i: 1600000
i: 1800000
i: 2000000
i: 2200000
i: 2400000
i: 2600000
i: 2800000
i: 3000000
i: 3200000
i: 3400000
i: 3600000
i: 3800000
i: 4000000
i: 4200000
i: 4400000
i: 4600000
i: 4800000
i: 5000000
i: 5200000
i: 5400000


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


1.6


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_sliding['RRI'].fillna(method='ffill', inplace=True)
  df_sliding['RRI'].fillna(method='ffill', inplace=True)


Patient 3: 00000721-A5BS17960_norm.csv
i: 200000
i: 400000
i: 600000
i: 800000
i: 1000000
i: 1200000
i: 1400000
i: 1600000
i: 1800000
i: 2000000
i: 2200000
i: 2400000
i: 2600000
i: 2800000
i: 3000000
i: 3200000
i: 3400000
i: 3600000
i: 3800000
i: 4000000
i: 4200000
i: 4400000
i: 4600000
i: 4800000


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn

1.865


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_sliding['RRI'].fillna(method='ffill', inplace=True)
  df_sliding['RRI'].fillna(method='ffill', inplace=True)


Patient 4: 00000723-A5BS17960_norm.csv
i: 200000
i: 400000
i: 600000
i: 800000
i: 1000000
i: 1200000
i: 1400000
i: 1600000
i: 1800000
i: 2000000
i: 2200000
i: 2400000
i: 2600000
i: 2800000
i: 3000000
i: 3200000
i: 3400000
i: 3600000
i: 3800000
i: 4000000
i: 4200000
i: 4400000
i: 4600000
i: 4800000
i: 5000000
i: 5200000
i: 5400000


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn

1.2


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_sliding['RRI'].fillna(method='ffill', inplace=True)
  df_sliding['RRI'].fillna(method='ffill', inplace=True)


Patient 5: 00000724-A5BS17960_norm.csv
i: 200000
i: 400000
i: 600000
i: 800000
i: 1000000
i: 1200000
i: 1400000
i: 1600000
i: 1800000
i: 2000000
i: 2200000
i: 2400000
i: 2600000
i: 2800000
i: 3000000
i: 3200000
i: 3400000
i: 3600000
i: 3800000
i: 4000000
i: 4200000
i: 4400000
i: 4600000
i: 4800000


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


0.93


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_sliding['RRI'].fillna(method='ffill', inplace=True)
  df_sliding['RRI'].fillna(method='ffill', inplace=True)


Patient 6: 00000729-A5BS17960_norm.csv
i: 200000
i: 400000
i: 600000
i: 800000
i: 1000000
i: 1200000
i: 1400000
i: 1600000
i: 1800000
i: 2000000
i: 2200000
i: 2400000
i: 2600000
i: 2800000
i: 3000000
i: 3200000
i: 3400000
i: 3600000
i: 3800000
i: 4000000
i: 4200000
i: 4400000
i: 4600000
i: 4800000
i: 5000000
i: 5200000
i: 5400000


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


1.27


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_sliding['RRI'].fillna(method='ffill', inplace=True)
  df_sliding['RRI'].fillna(method='ffill', inplace=True)


Patient 7: 00000730-A5BS17960_norm.csv
i: 200000
i: 400000
i: 600000
i: 800000
i: 1000000
i: 1200000
i: 1400000
i: 1600000
i: 1800000
i: 2000000
i: 2200000
i: 2400000
i: 2600000
i: 2800000
i: 3000000
i: 3200000
i: 3400000
i: 3600000
i: 3800000
i: 4000000
i: 4200000
i: 4400000
i: 4600000
i: 4800000
i: 5000000
i: 5200000


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  out["MeanNN"] = np.nanmean(rri)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mrrs /= th2
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  out["RMSSD"] = np.sqrt(np.nanmean(diff_rri**2))
  power /= np.max(power)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  out["PSS"] = np.sum(np.asarray(lengths) < 3) / len(lengths)
  out["PAS"] = np.sum(np.asarray(lengths) >= 4) / len(lengths)
  return _methods._mean(a, axis=axis, dtype=dtype,
  out["GI"] = (num_GI / den_GI) * 100
  out["SI"] = (num_SI / den_SI) * 100
  out["A

112.66


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_sliding['RRI'].fillna(method='ffill', inplace=True)
  df_sliding['RRI'].fillna(method='ffill', inplace=True)


Patient 8: 00000732-A5BS17960_norm.csv
i: 200000
i: 400000
i: 600000
i: 800000
i: 1000000
i: 1200000
i: 1400000
i: 1600000
i: 1800000
i: 2000000
i: 2200000
i: 2400000
i: 2600000
i: 2800000
i: 3000000
i: 3200000
i: 3400000
i: 3600000
i: 3800000
i: 4000000
i: 4200000
i: 4400000
i: 4600000


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn

1.06


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_sliding['RRI'].fillna(method='ffill', inplace=True)
  df_sliding['RRI'].fillna(method='ffill', inplace=True)


In [49]:
f1 = pd.read_csv("C:/Users/eleon/Desktop/SDAP/backend/src/data_brazil/Patient 1/p1_features.csv")
f1_old = pd.read_csv("C:/Users/eleon/Desktop/SDAP/backend/src/data_brazil/Patient 1/p1_features_old.csv")

In [62]:
f1[f1["y"] == 1]

Unnamed: 0.1,Unnamed: 0,start_time,end_time,std_mr,std_ml,var_mr,var_ml,rms_mr,rms_ml,mav_mr,...,HRV_sdnn,HRV_min,HRV_max,HRV_vlf,HRV_vhf,HRV_lf,HRV_hf,HRV_lf_hf,RRI,y
5650,5650,2825.0,2826.0,7.616716,5.038503,58.014361,25.386513,51.235531,50.452281,50.666214,...,25.644200,695.0,815.0,,0.000133,0.031369,0.002309,13.586795,720.0,1
5651,5651,2825.5,2826.5,14.797717,10.575846,218.972442,111.848517,52.739731,51.325277,50.621209,...,25.644200,695.0,815.0,,0.000133,0.031369,0.002309,13.586795,720.0,1
5972,5972,2986.0,2987.0,7.331114,5.451662,53.745237,29.720623,51.109763,50.447267,50.581248,...,43.601438,650.0,810.0,,0.000062,0.008396,0.000928,9.051683,760.0,1
5973,5973,2986.5,2987.5,6.779413,4.790907,45.960444,22.952787,51.122033,50.499793,50.670523,...,43.601438,650.0,810.0,,0.000062,0.008396,0.000928,9.051683,765.0,1
5974,5974,2987.0,2988.0,8.022305,5.413796,64.357385,29.309188,51.253054,50.490376,50.621322,...,43.601438,650.0,810.0,,0.000062,0.008396,0.000928,9.051683,760.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51815,51815,25907.5,25908.5,5.322200,4.293442,28.325808,18.433647,50.905326,50.404880,50.626341,...,82.058789,665.0,960.0,,0.000011,0.002928,0.000158,18.537601,875.0,1
51816,51816,25908.0,25909.0,4.084382,2.864632,16.682176,8.206114,50.738327,50.294772,50.573666,...,82.058789,665.0,960.0,,0.000011,0.002928,0.000158,18.537601,870.0,1
51817,51817,25908.5,25909.5,5.870017,3.180872,34.457101,10.117947,50.848996,50.264762,50.509042,...,82.058789,665.0,960.0,,0.000011,0.002928,0.000158,18.537601,845.0,1
51818,51818,25909.0,25910.0,8.564815,5.184637,73.356058,26.880460,51.298057,50.464536,50.578005,...,82.058789,665.0,960.0,,0.000011,0.002928,0.000158,18.537601,845.0,1


In [51]:
f1_old['RRI']

0         759.5
1         759.5
2         759.5
3         759.5
4         759.5
          ...  
52285    8657.5
52286    8657.5
52287    8657.5
52288    8657.5
52289    8657.5
Name: RRI, Length: 52290, dtype: float64