# Loading the data and exploring its shape and values



# 1. библиотеки

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.utils import resample
import itertools

import pywt
try:
    import pathlib
except ImportError:
    import pathlib2 as pathlib
import scipy.signal as signal

import os
# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import f1_score

import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
from tensorflow.keras.layers import Dense, Conv1D, MaxPool1D, Flatten, Dropout, InputLayer, LSTM, GRU, BatchNormalization, Bidirectional, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.optimizers import SGD, RMSprop
from tensorflow.keras.utils import to_categorical

# 2. вспомогательные функции
## 2.1 класс детекторов для нахождения пиков

In [None]:
# Detector class for locating peaks
class Detectors:
    """ECG heartbeat detection algorithms
    General useage instructions:
    r_peaks = detectors.the_detector(ecg_in_samples)
    The argument ecg_in_samples is a single channel ECG in volt
    at the given sample rate.
    """
    
    def __init__(self, sampling_frequency):
        """
        The constructor takes the sampling rate in Hz of the ECG data.

        Args:
            sampling_frequency: sampling rate in Hz; stored as ``self.fs``
                and read by the detector methods to size their filters and
                time windows.
        """

        self.fs = sampling_frequency
        # this is set to a positive value for benchmarking
        # (engzee_detector adds it as a constant offset to each returned index)
        self.engzee_fake_delay = 0

    def hamilton_detector(self, unfiltered_ecg):
        """
        P.S. Hamilton, 
        Open Source ECG Analysis Software Documentation, E.P.Limited, 2002.
        """
        
        f1 = 8/self.fs
        f2 = 16/self.fs

        b, a = signal.butter(1, [f1*2, f2*2], btype='bandpass')

        filtered_ecg = signal.lfilter(b, a, unfiltered_ecg)

        diff = abs(np.diff(filtered_ecg))

        b = np.ones(int(0.08*self.fs))
        b = b/int(0.08*self.fs)
        a = [1]

        ma = signal.lfilter(b, a, diff)

        ma[0:len(b)*2] = 0

        n_pks = []
        n_pks_ave = 0.0
        s_pks = []
        s_pks_ave = 0.0
        QRS = [0]
        RR = []
        RR_ave = 0.0

        th = 0.0

        i=0
        idx = []
        peaks = []  

        for i in range(len(ma)):

            if i>0 and i<len(ma)-1:
                if ma[i-1]<ma[i] and ma[i+1]<ma[i]:
                    peak = i
                    peaks.append(i)

                    if ma[peak] > th and (peak-QRS[-1])>0.3*self.fs:        
                        QRS.append(peak)
                        idx.append(i)
                        s_pks.append(ma[peak])
                        if len(n_pks)>8:
                            s_pks.pop(0)
                        s_pks_ave = np.mean(s_pks)

                        if RR_ave != 0.0:
                            if QRS[-1]-QRS[-2] > 1.5*RR_ave:
                                missed_peaks = peaks[idx[-2]+1:idx[-1]]
                                for missed_peak in missed_peaks:
                                    if missed_peak-peaks[idx[-2]]>int(0.360*self.fs) and ma[missed_peak]>0.5*th:
                                        QRS.append(missed_peak)
                                        QRS.sort()
                                        break

                        if len(QRS)>2:
                            RR.append(QRS[-1]-QRS[-2])
                            if len(RR)>8:
                                RR.pop(0)
                            RR_ave = int(np.mean(RR))

                    else:
                        n_pks.append(ma[peak])
                        if len(n_pks)>8:
                            n_pks.pop(0)
                        n_pks_ave = np.mean(n_pks)

                    th = n_pks_ave + 0.45*(s_pks_ave-n_pks_ave)

                    i+=1

        QRS.pop(0)

        return QRS

    
    def christov_detector(self, unfiltered_ecg):
        """
        Ivaylo I. Christov, 
        Real time electrocardiogram QRS detection using combined 
        adaptive threshold, BioMedical Engineering OnLine 2004, 
        vol. 3:28, 2004.
        """
        total_taps = 0

        b = np.ones(int(0.02*self.fs))
        b = b/int(0.02*self.fs)
        total_taps += len(b)
        a = [1]

        MA1 = signal.lfilter(b, a, unfiltered_ecg)

        b = np.ones(int(0.028*self.fs))
        b = b/int(0.028*self.fs)
        total_taps += len(b)
        a = [1]

        MA2 = signal.lfilter(b, a, MA1)

        Y = []
        for i in range(1, len(MA2)-1):
            
            diff = abs(MA2[i+1]-MA2[i-1])

            Y.append(diff)

        b = np.ones(int(0.040*self.fs))
        b = b/int(0.040*self.fs)
        total_taps += len(b)
        a = [1]

        MA3 = signal.lfilter(b, a, Y)

        MA3[0:total_taps] = 0

        ms50 = int(0.05*self.fs)
        ms200 = int(0.2*self.fs)
        ms1200 = int(1.2*self.fs)
        ms350 = int(0.35*self.fs)

        M = 0
        newM5 = 0
        M_list = []
        MM = []
        M_slope = np.linspace(1.0, 0.6, ms1200-ms200)
        F = 0
        F_list = []
        R = 0
        RR = []
        Rm = 0
        R_list = []

        MFR = 0
        MFR_list = []

        QRS = []

        for i in range(len(MA3)):

            # M
            if i < 5*self.fs:
                M = 0.6*np.max(MA3[:i+1])
                MM.append(M)
                if len(MM)>5:
                    MM.pop(0)

            elif QRS and i < QRS[-1]+ms200:
                newM5 = 0.6*np.max(MA3[QRS[-1]:i])
                if newM5>1.5*MM[-1]:
                    newM5 = 1.1*MM[-1]

            elif QRS and i == QRS[-1]+ms200:
                if newM5==0:
                    newM5 = MM[-1]
                MM.append(newM5)
                if len(MM)>5:
                    MM.pop(0)    
                M = np.mean(MM)    
            
            elif QRS and i > QRS[-1]+ms200 and i < QRS[-1]+ms1200:

                M = np.mean(MM)*M_slope[i-(QRS[-1]+ms200)]

            elif QRS and i > QRS[-1]+ms1200:
                M = 0.6*np.mean(MM)

            # F
            if i > ms350:
                F_section = MA3[i-ms350:i]
                max_latest = np.max(F_section[-ms50:])
                max_earliest = np.max(F_section[:ms50])
                F = F + ((max_latest-max_earliest)/150.0)

            # R
            if QRS and i < QRS[-1]+int((2.0/3.0*Rm)):

                R = 0

            elif QRS and i > QRS[-1]+int((2.0/3.0*Rm)) and i < QRS[-1]+Rm:

                dec = (M-np.mean(MM))/1.4
                R = 0 + dec


            MFR = M+F+R
            M_list.append(M)
            F_list.append(F)
            R_list.append(R)
            MFR_list.append(MFR)

            if not QRS and MA3[i]>MFR:
                QRS.append(i)
            
            elif QRS and i > QRS[-1]+ms200 and MA3[i]>MFR:
                QRS.append(i)
                if len(QRS)>2:
                    RR.append(QRS[-1]-QRS[-2])
                    if len(RR)>5:
                        RR.pop(0)
                    Rm = int(np.mean(RR))

        QRS.pop(0)
        
        return QRS

    
    def engzee_detector(self, unfiltered_ecg):
        """
        C. Zeelenberg, A single scan algorithm for QRS detection and
        feature extraction, IEEE Comp. in Cardiology, vol. 6,
        pp. 37-42, 1979 with modifications A. Lourenco, H. Silva,
        P. Leite, R. Lourenco and A. Fred, “Real Time
        Electrocardiogram Segmentation for Finger Based ECG
        Biometrics”, BIOSIGNALS 2012, pp. 49-54, 2012.

        Args:
            unfiltered_ecg: single-channel ECG samples at ``self.fs`` Hz.

        Returns:
            List of R-peak sample indices: for every confirmed crossing the
            argmax of the *unfiltered* signal in a short window, plus
            ``self.engzee_fake_delay``.
        """
                
        # 48-52 Hz band-stop (mains notch), 4th-order Butterworth.
        f1 = 48/self.fs
        f2 = 52/self.fs
        b, a = signal.butter(4, [f1*2, f2*2], btype='bandstop')
        filtered_ecg = signal.lfilter(b, a, unfiltered_ecg)

        # Four-sample difference.
        diff = np.zeros(len(filtered_ecg))
        for i in range(4, len(diff)):
            diff[i] = filtered_ecg[i]-filtered_ecg[i-4]

        # FIR smoothing with binomial coefficients.
        ci = [1,4,6,4,1]        
        low_pass = signal.lfilter(ci, 1, diff)

        # Blank the first 200 ms.
        low_pass[:int(0.2*self.fs)] = 0
      
        ms200 = int(0.2*self.fs)
        ms1200 = int(1.2*self.fs)        
        ms160 = int(0.16*self.fs)
        # Number of consecutive below-(-M) samples needed to confirm a beat.
        neg_threshold = int(0.01*self.fs)

        M = 0
        M_list = []
        neg_m = []
        MM = []
        M_slope = np.linspace(1.0, 0.6, ms1200-ms200)

        QRS = []
        r_peaks = []

        counter = 0

        # thi: an upward crossing of +M was seen; thf: a downward crossing
        # of -M followed within 160 ms.
        thi_list = []
        thi = False
        thf_list = []
        thf = False

        for i in range(len(low_pass)):

            # M
            if i < 5*self.fs:
                M = 0.6*np.max(low_pass[:i+1])
                MM.append(M)
                if len(MM)>5:
                    MM.pop(0)

            elif QRS and i < QRS[-1]+ms200:

                newM5 = 0.6*np.max(low_pass[QRS[-1]:i])

                if newM5>1.5*MM[-1]:
                    newM5 = 1.1*MM[-1]

            elif QRS and i == QRS[-1]+ms200:
                # NOTE(review): newM5 is only assigned in the branch above;
                # if this branch is reached first (e.g. QRS[-1]+ms200 falls
                # exactly on 5*fs) it looks like this raises NameError —
                # christov_detector initialises newM5 = 0 for this case.
                MM.append(newM5)
                if len(MM)>5:
                    MM.pop(0)    
                M = np.mean(MM)    
            
            elif QRS and i > QRS[-1]+ms200 and i < QRS[-1]+ms1200:

                M = np.mean(MM)*M_slope[i-(QRS[-1]+ms200)]

            elif QRS and i > QRS[-1]+ms1200:
                M = 0.6*np.mean(MM)

            # M_list / neg_m are only appended to, never read in this method.
            M_list.append(M)
            neg_m.append(-M)


            if not QRS and low_pass[i]>M:
                QRS.append(i)
                thi_list.append(i)
                thi = True
            
            elif QRS and i > QRS[-1]+ms200 and low_pass[i]>M:
                QRS.append(i)
                thi_list.append(i)
                thi = True

            if thi and i<thi_list[-1]+ms160:
                if low_pass[i]<-M and low_pass[i-1]>-M:
                    #thf_list.append(i)
                    thf = True
                    
                if thf and low_pass[i]<-M:
                    thf_list.append(i)
                    counter += 1
                
                elif low_pass[i]>-M and thf:
                    counter = 0
                    thi = False
                    thf = False
            
            elif thi and i>thi_list[-1]+ms160:
                    # 160 ms window expired without confirmation: reset.
                    counter = 0
                    thi = False
                    thf = False                                        
            
            if counter>neg_threshold:
                # NOTE(review): the slice start can be negative when the
                # crossing lies within the first 10 ms — confirm intent.
                unfiltered_section = unfiltered_ecg[thi_list[-1]-int(0.01*self.fs):i]
                r_peaks.append(self.engzee_fake_delay+
                               np.argmax(unfiltered_section)+thi_list[-1]-int(0.01*self.fs))
                counter = 0
                thi = False
                thf = False

        return r_peaks

    
    def matched_filter_detector(self, unfiltered_ecg, template_file = ""):
        """
        FIR matched filter using template of QRS complex.
        Template provided for 250Hz and 360Hz. Optionally provide your
        own template file where every line has one sample.
        Uses the Pan and Tompkins thresholding method.
        """
        current_dir = pathlib.Path(__file__).resolve()

        if len(template_file) > 1:
            template = np.loadtxt(template_file)
        else:
            if self.fs == 250:
                template_dir = current_dir.parent/'templates'/'template_250hz.csv'
                template = np.loadtxt(template_dir)
            elif self.fs == 360:
                template_dir = current_dir.parent/'templates'/'template_360hz.csv'
                template = np.loadtxt(template_dir)
            else:
                print('\n!!No template for this frequency!!\n')
                return False

        f0 = 0.1/self.fs
        f1 = 48/self.fs

        b, a = signal.butter(4, [f0*2, f1*2], btype='bandpass')

        prefiltered_ecg = signal.lfilter(b, a, unfiltered_ecg)

        matched_coeffs = template[::-1]  #time reversing template

        detection = signal.lfilter(matched_coeffs, 1, prefiltered_ecg)  # matched filter FIR filtering
        squared = detection*detection  # squaring matched filter output
        squared[:len(template)] = 0

        squared_peaks = panPeakDetect(squared, self.fs)
  
        return squared_peaks

    
    def swt_detector(self, unfiltered_ecg):
        """
        Stationary Wavelet Transform 
        based on Vignesh Kalidas and Lakshman Tamil. 
        Real-time QRS detector using Stationary Wavelet Transform 
        for Automated ECG Analysis. 
        In: 2017 IEEE 17th International Conference on 
        Bioinformatics and Bioengineering (BIBE). 
        Uses the Pan and Tompkins thresholding.

        The input is edge-padded to a multiple of ``2**3`` samples, one
        'db3' SWT coefficient band is squared, band-pass filtered
        (0.01-10 Hz) and handed to ``panPeakDetect``.
        """

        swt_level = 3

        # Smallest non-negative padding that makes the length divisible
        # by 2**swt_level.
        padding = (-len(unfiltered_ecg)) % 2**swt_level
        if padding > 0:
            unfiltered_ecg = np.pad(unfiltered_ecg, (0, padding), 'edge')

        coefficients = np.array(pywt.swt(unfiltered_ecg, 'db3', level=swt_level))
        band = coefficients[0, 1, :]
        squared = band*band

        low = 0.01/self.fs*2
        high = 10/self.fs*2
        b, a = signal.butter(3, [low, high], btype='bandpass')
        filtered_squared = signal.lfilter(b, a, squared)

        return panPeakDetect(filtered_squared, self.fs)


    def pan_tompkins_detector(self, unfiltered_ecg):
        """
        Jiapu Pan and Willis J. Tompkins.
        A Real-Time QRS Detection Algorithm. 
        In: IEEE Transactions on Biomedical Engineering 
        BME-32.3 (1985), pp. 230–236.

        Steps: 5-15 Hz band-pass, first difference, squaring, 120 ms
        moving-window average (first 200 ms blanked), then
        ``panPeakDetect`` thresholding.
        """

        low = 5/self.fs*2
        high = 15/self.fs*2
        b, a = signal.butter(1, [low, high], btype='bandpass')

        slope = np.diff(signal.lfilter(b, a, unfiltered_ecg))
        energy = slope*slope

        mwa = MWA(energy, int(0.12*self.fs))
        mwa[:int(0.2*self.fs)] = 0

        return panPeakDetect(mwa, self.fs)


    def two_average_detector(self, unfiltered_ecg):
        """
        Elgendi, Mohamed & Jonkman, 
        Mirjam & De Boer, Friso. (2010).
        Frequency Bands Effects on QRS Detection.
        The 3rd International Conference on Bio-inspired Systems 
        and Signal Processing (BIOSIGNALS2010). 428-431.

        A short (120 ms) and a long (600 ms) moving average of the
        rectified band-passed signal are compared; stretches where the
        short one is larger form "blocks", and the maximum of the filtered
        signal inside each sufficiently wide block is reported.
        """

        low = 8/self.fs*2
        high = 20/self.fs*2
        b, a = signal.butter(2, [low, high], btype='bandpass')
        filtered_ecg = signal.lfilter(b, a, unfiltered_ecg)

        mwa_qrs = MWA(abs(filtered_ecg), int(0.12*self.fs))
        mwa_beat = MWA(abs(filtered_ecg), int(0.6*self.fs))

        # Square wave: block_height where the short average wins, else 0.
        block_height = np.max(filtered_ecg)
        blocks = np.zeros(len(unfiltered_ecg))
        for pos in range(len(mwa_qrs)):
            blocks[pos] = block_height if mwa_qrs[pos] > mwa_beat[pos] else 0

        min_width = int(0.08*self.fs)
        min_spacing = int(0.3*self.fs)
        QRS = []

        for pos in range(1, len(blocks)):
            before, now = blocks[pos-1], blocks[pos]

            if before == 0 and now == block_height:
                # rising edge
                start = pos

            elif before == block_height and now == 0:
                # falling edge
                end = pos-1

                if end-start > min_width:
                    detection = np.argmax(filtered_ecg[start:end+1])+start
                    if not QRS or detection-QRS[-1] > min_spacing:
                        QRS.append(detection)

        return QRS


def MWA(input_array, window_size):
    """Trailing moving-window average.

    Element ``k`` (k > 0) is the mean of up to ``window_size`` samples that
    precede index ``k`` (the sample at ``k`` itself is not included);
    element 0 is copied from the input.
    """
    count = len(input_array)
    averaged = np.zeros(count)

    if count:
        averaged[0] = input_array[0]

    for pos in range(1, count):
        first = max(pos-window_size, 0)
        averaged[pos] = np.mean(input_array[first:pos])

    return averaged


def normalise(input_array):
    """Min-max scale ``input_array``: subtract its minimum, divide by its range."""
    lowest = np.min(input_array)
    highest = np.max(input_array)
    return (input_array-lowest)/(highest-lowest)


def panPeakDetect(detection, fs):
    """Adaptive-threshold peak picking on a detection signal.

    Every strict local maximum of ``detection`` is classified as a signal
    peak (above ``threshold_I1`` and more than 0.3 s after the previous
    signal peak) or as a noise peak; running estimates SPKI / NPKI update
    the thresholds. Once more than eight signal peaks exist, an overly long
    RR interval triggers a search-back with the lower ``threshold_I2``.

    Args:
        detection: detection signal; must support array indexing with a
            list of indices.
        fs: sampling rate in Hz.

    Returns:
        List of accepted peak indices.
    """
    min_distance = int(0.25*fs)

    signal_peaks = [0]      # sentinel, removed before returning
    SPKI = 0.0
    NPKI = 0.0
    threshold_I1 = 0.0
    threshold_I2 = 0.0
    RR_missed = 0

    peaks = []              # all local maxima so far
    indexes = []            # position in `peaks` of accepted signal peaks

    for i in range(1, len(detection)-1):

        is_local_max = detection[i-1] < detection[i] and detection[i+1] < detection[i]
        if not is_local_max:
            continue

        peaks.append(i)
        position = len(peaks)-1

        if detection[i] > threshold_I1 and (i-signal_peaks[-1]) > 0.3*fs:

            signal_peaks.append(i)
            indexes.append(position)
            SPKI = 0.125*detection[i] + 0.875*SPKI

            if RR_missed != 0 and signal_peaks[-1]-signal_peaks[-2] > RR_missed:
                # Search-back among the local maxima between the last two
                # accepted peaks.
                candidates = [
                    p for p in peaks[indexes[-2]+1:indexes[-1]]
                    if p-signal_peaks[-2] > min_distance
                    and signal_peaks[-1]-p > min_distance
                    and detection[p] > threshold_I2
                ]

                if len(candidates) > 0:
                    missed_peak = candidates[np.argmax(detection[candidates])]
                    # Insert the recovered peak before the current one.
                    signal_peaks.append(signal_peaks[-1])
                    signal_peaks[-2] = missed_peak

        else:
            NPKI = 0.125*detection[i] + 0.875*NPKI

        threshold_I1 = NPKI + 0.25*(SPKI-NPKI)
        threshold_I2 = 0.5*threshold_I1

        if len(signal_peaks) > 8:
            RR_ave = int(np.mean(np.diff(signal_peaks[-9:])))
            RR_missed = int(1.66*RR_ave)

    signal_peaks.pop(0)

    return signal_peaks

## 2.2 функция выбранного детектора

In [None]:
def detect(unfiltered_ecg, fs=250):
    """Return the R-peak indices of one ECG channel.

    Currently delegates to ``Detectors.pan_tompkins_detector``; the other
    ``Detectors`` methods (two_average, matched_filter, swt, engzee,
    christov, hamilton) can be swapped in here.

    Args:
        unfiltered_ecg: single-channel ECG samples.
        fs: sampling rate in Hz.
    """
    return Detectors(fs).pan_tompkins_detector(unfiltered_ecg)
def visualize(unfiltered_ecg, r_peaks):
    """Plot the ECG trace and mark the detected R-peaks with red dots."""
    plt.figure()
    plt.plot(unfiltered_ecg)
    peak_values = unfiltered_ecg[r_peaks]
    plt.plot(r_peaks, peak_values, 'ro')
    plt.title('Detected R-peaks')

    plt.show()

# 3. Загрузка подготовленных датасетов

In [None]:
# Load the prepared test / train / validation tables from the input datasets.
data_test = pd.read_csv('../input/test-ecg/new_test_df.csv')
data_train = pd.read_csv('../input/train-ecg/new_train_df.csv')
data_valid = pd.read_csv('../input/valid-ecg/new_valid_df.csv')

In [None]:
# Training, test and validation X sets use the same feature columns:
# every column of data_train whose name contains 'channel'.
X_cols = [col for col in data_train.columns.tolist() if 'channel' in col]
train_ptb = data_train[X_cols]


In [None]:
# Test features: the same 'channel' columns as the training set.
test_ptb = data_test[X_cols] 

In [None]:
# Validation features: the same 'channel' columns as the training set.
valid_ptb = data_valid[X_cols] 

In [None]:
# Targets are selected one at a time from the last five columns of data_train.
y_variables = data_train.columns.tolist()[-5:]

In [None]:
# Training-set y: sinus rhythm (the first of the five target columns).
out_train_ptb = data_train[y_variables[0]]


In [None]:
# Test-set y: same target column as for training.
out_test_ptb = data_test[y_variables[0]]


In [None]:
# Validation-set y: same target column as for training.
out_valid_ptb = data_valid[y_variables[0]]


In [None]:
# Normalizing the validation & test data (max-norm along axis 0, i.e. per column).
# NOTE(review): the training split is NOT normalized here (line commented out),
# and validation / test are each scaled by their own column maxima — confirm
# this is intended.
# train_ptb = normalize(train_ptb, axis=0, norm='max')
valid_ptb = normalize(valid_ptb, axis=0, norm='max')
test_ptb = normalize(test_ptb, axis=0, norm='max')

In [None]:
# Reshaping the normalized 2-D arrays into 3-D Numpy arrays
# (rows, columns, 1) as expected by the Conv1D input.
# NOTE(review): x_train_ptb / y_train_ptb are not created here (lines
# commented out) although a later cell calls fit() with them.
# x_train_ptb = train_ptb.reshape(len(train_ptb),train_ptb.shape[1],1)
x_valid_ptb = valid_ptb.reshape(len(valid_ptb),valid_ptb.shape[1],1)
x_test_ptb = test_ptb.reshape(len(test_ptb),test_ptb.shape[1],1)

# Converting the output into a categorical (one-hot) array
# y_train_ptb = to_categorical(out_train_ptb)
y_valid_ptb = to_categorical(out_valid_ptb)
y_test_ptb = to_categorical(out_test_ptb)

# 4. Выбор таргета

## 4.1 Таргет для модели - 1

## 4.2 Таргет для модели - 2 по конкретному ЭКГ

## 5.1 Датафрейм с конкретным ЭКГ по указанным датасету и номеру ecg_id

In [None]:
# Returns the frame restricted to the requested ECG.
def get_unfiltered_ecg(df, ecg_id, cut_target_data=True):
    """Select the rows of ``df`` whose 'ecg_id' equals ``ecg_id``.

    Args:
        df: DataFrame with an 'ecg_id' column.
        ecg_id: identifier to select.
        cut_target_data: when true, drop the first column and the last
            five columns from the result; otherwise keep every column.
    """
    rows = df[df['ecg_id'] == ecg_id]

    if not cut_target_data:
        return rows

    kept_columns = rows.columns[1:-5]
    return rows[kept_columns]

## 5.2 Формируем датасет с пиками поканально по указанным датасету и ЭКГ

In [None]:
# Returns a dictionary of detected peaks for one specific ECG.
def get_peaks(df, ecg_id, fs=100):
    """Detect peaks separately in every channel of one ECG.

    Args:
        df: DataFrame accepted by ``get_unfiltered_ecg``.
        ecg_id: identifier of the ECG to process.
        fs: sampling rate in Hz passed to ``detect`` (default 100, the
            value that used to be hard-coded).

    Returns:
        Dict mapping ``'channel-<n>'`` (n = column position) to the peak
        list returned by ``detect`` for that column; empty when the ECG
        frame has no columns.
    """
    ecg_data = get_unfiltered_ecg(df, ecg_id).to_numpy()

    # Built once after detection, so the result is also defined (empty)
    # when there are zero channels.
    return {
        f'channel-{num_channel}': detect(ecg_data[:, num_channel], fs)
        for num_channel in range(ecg_data.shape[1])
    }

# Returns a frame of peaks for later merging with the rest of this ECG's data.
def pad_nan_peaks(dict_peaks, pad_value = 9999):
    """Turn per-channel peak lists of unequal length into a DataFrame.

    Shorter lists are right-padded with ``pad_value`` up to the length of
    the longest one. The input dictionary is left untouched.

    Args:
        dict_peaks: mapping channel name -> sequence of peak indices.
        pad_value: filler for the missing trailing entries.

    Returns:
        DataFrame with one column per channel.
    """
    len_max = max((len(list_peaks) for list_peaks in dict_peaks.values()), default=0)

    # Build a new mapping instead of overwriting the caller's lists.
    padded = {
        channel: np.pad(list_peaks, (0, len_max-len(list_peaks)), 'constant',
                        constant_values=pad_value)
        for channel, list_peaks in dict_peaks.items()
    }

    return pd.DataFrame(padded)

## 5.3 Формируем датасет с неотфильтрованными данными и пиками поканально по указанным датасету и ЭКГ

In [None]:
def merge_peaks_to_data(df, ecg_id):
    """Stack the raw signal rows of one ECG with its padded peak rows.

    The peak frame is appended below the signal frame (row-wise concat with
    a fresh 0..n-1 index) and an 'ecg_id' column holding ``ecg_id`` is set
    on every row.
    """
    signal_rows = get_unfiltered_ecg(df, ecg_id)
    peak_rows = pad_nan_peaks(get_peaks(df, ecg_id))

    stacked = pd.concat([signal_rows, peak_rows], ignore_index=True)
    stacked['ecg_id'] = ecg_id
    return stacked

In [None]:
def df_with_peaks(df):
    """Build the signal-plus-peaks frame for every ECG in ``df``.

    ``merge_peaks_to_data`` is run once per distinct 'ecg_id' (in order of
    first appearance) and the per-ECG frames are concatenated row-wise.

    Args:
        df: DataFrame with an 'ecg_id' column.

    Returns:
        The concatenated DataFrame; an empty DataFrame when ``df`` contains
        no ECG ids.
    """
    # One merge per id, concatenated once. The previous version merged
    # every non-first id twice, recomputed min(unique()) on each iteration
    # and failed when the smallest id was not the first one encountered.
    frames = [merge_peaks_to_data(df, ecg_id) for ecg_id in df['ecg_id'].unique()]

    if not frames:
        return pd.DataFrame()

    return pd.concat(frames)


In [None]:
# Used when y must match the length of the frame that includes the peak rows.
def find_y(df_with_peaks, start_df):
    """Broadcast the ECG's label over every row of the peak-augmented frame.

    The first 'sinus_rythm' value of ``start_df`` is written to a new
    'target' column of ``df_with_peaks`` (the frame is modified in place).

    Args:
        df_with_peaks: frame to label.
        start_df: original rows of the same ECG, including 'sinus_rythm'.

    Returns:
        The 'target' column of ``df_with_peaks``.

    Raises:
        IndexError: if ``start_df`` has no rows.
    """
    # The former unused `df_with_peaks['ecg_id'][0]` lookup was dropped: it
    # had no effect on the result but raised KeyError for frames whose
    # index has no label 0.
    y_value = start_df['sinus_rythm'].iloc[0]
    df_with_peaks['target'] = y_value
    return df_with_peaks['target']

In [None]:
# Smoke test: label frame for ECG 1, using only the first 2000 training rows.
find_y(merge_peaks_to_data(data_train[:2000], 1), get_unfiltered_ecg(data_train[:2000], 1, False))

In [None]:
# Scratch cell: inspect the merged frame and the label of validation ECG 17.
# The frame is bound to `valid_with_peaks` rather than `df_with_peaks`, so the
# module-level df_with_peaks() function is not overwritten by a DataFrame
# (a later cell still calls df_with_peaks(...)).
valid_with_peaks = merge_peaks_to_data(data_valid, 17)
start_df = get_unfiltered_ecg(data_valid, 17, False)
ecg_id = valid_with_peaks['ecg_id'][0]
start_df['sinus_rythm'].tolist()[0]

In [None]:
# достать сразу пики с основными с датасетами для входа в модель (тяжелые, не получается)

# def X_Y_valid(data_valid):
#     y_valid_list = []
#     x_valid_list = []
#     for ecg_id in data_valid['ecg_id'].unique():
#         df_with_peaks = merge_peaks_to_data(data_valid, ecg_id)
#         y_valid_ptb = find_y(df_with_peaks, get_unfiltered_ecg(data_valid, ecg_id, False))
#         valid_ptb = normalize(df_with_peaks, axis=0, norm='max')
#         x_valid_ptb = valid_ptb.reshape(len(valid_ptb),valid_ptb.shape[1],1)
#         y_valid_list.append(y_valid_ptb)
#         x_valid_list.append(x_valid_ptb)

#         y_valid_ptb = list(itertools.chain(*y_valid_list))
#         x_valid_ptb = list(itertools.chain(*x_valid_list))

#     return y_valid_list, x_valid_list
# y_valid_list, x_valid_list = X_Y_valid(data_valid)

# data_train.shape[1]

In [None]:
# Reset the global Keras state before defining a new model.
tf.keras.backend.clear_session()

def build_conv1d_model(input_shape):
    """Build and compile the Conv1D binary classifier.

    Architecture: five blocks of Conv1D(kernel 7, 'same') ->
    BatchNormalization -> ReLU -> MaxPool1D(5, 'same') with 32, 64, 128,
    256 and 512 filters, followed by Flatten, a Dense stack
    (512, 256 with Dropout 0.5 each, then 128, 64, 32) and a 2-unit
    softmax output.

    Args:
        input_shape: shape of one sample, passed to the first Conv1D layer.

    Returns:
        The compiled ``Sequential`` model (adam, categorical cross-entropy,
        micro-averaged F1 metric).
    """
    model = keras.models.Sequential()

    # Only the first layer needs input_shape; the copies that used to be
    # passed to the 3rd and 5th convolution had no effect on the network.
    for block_no, filters in enumerate((32, 64, 128, 256, 512)):
        if block_no == 0:
            model.add(Conv1D(filters, 7, padding='same', input_shape=input_shape))
        else:
            model.add(Conv1D(filters, 7, padding='same'))
        model.add(BatchNormalization())
        model.add(tf.keras.layers.ReLU())
        model.add(MaxPool1D(5, padding='same'))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(2, activation="softmax"))
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=[tfa.metrics.F1Score(2,"micro")])
    return model

def summary_model():
    """Build the Conv1D model for input shape (12, 1), print its summary and return it."""
    model = build_conv1d_model(input_shape=(12, 1))
    model.summary()
    return model



In [None]:
def df_X_y_from_ecg_id(data_train, x_valid_ptb, y_valid_ptb):
    """Iterate over the ECGs of ``data_train`` and prepare per-ECG X / y arrays.

    For every distinct 'ecg_id' the peak-augmented frame, its target column
    and a normalized, reshaped feature array are built.

    NOTE(review): no training happens here. The statement below only packs
    its arguments into a tuple; it looks like a call to ``history_model``
    with the function name missing — confirm intent before fixing.
    NOTE(review): ``model_conv1d_ptb``, ``checkpoint_cb`` and
    ``earlystop_cb`` are read as globals (their local definitions are
    commented out) and must exist before this function is called.

    Returns:
        The global ``model_conv1d_ptb``.
    """
#     checkpoint_cb = ModelCheckpoint("conv1d_ptb.h5", save_best_only=True)
#     earlystop_cb = EarlyStopping(patience=5, restore_best_weights=True)
#     model_conv1d_ptb = summary_model()
    history_conv1d_ptb = None
    
    for ecg_id in data_train['ecg_id'].unique():
        
        df_with_peaks = merge_peaks_to_data(data_train, ecg_id)
        # find_y also adds a 'target' column to df_with_peaks in place, so
        # the frame normalized on the next line already contains it.
        # NOTE(review): presumably the label should not be part of X — verify.
        y_train_ptb = find_y(df_with_peaks, get_unfiltered_ecg(data_train, ecg_id, False))
        train_ptb = normalize(df_with_peaks, axis=0, norm='max')
        x_train_ptb = train_ptb.reshape(len(train_ptb),train_ptb.shape[1],1)
        
        history_conv1d_ptb = (model_conv1d_ptb, checkpoint_cb, earlystop_cb,
                              x_train_ptb, y_train_ptb, x_valid_ptb, y_valid_ptb)
        # Progress indicator: overwrite the same console line with the id.
        print(ecg_id,sep='',end="\r",flush=True)

    return model_conv1d_ptb


In [None]:

# Run the per-ECG preparation on the first 10000 training rows.
# NOTE(review): df_X_y_from_ecg_id reads the globals model_conv1d_ptb,
# checkpoint_cb and earlystop_cb, which are only created in a cell further
# below — run that cell first.
model_conv1d_ptb = df_X_y_from_ecg_id(data_train[:10000], x_valid_ptb, y_valid_ptb)
# model_conv1d_ptb.load_weights("conv1d_ptb.h5")
# model_conv1d_ptb.evaluate(x_test_ptb,y_test_ptb)

In [None]:
# Load the weights stored in conv1d_ptb.h5 and evaluate on the test split.
# NOTE(review): requires conv1d_ptb.h5 to exist already (written by the
# ModelCheckpoint callback during a fit run).
model_conv1d_ptb.load_weights("conv1d_ptb.h5")
model_conv1d_ptb.evaluate(x_test_ptb,y_test_ptb)

In [None]:

# Create the training callbacks and the model used by the cells above/below:
# checkpoint to conv1d_ptb.h5 (best only) and early stopping with patience 5.
checkpoint_cb = ModelCheckpoint("conv1d_ptb.h5", save_best_only=True)
earlystop_cb = EarlyStopping(patience=5, restore_best_weights=True)
model_conv1d_ptb = summary_model()

# checkpoint_cb = ModelCheckpoint("conv1d_ptb.h5", save_best_only=True)
# earlystop_cb = EarlyStopping(patience=5, restore_best_weights=True)
# model_conv1d_ptb= build_conv1d_model(input_shape=(x_train_ptb.shape[1], x_train_ptb.shape[2]))
# model_conv1d_ptb.summary()

In [None]:
# Build the signal-plus-peaks frame for the first 2000 training rows.
# NOTE(review): an earlier cell rebinds the name df_with_peaks to a DataFrame;
# if that cell ran after the function definition this call fails.
df_with_peaks(data_train[:2000])


## Defining Conv1D model for PTB

Creating a model based on a series of Conv1D layers that are connected to another series of fully connected dense layers

In [None]:
# Reset the global Keras state before (re)defining the model builder.
tf.keras.backend.clear_session()

#Function to build Convolutional 1D Networks
# def build_conv1d_model_old (input_shape=(x_train_ptb.shape[1],1)):
#     model = keras.models.Sequential()
    
#     model.add(Conv1D(32,7, padding='same', input_shape=input_shape))
#     model.add(BatchNormalization())
#     model.add(tf.keras.layers.ReLU())
#     model.add(MaxPool1D(5,padding='same'))

#     model.add(Conv1D(64,7, padding='same'))
#     model.add(BatchNormalization())
#     model.add(tf.keras.layers.ReLU())
#     model.add(MaxPool1D(5,padding='same'))

#     model.add(Conv1D(128,7, padding='same', input_shape=input_shape))
#     model.add(BatchNormalization())
#     model.add(tf.keras.layers.ReLU())
#     model.add(MaxPool1D(5,padding='same'))

#     model.add(Conv1D(256,7, padding='same'))
#     model.add(BatchNormalization())
#     model.add(tf.keras.layers.ReLU())
#     model.add(MaxPool1D(5,padding='same'))

#     model.add(Conv1D(512,7, padding='same', input_shape=input_shape))
#     model.add(BatchNormalization())
#     model.add(tf.keras.layers.ReLU())
#     model.add(MaxPool1D(5,padding='same'))

#     model.add(Flatten())
#     model.add(Dense(512, activation='relu'))
#     model.add(Dropout(0.5))
#     model.add(Dense(256, activation='relu'))
#     model.add(Dropout(0.5))
#     model.add(Dense(128, activation='relu'))
#     model.add(Dense(64, activation='relu'))
#     model.add(Dense(32, activation='relu'))
#     model.add(Dense(2, activation="softmax"))
#     model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=[tfa.metrics.F1Score(2,"micro")])
#     return model
def build_conv1d_model(input_shape):
    """Build and compile the Conv1D binary classifier.

    Architecture: five blocks of Conv1D(kernel 7, 'same') ->
    BatchNormalization -> ReLU -> MaxPool1D(5, 'same') with 32, 64, 128,
    256 and 512 filters, followed by Flatten, a Dense stack
    (512, 256 with Dropout 0.5 each, then 128, 64, 32) and a 2-unit
    softmax output.

    Args:
        input_shape: shape of one sample, passed to the first Conv1D layer.

    Returns:
        The compiled ``Sequential`` model (adam, categorical cross-entropy,
        micro-averaged F1 metric).
    """
    model = keras.models.Sequential()

    # Only the first layer needs input_shape; the copies that used to be
    # passed to the 3rd and 5th convolution had no effect on the network.
    for block_no, filters in enumerate((32, 64, 128, 256, 512)):
        if block_no == 0:
            model.add(Conv1D(filters, 7, padding='same', input_shape=input_shape))
        else:
            model.add(Conv1D(filters, 7, padding='same'))
        model.add(BatchNormalization())
        model.add(tf.keras.layers.ReLU())
        model.add(MaxPool1D(5, padding='same'))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(2, activation="softmax"))
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=[tfa.metrics.F1Score(2,"micro")])
    return model

In [None]:
# checkpoint_cb = ModelCheckpoint("conv1d_ptb.h5", save_best_only=True)

# earlystop_cb = EarlyStopping(patience=5, restore_best_weights=True)

# model_conv1d_ptb= build_conv1d_model(input_shape=(x_train_ptb.shape[1], x_train_ptb.shape[2]))
# model_conv1d_ptb.summary()
def summary_model(x_train_ptb):
    """Build the Conv1D model for the given training data and print its summary.

    Args:
        x_train_ptb: Training inputs; only ``shape[1]`` and ``shape[2]`` are
            read, and they become the model's input shape.

    Returns:
        Whatever ``summary()`` of the built model returns (the summary itself
        is printed by Keras).
    """
    # The checkpoint / early-stopping callbacks that used to be created here
    # were never used by this function, so they are no longer constructed.
    model_conv1d_ptb = build_conv1d_model(
        input_shape=(x_train_ptb.shape[1], x_train_ptb.shape[2]))
    return model_conv1d_ptb.summary()
    

In [None]:
def history_model(model_conv1d_ptb, checkpoint_cb, earlystop_cb,
                  x_train_ptb, y_train_ptb, x_valid_ptb, y_valid_ptb,
                  *, epochs=1, batch_size=32, steps_per_epoch=100,
                  validation_steps=50):
    """Train the given model and return the result of ``fit``.

    Args:
        model_conv1d_ptb: Model object exposing a Keras-style ``fit`` method.
        checkpoint_cb: Callback passed to ``fit`` (first in the callback list).
        earlystop_cb: Callback passed to ``fit`` (second in the callback list).
        x_train_ptb: Training inputs.
        y_train_ptb: Training targets.
        x_valid_ptb: Validation inputs.
        y_valid_ptb: Validation targets.
        epochs: Number of epochs to train for (default 1, the previously
            hard-coded value).
        batch_size: Batch size forwarded to ``fit`` (default 32).
        steps_per_epoch: Training steps per epoch (default 100).
        validation_steps: Validation steps per epoch (default 50).

    Returns:
        The value returned by ``model_conv1d_ptb.fit`` (the training history
        for Keras models).
    """
    history_conv1d_ptb = model_conv1d_ptb.fit(
        x_train_ptb, y_train_ptb,
        epochs=epochs,
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        validation_data=(x_valid_ptb, y_valid_ptb),
        validation_steps=validation_steps,
        callbacks=[checkpoint_cb, earlystop_cb],
    )
    return history_conv1d_ptb

In [None]:
# for i in list(range(2)):
#     history = model.fit(training_set_1,epochs=1)
#     history = model.fit(training_set_2,epochs=1)
# Train the plain Conv1D model on the PTB training data and keep the result
# of fit() for the plots further down.
# NOTE(review): model_conv1d_ptb, checkpoint_cb and earlystop_cb must already
# exist at module level; the nearby definitions are commented out or local to
# summary_model - confirm they are created in an earlier cell.
history_conv1d_ptb = model_conv1d_ptb.fit(
    x_train_ptb,
    y_train_ptb,
    batch_size=32,
    epochs=10,
    steps_per_epoch=1000,
    # class_weight=class_weight,  (currently disabled)
    validation_data=(x_valid_ptb, y_valid_ptb),
    validation_steps=500,
    callbacks=[checkpoint_cb, earlystop_cb],
)
#     history_conv1d_ptb = model_conv1d_ptb.fit(x_train_ptb[100000:200000], y_train_ptb[100000:200000],
#                                               epochs=1, batch_size=32, 
# #                                           class_weight=class_weight, 
#                                           validation_data=(x_valid_ptb[100000:200000], y_valid_ptb[100000:200000]),  
#                                           callbacks=[checkpoint_cb, earlystop_cb])

In [None]:
# history_conv1d_ptb = model_conv1d_ptb.fit(x_train_ptb, y_train_ptb, epochs=40, batch_size=32, 
# #                                           class_weight=class_weight, 
#                                           validation_data=(x_valid_ptb, y_valid_ptb),  
#                                           callbacks=[checkpoint_cb, earlystop_cb])

In [None]:
# Bare expression: as the last statement of a notebook cell it displays
# data_test (presumably created in an earlier cell - confirm).
data_test

In [None]:
def evaluate_model(x_train_ptb, y_train_ptb, x_valid_ptb, y_valid_ptb,
                   x_test_ptb, y_test_ptb):
    """Build, train and evaluate the plain Conv1D model.

    The previous body called ``history_model`` without the model and the two
    callbacks it requires, and then called ``evaluate`` on its return value
    (the result of ``fit``, not a model). The model and callbacks are now
    created here, mirroring how they are set up elsewhere in this file.

    Args:
        x_train_ptb: Training inputs; ``shape[1]`` and ``shape[2]`` define
            the model input shape.
        y_train_ptb: Training targets.
        x_valid_ptb: Validation inputs.
        y_valid_ptb: Validation targets.
        x_test_ptb: Test inputs.
        y_test_ptb: Test targets.

    Returns:
        The value returned by ``model.evaluate`` on the test data.
    """
    checkpoint_cb = ModelCheckpoint("conv1d_ptb.h5", save_best_only=True)
    earlystop_cb = EarlyStopping(patience=5, restore_best_weights=True)

    model_conv1d_ptb = build_conv1d_model(
        input_shape=(x_train_ptb.shape[1], x_train_ptb.shape[2]))

    # history_model trains the model in place; its return value is the
    # training history, which is not needed for the evaluation.
    history_model(model_conv1d_ptb, checkpoint_cb, earlystop_cb,
                  x_train_ptb, y_train_ptb, x_valid_ptb, y_valid_ptb)

    return model_conv1d_ptb.evaluate(x_test_ptb, y_test_ptb)

In [None]:
# Load the weights stored in "conv1d_ptb.h5" into the Conv1D model, then
# evaluate it on the test data (the evaluate() result is the cell output).
# NOTE(review): "conv1d_ptb.h5" is the path given to the ModelCheckpoint
# callback for this model - confirm the file exists before running this cell.
model_conv1d_ptb.load_weights("conv1d_ptb.h5")
model_conv1d_ptb.evaluate(x_test_ptb,y_test_ptb)

In [None]:
# Predict class probabilities for the test inputs, then take the index of the
# largest output per sample as the predicted class.
conv1d_pred_proba_ptb = model_conv1d_ptb.predict(x_test_ptb)
conv1d_pred_ptb = conv1d_pred_proba_ptb.argmax(axis=1)

In [None]:
# Per-class precision / recall / F1 report for the Conv1D model.
# conv1d_pred_ptb already holds argmax class indices, so `> 0.5` only turns
# them into booleans.
# NOTE(review): presumably PTB_Outcome maps class index -> display name and
# out_test_ptb holds the integer test labels - confirm.
print(classification_report(out_test_ptb, conv1d_pred_ptb > 0.5, target_names=[PTB_Outcome[i] for i in PTB_Outcome]))

In [None]:
# scikit-learn metrics expect (y_true, y_score / y_pred) in that order; the
# arguments were previously swapped, and the 2-column probability matrix was
# handed to metrics that need class labels.
# ROC AUC is scored with the predicted probability of class 1 (second softmax
# column); the label-based metrics use the argmax predictions.
# NOTE(review): assumes out_test_ptb holds the 0/1 integer test labels, as it
# is used as y_true in the classification report - confirm.
print(roc_auc_score(out_test_ptb, conv1d_pred_proba_ptb[:, 1]))
print(balanced_accuracy_score(out_test_ptb, conv1d_pred_ptb))
print(f1_score(out_test_ptb, conv1d_pred_ptb))

In [None]:
# Plot the training and validation curves (loss and F1) of the Conv1D model
plt.figure(figsize=(25,12))

# (history key, line colour, legend label, extra line style)
_conv1d_curves = (
    ('loss', 'r', 'Train loss', {}),
    ('val_loss', 'b', 'Val loss', {'linestyle': "--"}),
    ('f1_score', 'g', 'Train F1', {}),
    ('val_f1_score', 'c', 'Val F1', {'linestyle': "--"}),
)
for _key, _colour, _label, _extra in _conv1d_curves:
    plt.plot(history_conv1d_ptb.epoch, history_conv1d_ptb.history[_key],
             color=_colour, label=_label, **_extra)

# NOTE(review): the y axis is labelled 'Loss' although the F1 curves share it.
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

## Defining Conv1D Residual model for PTB

Creating a model based on a series of Conv1D layers arranged in 2 residual blocks, followed by a series of fully connected dense layers

In [None]:
def build_conv1d_res_model(input_shape=(x_train_ptb.shape[1],1)):
    """Build and compile the Conv1D model with two skip-connection blocks.

    Each block chains three Conv1D -> BatchNormalization -> ReLU units
    (kernel size 7, 'same' padding; 64 filters in the first block, 128 in the
    second), concatenates the output of the first unit with the output of the
    third, and applies MaxPool1D(5). The flattened result goes through dense
    layers of 512, 256, 128, 64 and 32 units (dropout 0.5 after the first
    two) into a 2-unit softmax output.

    Args:
        input_shape: Shape of a single input sample without the batch
            dimension. The default is evaluated once, when this function is
            defined, from the module-level ``x_train_ptb``.

    Returns:
        The compiled functional ``Model`` (Adam optimizer, categorical
        cross-entropy loss, micro-averaged ``tfa.metrics.F1Score``).
    """
    def conv_bn_relu(tensor, filters):
        # One convolution unit: Conv1D, batch normalisation, ReLU.
        tensor = Conv1D(filters, 7, padding='same')(tensor)
        tensor = BatchNormalization()(tensor)
        return tf.keras.layers.ReLU()(tensor)

    input_ = tf.keras.layers.Input(shape=input_shape)

    # Block 1: 64 filters, skip connection from the first unit to the third.
    conv1_1 = conv_bn_relu(input_, 64)
    conv1_2 = conv_bn_relu(conv1_1, 64)
    conv1_3 = conv_bn_relu(conv1_2, 64)
    concat_1 = Concatenate()([conv1_1, conv1_3])
    max_1 = MaxPool1D(5, padding="same")(concat_1)

    # Block 2: 128 filters, same layout.
    conv1_4 = conv_bn_relu(max_1, 128)
    conv1_5 = conv_bn_relu(conv1_4, 128)
    conv1_6 = conv_bn_relu(conv1_5, 128)
    concat_2 = Concatenate()([conv1_4, conv1_6])
    max_2 = MaxPool1D(5, padding="same")(concat_2)

    # Classifier head.
    flat = Flatten()(max_2)
    dense_1 = Dense(512, activation='relu')(flat)
    drop_1 = Dropout(0.5)(dense_1)
    dense_2 = Dense(256, activation='relu')(drop_1)
    drop_2 = Dropout(0.5)(dense_2)
    dense_3 = Dense(128, activation='relu')(drop_2)
    dense_4 = Dense(64, activation='relu')(dense_3)
    dense_5 = Dense(32, activation='relu')(dense_4)
    dense_6 = Dense(2, activation="softmax")(dense_5)

    model = Model(inputs=input_, outputs=dense_6)

    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=[tfa.metrics.F1Score(2,"micro")])
    return model

In [None]:
# Callbacks for the residual model: checkpoint to "conv1d_res_ptb.h5"
# (save_best_only) and early stopping with a patience of 5 epochs that
# restores the best weights.
checkpoint_cb = ModelCheckpoint("conv1d_res_ptb.h5", save_best_only=True)
earlystop_cb = EarlyStopping(patience=5, restore_best_weights=True)

# Input shape of one sample: the two dimensions after the batch axis
inp_shape = (x_train_ptb.shape[1], x_train_ptb.shape[2])
model_conv1d_res_ptb = build_conv1d_res_model(input_shape=inp_shape)
#model_conv1d_res_ptb.build(inp_shape)

In [None]:
# Train the residual Conv1D model and keep the result of fit() for the plots
# further down.
history_conv1d_res_ptb = model_conv1d_res_ptb.fit(
    x_train_ptb,
    y_train_ptb,
    batch_size=32,
    epochs=10,
    steps_per_epoch=1000,
    # class_weight=class_weight,  (currently disabled)
    validation_data=(x_valid_ptb, y_valid_ptb),
    validation_steps=500,
    callbacks=[checkpoint_cb, earlystop_cb],
)

In [None]:
# history_conv1d_res_ptb = model_conv1d_res_ptb.fit(x_train_ptb, y_train_ptb, epochs=40, batch_size=32, 
#                                           class_weight=class_weight, validation_data=(x_valid_ptb, y_valid_ptb),  
#                                           callbacks=[checkpoint_cb, earlystop_cb])

In [None]:
# Load the weights stored in "conv1d_res_ptb.h5" (the path given to the
# ModelCheckpoint callback for this model) and evaluate the residual model on
# the test data; the evaluate() result is the cell output.
model_conv1d_res_ptb.load_weights("conv1d_res_ptb.h5")
model_conv1d_res_ptb.evaluate(x_test_ptb,y_test_ptb)

In [None]:
# Predict class probabilities for the test inputs, then take the index of the
# largest output per sample as the predicted class.
conv1d_res_pred_proba_ptb = model_conv1d_res_ptb.predict(x_test_ptb)
conv1d_res_pred_ptb = conv1d_res_pred_proba_ptb.argmax(axis=1)

In [None]:
# Per-class precision / recall / F1 report for the residual Conv1D model.
# conv1d_res_pred_ptb already holds argmax class indices, so `> 0.5` only
# turns them into booleans.
# NOTE(review): presumably PTB_Outcome maps class index -> display name and
# out_test_ptb holds the integer test labels - confirm.
print(classification_report(out_test_ptb, conv1d_res_pred_ptb > 0.5, target_names=[PTB_Outcome[i] for i in PTB_Outcome]))

In [None]:
# scikit-learn metrics expect (y_true, y_score / y_pred) in that order; the
# arguments were previously swapped.
# ROC AUC is scored with the predicted probability of class 1 (second softmax
# column) rather than the hard labels; the label-based metrics use the argmax
# predictions.
# NOTE(review): assumes out_test_ptb holds the 0/1 integer test labels, as it
# is used as y_true in the classification report - confirm.
print(roc_auc_score(out_test_ptb, conv1d_res_pred_proba_ptb[:, 1]))
print(balanced_accuracy_score(out_test_ptb, conv1d_res_pred_ptb))
print(f1_score(out_test_ptb, conv1d_res_pred_ptb))

In [None]:
# Plot the training and validation curves (loss and F1) of the residual model
plt.figure(figsize=(25,12))

# (history key, line colour, legend label, extra line style)
_conv1d_res_curves = (
    ('loss', 'r', 'Train loss', {}),
    ('val_loss', 'b', 'Val loss', {'linestyle': "--"}),
    ('f1_score', 'g', 'Train F1', {}),
    ('val_f1_score', 'c', 'Val F1', {'linestyle': "--"}),
)
for _key, _colour, _label, _extra in _conv1d_res_curves:
    plt.plot(history_conv1d_res_ptb.epoch, history_conv1d_res_ptb.history[_key],
             color=_colour, label=_label, **_extra)

# NOTE(review): the y axis is labelled 'Loss' although the F1 curves share it.
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()