# <center>Physionet/Cinc Challenge 2020</center>
## <center>The Physionet/CinC Challenge 2020 is about automatic detection and classification of cardiac abnormalities using 12-Lead ECGs</center>

## In this study I have examined Deep Learning as a method to classify 12-lead ECGs. More specifically, I have used:
* ## 1D Fully Convolutional Network
* ## 1D Residual Networks
* ## 1D Encoders
## I have combined these three CNNs with Dense Neural Networks and some rule-based algorithms, such as Pan-Tompkins, to optimize the performance of the classification

In [None]:
!pip install ecg-plot
import physionet_challenge_utility_script as pc
import ecg_plot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow_addons as tfa
import tensorflow as tf
from tensorflow import keras
from keras.utils import plot_model
from keras.preprocessing.sequence import pad_sequences

def plot_ecg(path, sample_rate=500, title=''):
    """Plot one ECG recording with ``ecg_plot``.

    Args:
        path: Path of the signal file; passed unchanged to
            ``pc.load_challenge_data``.
        sample_rate: Sampling frequency in Hz forwarded to ``ecg_plot.plot``.
            Defaults to 500, the value that used to be hard-coded, so
            existing calls behave as before.
        title: Title forwarded to ``ecg_plot.plot``; empty by default.
    """
    # load_challenge_data returns (signal, header); only the signal is plotted.
    signal, _header = pc.load_challenge_data(path)
    # The raw values are divided by 1000 before plotting
    # (presumably ADC units -> mV; TODO confirm against the header gain).
    ecg_plot.plot(signal / 1000, sample_rate=sample_rate, title=title)
    ecg_plot.show()
# Example: plot recording Q0948 from the China 12-lead ECG challenge database.
plot_ecg("/kaggle/input/china-12lead-ecg-challenge-database/Training_2/Q0948.mat")

# Feature Description
## Primary Features
### This dataset contains 43,101 electrocardiographic recordings from different patients. The data is provided in the WaveForm DataBase (WFDB) format, which can be read natively in Python [ref: https://wfdb.readthedocs.io/en/latest/]. The dataset contains two file types:


### 1.   Header files (.hea)
### 2.   Signal files (.mat)

### We have 43,101 signal files, each with a corresponding header file. Each file is named with a patient number, starting with ***A0001*** and going all the way up to ***A6877***


In [None]:
# Collect gender, age, diagnosis labels and file names for the recordings
# found under the Kaggle input directory.
gender, age, labels, ecg_filenames = pc.import_key_data("/kaggle/input/")
# Convert the file names to a NumPy array.
ecg_filenames = np.asarray(ecg_filenames)

## The data set we use for training and validation in this study has different signal lengths
### From the figure below we can see that the signal lengths vary, but most of the signals are around 5000 samples long

In [None]:
# Plot the distribution of signal lengths; the second argument is the title.
pc.get_signal_lengths("/kaggle/input/", "Distribution of signal lengths of the ECGs")

### Clean age and gender data

In [None]:
# Replace the raw age/gender values with their cleaned versions.
# Note the argument and return order: (age, gender).
age, gender = pc.import_gender_and_age(age, gender)

### Import sheet to decode SNOMED-CT codes

In [None]:
# SNOMED-CT mapping tables (semicolon-separated) for the scored and the
# unscored diagnoses of the challenge.
SNOMED_scored=pd.read_csv("/kaggle/input/physionet-snomed-mappings/SNOMED_mappings_scored.csv", sep=";")
SNOMED_unscored=pd.read_csv("/kaggle/input/physionet-snomed-mappings/SNOMED_mappings_unscored.csv", sep=";")
# Presumably maps every unscored code in `labels` to a single "undefined"
# class -- TODO confirm in the utility script.
df_labels = pc.make_undefined_class(labels,SNOMED_unscored)

### Get OneHot encoded labels

In [None]:
# y: one-hot (multi-label) target matrix; snomed_classes: the class codes,
# presumably in the same order as the columns of y -- TODO confirm.
y , snomed_classes = pc.onehot_encode(df_labels)

### In the figure below we can see the same SNOMED CT codes decoded into human-readable diagnoses on the X-axis. On the Y-axis we have the number of occurrences of each diagnosis in the dataset

In [None]:
# Bar plot of how often each diagnosis occurs in the data set.
pc.plot_classes(snomed_classes, SNOMED_scored,y)

###  Transform our one-hot encoded multilabel output into a new type of label where each unique combination of diagnoses is represented by a single number:

In [None]:
# One label per recording, derived from its combination of diagnoses
# in the one-hot matrix.
y_all_comb = pc.get_labels_for_all_combinations(y)
n_unique_combinations = len(np.unique(y_all_comb))
print("Total number of unique combinations of diagnosis: {}".format(n_unique_combinations))

### K-fold, 10 splits, Shuffle=True and random_seed = 42. The distribution of Training and Val data in each fold is now:

In [None]:
# Split the data into folds; the combination labels are passed alongside the
# raw labels (presumably for stratification -- TODO confirm).
# Each fold is indexed below as folds[k][0] / folds[k][1].
folds = pc.split_data(labels, y_all_comb)

In [None]:
# Visualise the class distribution of every fold.
pc.plot_all_folds(folds,y,snomed_classes)

## Make validation data (for hold out validation)

## Make Batch generators

In [None]:
# Index array the training generators iterate over (first part of fold 0;
# folds[0][1] is used for validation further down).
# NOTE(review): this is an alias, not a copy -- the batch generators call
# np.random.shuffle(order_array), which also reorders folds[0][0] in place.
order_array = folds[0][0]

In [None]:
def shuffle_batch_generator_demo(batch_size, gen_x,gen_y, gen_z):
    """Yield endless ``([signals, demographics], labels)`` batches.

    Args:
        batch_size: Number of samples per yielded batch.
        gen_x: Iterator yielding one signal array per sample; each item must
            fit a ``(5000, 12)`` slot.
        gen_y: Iterator yielding one label vector per sample, of length
            ``snomed_classes.shape[0]``.
        gen_z: Iterator yielding one two-element demographic record per sample.

    Yields:
        ``([batch_features, batch_demo_data], batch_labels)`` with shapes
        ``(batch_size, 5000, 12)``, ``(batch_size, 2)`` and
        ``(batch_size, n_classes)``.

    Side effects:
        Shuffles the module-level ``order_array`` in place, once, when the
        generator is first advanced.
    """
    np.random.shuffle(order_array)
    n_classes = snomed_classes.shape[0]
    while True:
        # Allocate new arrays for every batch. Re-using one set of buffers
        # (as before) means a batch that a consumer still holds -- e.g. in a
        # prefetch queue -- is silently overwritten by the next one.
        batch_features = np.zeros((batch_size, 5000, 12))
        batch_labels = np.zeros((batch_size, n_classes))
        batch_demo_data = np.zeros((batch_size, 2))
        for i in range(batch_size):
            batch_features[i] = next(gen_x)
            batch_labels[i] = next(gen_y)
            batch_demo_data[i] = next(gen_z)

        yield [batch_features, batch_demo_data], batch_labels
        
def shuffle_batch_generator(batch_size, gen_x,gen_y):
    """Yield endless ``(signals, labels)`` batches.

    Args:
        batch_size: Number of samples per yielded batch.
        gen_x: Iterator yielding one signal array per sample; each item must
            fit a ``(5000, 12)`` slot.
        gen_y: Iterator yielding one label vector per sample, of length
            ``snomed_classes.shape[0]``.

    Yields:
        ``(batch_features, batch_labels)`` with shapes
        ``(batch_size, 5000, 12)`` and ``(batch_size, n_classes)``.

    Side effects:
        Shuffles the module-level ``order_array`` in place, once, when the
        generator is first advanced.
    """
    np.random.shuffle(order_array)
    n_classes = snomed_classes.shape[0]
    while True:
        # Allocate new arrays for every batch. Re-using one set of buffers
        # (as before) means a batch that a consumer still holds -- e.g. in a
        # prefetch queue -- is silently overwritten by the next one.
        batch_features = np.zeros((batch_size, 5000, 12))
        batch_labels = np.zeros((batch_size, n_classes))
        for i in range(batch_size):
            batch_features[i] = next(gen_x)
            batch_labels[i] = next(gen_y)

        yield batch_features, batch_labels

def generate_y_shuffle(y_train):
    """Endlessly yield ``y_train[i]`` for each ``i`` in ``order_array``.

    ``order_array`` is re-read at the start of every pass, so each pass
    follows whatever order the array has at that moment.
    """
    while True:
        yield from (y_train[idx] for idx in order_array)


def generate_X_shuffle(X_train):
    """Endlessly yield one padded signal per index in ``order_array``.

    For every index, the recording at ``X_train[i]`` is loaded with
    ``pc.load_challenge_data``, each row is padded/truncated at the end to
    5000 values, and the result is reshaped to ``(5000, 12)``.
    """
    while True:
        for idx in order_array:
            signal, _header = pc.load_challenge_data(X_train[idx])
            # Post-pad and post-truncate so every recording has 5000 samples.
            fixed_length = pad_sequences(signal, maxlen=5000, truncating='post', padding="post")
            # NOTE(review): if `signal` is (12, n_samples), reshape re-reads
            # the buffer row-major instead of transposing it, so leads are
            # interleaved. Left as is because the pre-trained weights were
            # presumably produced with this layout -- confirm before changing.
            yield fixed_length.reshape(5000, 12)

def generate_z_shuffle(age_train, gender_train):
    """Endlessly yield ``[age, gender]`` for each index in ``order_array``."""
    while True:
        for idx in order_array:
            yield [age_train[idx], gender_train[idx]]

In [None]:
# Class weights computed from the one-hot label matrix; turned into the
# Keras `class_weight` dictionary in the next cell.
new_weights=pc.calculating_class_weights(y)

In [None]:
# Build the {class index: weight} mapping passed to Keras as `class_weight`.
# Row 1 of the transposed weight table is used (presumably the weight of the
# positive label of each class -- TODO confirm in the utility script).
class_weights = new_weights.T[1]
# Derive the number of classes from the weights instead of hard-coding 27,
# so the cell keeps working if the label set changes.
keys = np.arange(len(class_weights))
weight_dictionary = dict(zip(keys, class_weights))
weight_dictionary

In [None]:
# Multiply the learning rate by 0.1 when validation AUC has not improved by
# at least 1e-4 for one epoch.
# NOTE(review): 'val_AUC' must match the metric name the models are compiled
# with -- confirm in the utility script.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_AUC', factor=0.1, patience=1, verbose=1, mode='max',
    min_delta=0.0001, cooldown=0, min_lr=0
)

# Stop training after two epochs without improvement in validation AUC.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_AUC', mode='max', verbose=1, patience=2)

## Residual Network

In [None]:
# Build the residual-network model defined in the utility script.
model = pc.residual_network_1d()

Load a pre-trained model:

In [None]:
# Restore pre-trained weights instead of training from scratch.
model.load_weights("/kaggle/input/physionet-challenge-models/resnet_model.h5")

Or train it yourself by uncommenting the code below:

In [None]:
#batchsize = 30

#model.fit(x=shuffle_batch_generator(batch_size=batchsize, gen_x=generate_X_shuffle(ecg_filenames), gen_y=generate_y_shuffle(y)), epochs=100, steps_per_epoch=(len(order_array)/batchsize), validation_data=pc.generate_validation_data(ecg_filenames,y,folds[0][1]), validation_freq=1, class_weight=weight_dictionary, callbacks=[reduce_lr,early_stop])

In [None]:
# Predict on the validation part of fold 0; element [0] of the validation
# tuple is the model input.
y_pred = model.predict(x=pc.generate_validation_data(ecg_filenames,y,folds[0][1])[0])

In [None]:
# Candidate global thresholds 0.00, 0.05, ..., 0.95 (1.0 is excluded).
init_thresholds = np.arange(0,1,0.05)

In [None]:
# Score the validation predictions (presumably one challenge score per
# candidate threshold). NOTE(review): argmax of this result is used to index
# init_thresholds, so the helper must use the same grid -- confirm.
all_scores = pc.iterate_threshold(y_pred, ecg_filenames, y ,folds[0][1] )

In [None]:
from scipy import optimize


def thr_chall_metrics(thr, label, output_prob):
    """Objective for the threshold search: the negated challenge metric.

    Args:
        thr: Threshold(s) compared element-wise against ``output_prob``.
        label: Ground-truth labels passed to the metric.
        output_prob: Predicted probabilities.

    Returns:
        The challenge metric of the thresholded predictions with its sign
        flipped, so that a minimiser maximises the metric.
    """
    binary_output = np.array(output_prob > thr)
    score = pc.compute_challenge_metric_for_opt(label, binary_output)
    return -score

In [None]:
# Refine one threshold per class with scipy's fmin, starting every class at
# the best global threshold found above. The number of thresholds follows
# the model output width instead of a hard-coded 27.
new_best_thr = optimize.fmin(thr_chall_metrics, args=(pc.generate_validation_data(ecg_filenames,y,folds[0][1])[1],y_pred), x0=init_thresholds[all_scores.argmax()]*np.ones(y_pred.shape[1]))

In [None]:
# Challenge metric on the validation fold after binarising the predictions
# with the optimised per-class thresholds (bool -> 0/1 via *1).
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data(ecg_filenames,y,folds[0][1])[1],(y_pred>new_best_thr)*1))

Make conf.matrix

In [None]:
# Plot the confusion matrix of the thresholded validation predictions, then
# save the current matplotlib figure.
# NOTE(review): if the helper calls plt.show(), savefig may write an empty
# figure -- confirm.
pc.plot_normalied_conf_matrix(y_pred, ecg_filenames, y, folds[0][1], new_best_thr, snomed_classes)
plt.savefig("confusion_matrix_resnet.png")

## Encoder Network

In [None]:
# Build the encoder model defined in the utility script.
model = pc.encoder_model()

In [None]:
# Restore pre-trained weights instead of training from scratch.
model.load_weights("/kaggle/input/physionet-challenge-models/encoder_model.h5")

In [None]:
#batchsize = 30

#model.fit(x=shuffle_batch_generator(batch_size=batchsize, gen_x=generate_X_shuffle(ecg_filenames), gen_y=generate_y_shuffle(y)), epochs=50, steps_per_epoch=(len(order_array)/batchsize), validation_data=pc.generate_validation_data(ecg_filenames,y,folds[0][1]), validation_freq=1, class_weight=weight_dictionary, callbacks=[reduce_lr,early_stop])

In [None]:
# Predict on the validation part of fold 0; element [0] of the validation
# tuple is the model input.
y_pred = model.predict(x=pc.generate_validation_data(ecg_filenames,y,folds[0][1])[0])

In [None]:
# Candidate global thresholds 0.00, 0.05, ..., 0.95 (1.0 is excluded).
init_thresholds = np.arange(0,1,0.05)

In [None]:
# Score the validation predictions (presumably one challenge score per
# candidate threshold). NOTE(review): argmax of this result is used to index
# init_thresholds, so the helper must use the same grid -- confirm.
all_scores = pc.iterate_threshold(y_pred, ecg_filenames, y ,folds[0][1])

In [None]:
from scipy import optimize


def thr_chall_metrics(thr, label, output_prob):
    """Objective for the threshold search: the negated challenge metric.

    Args:
        thr: Threshold(s) compared element-wise against ``output_prob``.
        label: Ground-truth labels passed to the metric.
        output_prob: Predicted probabilities.

    Returns:
        The challenge metric of the thresholded predictions with its sign
        flipped, so that a minimiser maximises the metric.
    """
    binary_output = np.array(output_prob > thr)
    score = pc.compute_challenge_metric_for_opt(label, binary_output)
    return -score

In [None]:
# Refine one threshold per class with scipy's fmin, starting every class at
# the best global threshold found above. The number of thresholds follows
# the model output width instead of a hard-coded 27.
new_best_thr = optimize.fmin(thr_chall_metrics, args=(pc.generate_validation_data(ecg_filenames,y,folds[0][1])[1],y_pred), x0=init_thresholds[all_scores.argmax()]*np.ones(y_pred.shape[1]))

In [None]:
# Challenge metric on the validation fold after binarising the predictions
# with the optimised per-class thresholds (bool -> 0/1 via *1).
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data(ecg_filenames,y,folds[0][1])[1],(y_pred>new_best_thr)*1))

Make conf.matrix

In [None]:
# Plot the confusion matrix of the thresholded validation predictions, then
# save the current matplotlib figure.
# NOTE(review): if the helper calls plt.show(), savefig may write an empty
# figure -- confirm.
pc.plot_normalied_conf_matrix(y_pred, ecg_filenames, y, folds[0][1], new_best_thr, snomed_classes)
plt.savefig("confusion_matrix_encoder.png")

## Fully Convolutional Network

In [None]:
# Build the fully convolutional network defined in the utility script.
model = pc.FCN()

In [None]:
# Restore pre-trained weights instead of training from scratch.
model.load_weights("/kaggle/input/physionet-challenge-models/fcn_model.h5")

In [None]:
#batchsize = 30

#model.fit(x=shuffle_batch_generator(batch_size=batchsize, gen_x=generate_X_shuffle(ecg_filenames), gen_y=generate_y_shuffle(y)), epochs=30, steps_per_epoch=(len(order_array)/batchsize), validation_data=pc.generate_validation_data(ecg_filenames,y,folds[0][1]), validation_freq=1, class_weight=weight_dictionary, callbacks=[reduce_lr,early_stop])

In [None]:
# Predict on the validation part of fold 0; element [0] of the validation
# tuple is the model input.
y_pred = model.predict(x=pc.generate_validation_data(ecg_filenames,y,folds[0][1])[0])

In [None]:
# Candidate global thresholds 0.00, 0.05, ..., 0.95 (1.0 is excluded).
init_thresholds = np.arange(0,1,0.05)

In [None]:
# Score the validation predictions (presumably one challenge score per
# candidate threshold). NOTE(review): argmax of this result is used to index
# init_thresholds, so the helper must use the same grid -- confirm.
all_scores = pc.iterate_threshold(y_pred, ecg_filenames, y ,folds[0][1])

In [None]:
from scipy import optimize


def thr_chall_metrics(thr, label, output_prob):
    """Objective for the threshold search: the negated challenge metric.

    Args:
        thr: Threshold(s) compared element-wise against ``output_prob``.
        label: Ground-truth labels passed to the metric.
        output_prob: Predicted probabilities.

    Returns:
        The challenge metric of the thresholded predictions with its sign
        flipped, so that a minimiser maximises the metric.
    """
    binary_output = np.array(output_prob > thr)
    score = pc.compute_challenge_metric_for_opt(label, binary_output)
    return -score

In [None]:
# Refine one threshold per class with scipy's fmin, starting every class at
# the best global threshold found above. The number of thresholds follows
# the model output width instead of a hard-coded 27.
new_best_thr = optimize.fmin(thr_chall_metrics, args=(pc.generate_validation_data(ecg_filenames,y,folds[0][1])[1],y_pred), x0=init_thresholds[all_scores.argmax()]*np.ones(y_pred.shape[1]))

In [None]:
# Challenge metric on the validation fold after binarising the predictions
# with the optimised per-class thresholds (bool -> 0/1 via *1).
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data(ecg_filenames,y,folds[0][1])[1],(y_pred>new_best_thr)*1))

Make conf.matrix

In [None]:
# Plot the confusion matrix of the thresholded validation predictions, then
# save the current matplotlib figure.
# NOTE(review): if the helper calls plt.show(), savefig may write an empty
# figure -- confirm.
pc.plot_normalied_conf_matrix(y_pred, ecg_filenames, y, folds[0][1], new_best_thr, snomed_classes)
plt.savefig("confusion_matrix_fcn.png")

## ResNet + Gender and Age

In [None]:
# Build the residual-network variant that also takes age/gender input.
model = pc.residual_network_1d_demo()

In [None]:
# Restore pre-trained weights instead of training from scratch.
model.load_weights("/kaggle/input/physionet-challenge-models/resnet_gender_age_model.h5")

In [None]:
#batchsize = 30

#history = model.fit(x=shuffle_batch_generator_demo(batch_size=batchsize, gen_x=generate_X_shuffle(ecg_filenames), gen_y=generate_y_shuffle(y), gen_z=generate_z_shuffle(age, gender)), epochs=50, steps_per_epoch=(len(order_array)/batchsize), validation_data=pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age, folds[0][1]), validation_freq=1, class_weight=weight_dictionary, callbacks=[reduce_lr,early_stop])

In [None]:
# Predict on the validation part of fold 0 using the validation data that
# includes gender and age; element [0] of the tuple is the model input.
y_pred = model.predict(x=pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[0])

In [None]:
# Candidate global thresholds 0.00, 0.05, ..., 0.95 (1.0 is excluded).
init_thresholds = np.arange(0,1,0.05)

In [None]:
# Score the validation predictions (presumably one challenge score per
# candidate threshold). NOTE(review): argmax of this result is used to index
# init_thresholds, so the helper must use the same grid -- confirm.
all_scores = pc.iterate_threshold(y_pred, ecg_filenames, y ,folds[0][1] )

In [None]:
from scipy import optimize


def thr_chall_metrics(thr, label, output_prob):
    """Objective for the threshold search: the negated challenge metric.

    Args:
        thr: Threshold(s) compared element-wise against ``output_prob``.
        label: Ground-truth labels passed to the metric.
        output_prob: Predicted probabilities.

    Returns:
        The challenge metric of the thresholded predictions with its sign
        flipped, so that a minimiser maximises the metric.
    """
    binary_output = np.array(output_prob > thr)
    score = pc.compute_challenge_metric_for_opt(label, binary_output)
    return -score

In [None]:
# Refine one threshold per class with scipy's fmin, starting every class at
# the best global threshold found above. The number of thresholds follows
# the model output width instead of a hard-coded 27.
new_best_thr = optimize.fmin(thr_chall_metrics, args=(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],y_pred), x0=init_thresholds[all_scores.argmax()]*np.ones(y_pred.shape[1]))

In [None]:
# Challenge metric on the validation fold after binarising the predictions
# with the optimised per-class thresholds (bool -> 0/1 via *1).
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],(y_pred>new_best_thr)*1))

Conf.matrix

In [None]:
# Plot the confusion matrix of the thresholded validation predictions, then
# save the current matplotlib figure.
# NOTE(review): if the helper calls plt.show(), savefig may write an empty
# figure -- confirm.
pc.plot_normalied_conf_matrix(y_pred, ecg_filenames, y, folds[0][1], new_best_thr, snomed_classes)
plt.savefig("confusion_matrix_resnet_age_gender.png")

## Encoder + Gender and Age

In [None]:
# Build the encoder variant that also takes age/gender input.
model = pc.encoder_model_demo()

In [None]:
# Restore pre-trained weights instead of training from scratch.
model.load_weights("/kaggle/input/physionet-challenge-models/encoder_gender_age_model.h5")

In [None]:
#batchsize = 30

#history = model.fit(x=shuffle_batch_generator_demo(batch_size=batchsize, gen_x=generate_X_shuffle(ecg_filenames), gen_y=generate_y_shuffle(y), gen_z=generate_z_shuffle(age, gender)), epochs=50, steps_per_epoch=(len(order_array)/batchsize), validation_data=pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age, folds[0][1]), validation_freq=1, class_weight=weight_dictionary, callbacks=[reduce_lr,early_stop])

In [None]:
# Predict on the validation part of fold 0 using the validation data that
# includes gender and age; element [0] of the tuple is the model input.
y_pred = model.predict(x=pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[0])

In [None]:
# Candidate global thresholds 0.00, 0.05, ..., 0.95 (1.0 is excluded).
init_thresholds = np.arange(0,1,0.05)

In [None]:
# Score the validation predictions (presumably one challenge score per
# candidate threshold). NOTE(review): argmax of this result is used to index
# init_thresholds, so the helper must use the same grid -- confirm.
all_scores = pc.iterate_threshold(y_pred, ecg_filenames, y ,folds[0][1] )

In [None]:
from scipy import optimize


def thr_chall_metrics(thr, label, output_prob):
    """Objective for the threshold search: the negated challenge metric.

    Args:
        thr: Threshold(s) compared element-wise against ``output_prob``.
        label: Ground-truth labels passed to the metric.
        output_prob: Predicted probabilities.

    Returns:
        The challenge metric of the thresholded predictions with its sign
        flipped, so that a minimiser maximises the metric.
    """
    binary_output = np.array(output_prob > thr)
    score = pc.compute_challenge_metric_for_opt(label, binary_output)
    return -score

In [None]:
# Refine one threshold per class with scipy's fmin, starting every class at
# the best global threshold found above. The number of thresholds follows
# the model output width instead of a hard-coded 27.
new_best_thr = optimize.fmin(thr_chall_metrics, args=(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],y_pred), x0=init_thresholds[all_scores.argmax()]*np.ones(y_pred.shape[1]))

In [None]:
# Challenge metric on the validation fold after binarising the predictions
# with the optimised per-class thresholds (bool -> 0/1 via *1).
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],(y_pred>new_best_thr)*1))

Conf.matrix

In [None]:
# Plot the confusion matrix of the thresholded validation predictions, then
# save the current matplotlib figure.
# NOTE(review): if the helper calls plt.show(), savefig may write an empty
# figure -- confirm.
pc.plot_normalied_conf_matrix(y_pred, ecg_filenames, y, folds[0][1], new_best_thr, snomed_classes)
plt.savefig("confusion_matrix_encoder_age_gender.png")

## FCN + Gender and Age

In [None]:
# Build the FCN variant that also takes age/gender input.
model = pc.FCN_demo()

In [None]:
# Restore pre-trained weights instead of training from scratch.
model.load_weights("/kaggle/input/physionet-challenge-models/fcn_gender_age_model.h5")

In [None]:
#batchsize = 30

#history = model.fit(x=shuffle_batch_generator_demo(batch_size=batchsize, gen_x=generate_X_shuffle(ecg_filenames), gen_y=generate_y_shuffle(y), gen_z=generate_z_shuffle(age, gender)), epochs=50, steps_per_epoch=(len(order_array)/batchsize), validation_data=pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age, folds[0][1]), validation_freq=1, class_weight=weight_dictionary, callbacks=[reduce_lr,early_stop])

In [None]:
# Predict on the validation part of fold 0 using the validation data that
# includes gender and age; element [0] of the tuple is the model input.
y_pred = model.predict(x=pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[0])

In [None]:
# Candidate global thresholds 0.00, 0.05, ..., 0.95 (1.0 is excluded).
init_thresholds = np.arange(0,1,0.05)

In [None]:
# Score the validation predictions (presumably one challenge score per
# candidate threshold). NOTE(review): argmax of this result is used to index
# init_thresholds, so the helper must use the same grid -- confirm.
all_scores = pc.iterate_threshold(y_pred, ecg_filenames, y ,folds[0][1] )

In [None]:
from scipy import optimize


def thr_chall_metrics(thr, label, output_prob):
    """Objective for the threshold search: the negated challenge metric.

    Args:
        thr: Threshold(s) compared element-wise against ``output_prob``.
        label: Ground-truth labels passed to the metric.
        output_prob: Predicted probabilities.

    Returns:
        The challenge metric of the thresholded predictions with its sign
        flipped, so that a minimiser maximises the metric.
    """
    binary_output = np.array(output_prob > thr)
    score = pc.compute_challenge_metric_for_opt(label, binary_output)
    return -score

In [None]:
# Refine one threshold per class with scipy's fmin, starting every class at
# the best global threshold found above. The number of thresholds follows
# the model output width instead of a hard-coded 27.
new_best_thr = optimize.fmin(thr_chall_metrics, args=(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],y_pred), x0=init_thresholds[all_scores.argmax()]*np.ones(y_pred.shape[1]))

In [None]:
# Challenge metric on the validation fold after binarising the predictions
# with the optimised per-class thresholds (bool -> 0/1 via *1).
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],(y_pred>new_best_thr)*1))

Conf matrix

In [None]:
# Plot the confusion matrix of the thresholded validation predictions, then
# save the current matplotlib figure.
# NOTE(review): if the helper calls plt.show(), savefig may write an empty
# figure -- confirm.
pc.plot_normalied_conf_matrix(y_pred, ecg_filenames, y, folds[0][1], new_best_thr, snomed_classes)
plt.savefig("confusion_matrix_fcn_gender_age.png")

## FCN and Encoder

In [None]:
# Build the combined FCN + encoder model defined in the utility script.
model = pc.FCN_Encoder()

In [None]:
# Restore pre-trained weights instead of training from scratch.
model.load_weights("/kaggle/input/physionet-challenge-models/fcn_and_encoder_model.h5")

In [None]:
#batchsize = 30

#model.fit(x=shuffle_batch_generator(batch_size=batchsize, gen_x=generate_X_shuffle(ecg_filenames), gen_y=generate_y_shuffle(y)), epochs=5, steps_per_epoch=(len(order_array)/batchsize), validation_data=pc.generate_validation_data(ecg_filenames,y,folds[0][1]), validation_freq=1, class_weight=weight_dictionary, callbacks=[reduce_lr,early_stop])

In [None]:
# FCN_Encoder is trained and validated on ECG signals only (see the
# commented-out fit call above, which uses shuffle_batch_generator and
# pc.generate_validation_data), so predict on the signal-only validation
# input rather than the [signal, age/gender] pair.
y_pred = model.predict(x=pc.generate_validation_data(ecg_filenames,y,folds[0][1])[0])

In [None]:
# Candidate global thresholds 0.00, 0.05, ..., 0.95 (1.0 is excluded).
init_thresholds = np.arange(0,1,0.05)

In [None]:
# Score the validation predictions (presumably one challenge score per
# candidate threshold). NOTE(review): argmax of this result is used to index
# init_thresholds, so the helper must use the same grid -- confirm.
all_scores = pc.iterate_threshold(y_pred, ecg_filenames, y ,folds[0][1] )

In [None]:
from scipy import optimize


def thr_chall_metrics(thr, label, output_prob):
    """Objective for the threshold search: the negated challenge metric.

    Args:
        thr: Threshold(s) compared element-wise against ``output_prob``.
        label: Ground-truth labels passed to the metric.
        output_prob: Predicted probabilities.

    Returns:
        The challenge metric of the thresholded predictions with its sign
        flipped, so that a minimiser maximises the metric.
    """
    binary_output = np.array(output_prob > thr)
    score = pc.compute_challenge_metric_for_opt(label, binary_output)
    return -score

In [None]:
# Refine one threshold per class with scipy's fmin, starting every class at
# the best global threshold found above. The number of thresholds follows
# the model output width instead of a hard-coded 27.
new_best_thr = optimize.fmin(thr_chall_metrics, args=(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],y_pred), x0=init_thresholds[all_scores.argmax()]*np.ones(y_pred.shape[1]))

In [None]:
# Challenge metric on the validation fold after binarising the predictions
# with the optimised per-class thresholds (bool -> 0/1 via *1).
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],(y_pred>new_best_thr)*1))

Conf.matrix

In [None]:
# Plot the confusion matrix of the thresholded validation predictions, then
# save the current matplotlib figure.
# NOTE(review): if the helper calls plt.show(), savefig may write an empty
# figure -- confirm.
pc.plot_normalied_conf_matrix(y_pred, ecg_filenames, y, folds[0][1], new_best_thr, snomed_classes)
plt.savefig("confusion_matrix_fcn_and_encoder.png")

## FCN and Encoder + Rule-based model

In [None]:
# Binarise the probabilities with the optimised thresholds; multiplying the
# boolean mask by 1 turns it into 0/1 integers.
binary_prediction = (y_pred > new_best_thr) * 1

In [None]:
# Apply the rule-based step to the network's binary predictions on the
# validation fold.
rb_pred = pc.rule_based_predictions(ecg_filenames,folds[0][1],binary_prediction)

In [None]:
# Confusion matrix for this section.
# NOTE(review): this is given `binary_prediction`, not `rb_pred`. Unless
# rule_based_predictions modifies `binary_prediction` in place, the plot does
# not include the rule-based corrections -- confirm.
pc.plot_normalied_conf_matrix_rule(y,folds[0][1], binary_prediction, snomed_classes)

In [None]:
# Challenge metric of the rule-based predictions on the validation fold.
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],rb_pred))

## FCN and Encoder + Gender and Age

In [None]:
# Build the combined FCN + encoder variant that also takes age/gender input.
model = pc.FCN_Encoder_demo()

In [None]:
# Restore pre-trained weights instead of training from scratch.
model.load_weights("/kaggle/input/physionet-challenge-models/fcn_encoder_and_gender_age_model.h5")

In [None]:
#batchsize = 30

#history = model.fit(x=shuffle_batch_generator_demo(batch_size=batchsize, gen_x=generate_X_shuffle(ecg_filenames), gen_y=generate_y_shuffle(y), gen_z=generate_z_shuffle(age, gender)), epochs=30, steps_per_epoch=(len(order_array)/batchsize), validation_data=pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age, folds[0][1]), validation_freq=1, class_weight=weight_dictionary, callbacks=[reduce_lr,early_stop])

In [None]:
# Predict on the validation part of fold 0 using the validation data that
# includes gender and age; element [0] of the tuple is the model input.
y_pred = model.predict(x=pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[0])

In [None]:
# Candidate global thresholds 0.00, 0.05, ..., 0.95 (1.0 is excluded).
init_thresholds = np.arange(0,1,0.05)

In [None]:
# Score the validation predictions (presumably one challenge score per
# candidate threshold). NOTE(review): argmax of this result is used to index
# init_thresholds, so the helper must use the same grid -- confirm.
all_scores = pc.iterate_threshold(y_pred, ecg_filenames, y ,folds[0][1] )

In [None]:
from scipy import optimize


def thr_chall_metrics(thr, label, output_prob):
    """Objective for the threshold search: the negated challenge metric.

    Args:
        thr: Threshold(s) compared element-wise against ``output_prob``.
        label: Ground-truth labels passed to the metric.
        output_prob: Predicted probabilities.

    Returns:
        The challenge metric of the thresholded predictions with its sign
        flipped, so that a minimiser maximises the metric.
    """
    binary_output = np.array(output_prob > thr)
    score = pc.compute_challenge_metric_for_opt(label, binary_output)
    return -score

In [None]:
# Refine one threshold per class with scipy's fmin, starting every class at
# the best global threshold found above. The number of thresholds follows
# the model output width instead of a hard-coded 27.
new_best_thr = optimize.fmin(thr_chall_metrics, args=(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],y_pred), x0=init_thresholds[all_scores.argmax()]*np.ones(y_pred.shape[1]))

In [None]:
# Challenge metric on the validation fold after binarising the predictions
# with the optimised per-class thresholds (bool -> 0/1 via *1).
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],(y_pred>new_best_thr)*1))

Conf.matrix

In [None]:
# Plot the confusion matrix of the thresholded validation predictions, then
# save the current matplotlib figure.
# NOTE(review): if the helper calls plt.show(), savefig may write an empty
# figure -- confirm.
pc.plot_normalied_conf_matrix(y_pred, ecg_filenames, y, folds[0][1], new_best_thr, snomed_classes)
plt.savefig("confusion_matrix_fcn_and_encoder_an_demo.png")

## FCN and Encoder + Gender and Age + Rule-based

In [None]:
# Binarise the probabilities with the optimised thresholds; multiplying the
# boolean mask by 1 turns it into 0/1 integers.
binary_prediction = (y_pred > new_best_thr) * 1

In [None]:
# Apply the rule-based step to the network's binary predictions on the
# validation fold.
rb_pred = pc.rule_based_predictions(ecg_filenames,folds[0][1],binary_prediction)

In [None]:
# Confusion matrix for this section.
# NOTE(review): this is given `binary_prediction`, not `rb_pred`. Unless
# rule_based_predictions modifies `binary_prediction` in place, the plot does
# not include the rule-based corrections -- confirm.
pc.plot_normalied_conf_matrix_rule(y,folds[0][1], binary_prediction, snomed_classes)

In [None]:
# Challenge metric of the rule-based predictions on the validation fold.
print(pc.compute_challenge_metric_for_opt(pc.generate_validation_data_with_demo_data(ecg_filenames,y, gender, age,folds[0][1])[1],rb_pred))