# Libraries

In [27]:
# !pip install imbalanced-learn
# !pip install scikit-image
# !pip install statsmodels
# !pip install tensorflow
# !pip install pydot
# !pip install graphviz

In [28]:
# import zipfile
# with zipfile.ZipFile("training.zip", 'r') as zip_ref:
#     zip_ref.extractall("")

In [29]:
from wettbewerb import load_references, save_predictions
from preprocess import *
from sklearn import preprocessing
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
import pydot
import graphviz

# Data import

In [30]:
# Load the training recordings. load_references returns four values, bound here as the
# lead signals, their labels, the sampling frequency and the record names.
# NOTE(review): presumably one list entry per file - confirm against wettbewerb.load_references.
ecg_leads, ecg_labels, fs, ecg_names = load_references(folder='/shared_data/training')

6000	 Dateien wurden geladen.


# Preprocessing

In [31]:
# Working name for the recordings during preprocessing. This binds the same object as
# ecg_leads (no copy is made); the following cells rebind the name to new lists.
ecg_leads_edited = ecg_leads

### Noise removal (fan_multiscaled_2018)

In [32]:
# Frequency-domain denoising of every recording: transform, pass the spectrum through
# ecg_denoise_spectrum with the bounds 0 and 60, then transform back.
filtered = []
for recording in ecg_leads_edited:
    spectrum, frequencies = ecg_furier(recording, fs)
    band_limited = ecg_denoise_spectrum(spectrum, frequencies, 0, 60)
    filtered.append(ecg_invfurier(band_limited))

ecg_leads_edited = filtered

### Z-normalization

In [33]:
# Apply ecg_norm to every denoised recording and collect the results in a new list.
filtered = [ecg_norm(recording) for recording in ecg_leads_edited]

ecg_leads_edited = filtered

### Length normalization (hsieh_detection_2020)

In [34]:
def ecg_split(data, limit, label):
    """Bring one recording to segments of exactly ``limit`` samples.

    * Longer than ``limit``: cut into windows of ``limit`` samples whose start
      positions advance by half a window (50% overlap). A window aligned with
      the end of the recording is added so the tail is always covered; it is
      skipped when it would repeat the previous window.
    * Shorter than ``limit``: repeat the recording from its beginning until
      ``limit`` samples are reached.
    * Exactly ``limit``: returned unchanged.

    Args:
        data: 1-D sequence of samples (NumPy array or list).
        limit: Target segment length in samples; a positive integer.
        label: Label of the recording; repeated once per produced segment.

    Returns:
        Tuple ``(segments, labels)`` of two equally long lists. Both lists are
        empty when ``data`` is empty.

    Raises:
        ValueError: If ``limit`` is not positive.
    """
    if limit <= 0:
        raise ValueError("limit must be a positive number of samples")

    n_samples = len(data)
    if n_samples == 0:
        # Nothing to cut or repeat; a ratio-based computation would divide by zero here.
        return [], []

    if n_samples > limit:
        # Window starts advance by half a window; only windows that fit completely are kept.
        last_start = n_samples - limit
        starts = []
        i = 0
        while (i * limit) // 2 <= last_start:
            start = (i * limit) // 2
            if not starts or starts[-1] != start:
                starts.append(start)
            i += 1
        # Cover the tail, unless the last regular window already ends at the last sample.
        if starts[-1] != last_start:
            starts.append(last_start)
        splitted = [data[start:start + limit] for start in starts]
    elif n_samples < limit:
        # Repeat whole copies first (only needed when at least two copies fit) ...
        repeats = limit // n_samples
        if repeats >= 2:
            data = np.tile(data, repeats)
        # ... then fill the remainder with the beginning of the signal.
        missing = limit - len(data)
        splitted = [np.append(data, data[0:missing])]
    else:
        # Exact length: keep the recording as it is.
        splitted = [data]

    return splitted, [label] * len(splitted)

In [35]:
# Target segment length: 30 seconds, expressed in samples via the sampling frequency.
time_set = 30
duration = time_set * fs

# Cut/pad every recording with ecg_split and gather all segments with their labels.
normalized, normalized_label = [], []
for j, recording in enumerate(ecg_leads_edited):
    segments, segment_labels = ecg_split(recording, duration, ecg_labels[j])
    normalized += segments
    normalized_label += segment_labels

ecg_leads_edited = normalized
ecg_labels_edited = normalized_label

### Split training, validation, test data

In [36]:
# Hold-out split: 20% of the segments go to X_test / y_test, the rest is used for training.
# The held-out part is used further down both as validation_data during fit and for the
# final evaluate call.
# NOTE(review): the split is made on segments, i.e. after the overlapping windowing done
# by ecg_split, so windows cut from the same recording can end up on both sides.
X_train, X_test, y_train, y_test = train_test_split(ecg_leads_edited, ecg_labels_edited, test_size=0.2, shuffle = True, random_state = 8)

In [37]:
# Convert the training split to NumPy arrays before it is handed to the resampler.
X_train, y_train = np.asarray(X_train), np.asarray(y_train)

### Random oversampling for imbalanced training data

In [38]:
# Balance the training data by randomly re-drawing samples of the minority class.
ros = RandomOverSampler(random_state=0, sampling_strategy='minority')
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Converts sublists to arrays.
# Iterate over the *resampled* data: looping over the length of the original X_train
# would truncate the result back to the original sample count and undo the oversampling.
xfiltered = [np.asarray(sample) for sample in X_resampled]
yfiltered = [np.asarray(target) for target in y_resampled]
X_train = xfiltered
y_train = yfiltered

### Label encoding (0: A, 1: N, 2: O, 3: ~)

In [39]:
# get indices for each label
# idx_N = [i for i in range(len(ecg_labels_edited)) if ecg_labels_edited[i] == 'N']
# idx_A = [i for i in range(len(ecg_labels_edited)) if ecg_labels_edited[i] == 'A']
# idx_tilde = [i for i in range(len(ecg_labels_edited)) if ecg_labels_edited[i] == '~']
# idx_O = [i for i in range(len(ecg_labels_edited)) if ecg_labels_edited[i] == 'O']

In [40]:
def labelencode(y_data, classes=None):
    """Convert class labels to one-hot vectors.

    Args:
        y_data: 1-D sequence of class labels.
        classes: Optional sequence containing the complete label set. When
            given, the encoder is fitted on it instead of on ``y_data``, so the
            label -> column mapping and the number of columns do not depend on
            which labels happen to occur in ``y_data``. When omitted, the
            labels found in ``y_data`` define the mapping.

    Returns:
        The one-hot array produced by ``keras.utils.to_categorical``, with one
        row per label and one column per class known to the encoder.
    """
    le = preprocessing.LabelEncoder()
    le.fit(y_data if classes is None else classes)
    encoded = le.transform(y_data)
    # Fix the width explicitly so it follows the encoder, not the largest index present.
    encoded = keras.utils.to_categorical(encoded, num_classes=len(le.classes_))
    return encoded

def tolist(array):
    """Return the elements obtained by iterating over ``array`` as a new list."""
    return [element for element in array]

In [41]:
# d = []
# for p, data in enumerate(ecg_labels_edited):
#     d.append(
#         {
#             'ecg_data': ecg_names[p],
#             'label': ecg_labels_enc[p]
#         }
#     )
# features_names = pd.DataFrame(d)


In [42]:
# Numerical & one-hot encode
# A single encoder, fitted on the training labels, is applied to both splits so that a
# class maps to the same column in y_train and y_test and both arrays have the same
# width, even if a class is missing from one split. A test label that never occurs in
# the training labels makes transform raise instead of being silently mis-encoded.
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(y_train)
n_label_classes = len(label_encoder.classes_)
y_train = keras.utils.to_categorical(label_encoder.transform(y_train), num_classes=n_label_classes)
y_test = keras.utils.to_categorical(label_encoder.transform(y_test), num_classes=n_label_classes)

# Convert to array... x=(samples, time), y=(samples, encodings)
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)
X_test = np.asarray(X_test)
y_test = np.asarray(y_test)

In [43]:
# Drop the intermediate objects that are not referenced by the cells below.
del xfiltered, yfiltered, X_resampled, y_resampled, ros
del ecg_leads_edited, normalized, ecg_labels_edited, normalized_label
del ecg_leads, ecg_labels, fs, ecg_names, filtered

# Classification (CNN)
- Sources:
    - https://stackoverflow.com/questions/55233377/keras-sequential-model-with-multiple-inputs
    - https://pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/
    - https://towardsdatascience.com/building-a-convolutional-neural-network-cnn-in-keras-329fbbadc5f5
    - https://www.tutorialspoint.com/keras/keras_convolution_neural_network.htm

### CNN-defaults

In [44]:
def conv_1d(filters, kernel_size, act='relu'):
    """Create a Conv1D layer with stride 1 and 'valid' padding.

    Args:
        filters: Number of output channels.
        kernel_size: Length of the convolution window.
        act: Activation handed to the layer; ``None`` means no activation.

    Returns:
        A new tf.keras.layers.Conv1D instance that is not yet connected to any input.
    """
    return tf.keras.layers.Conv1D(
        filters=filters,
        kernel_size=kernel_size,
        strides=1,
        padding="valid",
        data_format="channels_last",
        activation=act,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
    )

def maxpool(stride_num, pool_num=2):
    """Create a MaxPooling1D layer with 'valid' padding.

    Args:
        stride_num: Stride of the pooling window.
        pool_num: Size of the pooling window (2 unless stated otherwise).

    Returns:
        A new tf.keras.layers.MaxPooling1D instance.
    """
    return tf.keras.layers.MaxPooling1D(
        pool_size=pool_num,
        strides=stride_num,
        padding="valid",
    )

def fully(uni, act='relu'):
    """Create a Dense layer whose kernel carries an L2 penalty of 1e-3.

    Args:
        uni: Number of units.
        act: Activation handed to the layer; ``None`` means no activation.

    Returns:
        A new tf.keras.layers.Dense instance.
    """
    weight_penalty = keras.regularizers.L2(1e-3)
    return tf.keras.layers.Dense(
        units=uni,
        activation=act,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=weight_penalty,
    )

### CNN (fan_multiscaled_2018)

In [45]:
# Training configuration
batch_s = None      # handed to Input(batch_size=...) and fit(batch_size=...)
epochen = 20        # number of epochs passed to fit
num_classes = 4     # units of the softmax output layer

# Names under which the model cells consume the prepared splits
X_tr, y_tr = X_train, y_train
X_te, y_te = X_test, y_test

In [64]:
# Two-branch 1-D CNN built with the functional API. Both branches read the same input
# tensor and have the same layer layout; they differ only in the kernel size of their
# first four convolutions (3 in the first branch, 7 in the second).
# maxpool(n) means stride n with the helper's default pool size of 2.
# NOTE(review): the first convolution of each branch and the first dense layer are
# created with act=None, i.e. without a non-linearity - confirm this is intended.

# Shared input: one channel, `duration` time steps per sample.
stream = keras.layers.Input(
    shape=(duration,1),
    batch_size=batch_s
)

# First branch
line1 = conv_1d(64,3,act=None)(stream)
line1 = conv_1d(64,3)(line1)
line1 = maxpool(3)(line1)
line1 = conv_1d(128,3)(line1)
line1 = conv_1d(128,3)(line1)
line1 = maxpool(3)(line1)
line1 = conv_1d(256,3)(line1)
line1 = conv_1d(256,3)(line1)
line1 = conv_1d(256,3)(line1)
line1 = maxpool(2)(line1)
line1 = conv_1d(512,3)(line1)
line1 = conv_1d(512,3)(line1)
line1 = conv_1d(512,3)(line1)
line1 = maxpool(2)(line1)
line1 = conv_1d(512,3)(line1)
line1 = conv_1d(512,3)(line1)
line1 = conv_1d(512,3)(line1)
line1_out = maxpool(2)(line1)
# Sub-model wrapping the first branch; only its .output tensor is used below.
line1_mod = keras.Model(inputs=stream, outputs=line1_out)

# Second branch
line2 = conv_1d(64,7,act=None)(stream)
line2 = conv_1d(64,7)(line2)
line2 = maxpool(3)(line2)
line2 = conv_1d(128,7)(line2)
line2 = conv_1d(128,7)(line2)
line2 = maxpool(3)(line2)
line2 = conv_1d(256,3)(line2)
line2 = conv_1d(256,3)(line2)
line2 = conv_1d(256,3)(line2)
line2 = maxpool(2)(line2)
line2 = conv_1d(512,3)(line2)
line2 = conv_1d(512,3)(line2)
line2 = conv_1d(512,3)(line2)
line2 = maxpool(2)(line2)
line2 = conv_1d(512,3)(line2)
line2 = conv_1d(512,3)(line2)
line2 = conv_1d(512,3)(line2)
line2_out = maxpool(2)(line2)
# Sub-model wrapping the second branch; only its .output tensor is used below.
line2_mod = keras.Model(inputs=stream, outputs=line2_out)

# Combination
# Concatenate is used with its default axis, so the two branch outputs are joined along
# the last (channel) axis and must agree in all other dimensions.
combined = keras.layers.Concatenate()([line1_mod.output, line2_mod.output])
combined = keras.layers.Flatten()(combined)

# Fully connected layer
line3 = fully(1024, act=None)(combined)
line3 = fully(1024)(line3)
line3 = fully(256)(line3)
# Softmax head with one unit per class; `model` is the network that gets trained.
outputs = fully(num_classes, act="softmax")(line3)
model = keras.Model(inputs=stream, outputs=outputs)

In [65]:
# Print the layer table of the assembled model (layer types, output shapes, parameter
# counts and connections).
model.summary()

Model: "model_24"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_10 (InputLayer)          [(None, 9000, 1)]    0           []                               
                                                                                                  
 conv1d_193 (Conv1D)            (None, 8998, 64)     256         ['input_10[0][0]']               
                                                                                                  
 conv1d_206 (Conv1D)            (None, 8994, 64)     512         ['input_10[0][0]']               
                                                                                                  
 conv1d_194 (Conv1D)            (None, 8996, 64)     12352       ['conv1d_193[0][0]']             
                                                                                           

### Compile & train

In [66]:
# Compile
# Plain SGD whose learning rate follows an exponential decay schedule.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2, decay_steps=300, decay_rate=0.1
)
opt = tf.keras.optimizers.SGD(learning_rate=lr_schedule)

# Categorical cross-entropy matches the one-hot targets; accuracy is tracked as metric.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [67]:
# Train
# NOTE(review): the checkpoint watches val_loss while early stopping watches the
# training loss - confirm that monitoring two different quantities is intended.
callbacks = [
    # Write a checkpoint named after the epoch whenever val_loss improves.
    keras.callbacks.ModelCheckpoint(
        filepath='cnn_julian_{epoch}',
        save_freq='epoch',
        monitor='val_loss',
        mode='auto',
        verbose=0,
        save_best_only=True
    ),
    # Stop once the monitored loss has not improved for two consecutive epochs.
    keras.callbacks.EarlyStopping(
        monitor="loss",
        patience=2,
        mode="auto",
        verbose=1,
        # min_delta=0,
        # baseline=None,
        # restore_best_weights=False,
    )
]

#with tf.device('/CPU:0'):
# Keep the History object returned by fit: the statistics cell further down prints
# history.history, which fails with a NameError if the result is discarded.
history = model.fit(
    x = X_tr,
    y = y_tr,
    batch_size = batch_s, 
    epochs = epochen, 
    verbose = 1,
    callbacks = callbacks,
    validation_data = (X_te, y_te),
    # shuffle=True,
    # class_weight=None,
    # sample_weight=None,
    # initial_epoch=0,
    # steps_per_epoch=None,
    # validation_steps=None,
    # validation_batch_size=None,
    # validation_split=0.0,
    # validation_freq=1,
    # max_queue_size=10,
    # workers=1,
    # use_multiprocessing=False,
)

Epoch 1/20

KeyboardInterrupt: 

### Test & evaluate

In [None]:
# Evaluate on the held-out split, pinned to the CPU.
with tf.device('/CPU:0'):
    score = model.evaluate(X_te, y_te, verbose='auto')

# score holds the loss followed by the compiled metric (accuracy).
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

### Model output

In [None]:
#Save model
# save has to be called on the trained instance `model` (lower case); the capitalised
# name `Model` is not the network built above.
model.save(
    filepath="cnn_julian.tf",
    include_optimizer=True,
    save_format="tf",
    # overwrite=True,
    # signatures=None,
    # options=None,
    # save_traces=True,
)
#loaded_model = tf.keras.models.load_model('/tmp/model')

In [None]:
#Model statistics
model.summary()
# Read the training curves from the History object Keras attaches to the model during
# fit; a stand-alone `history` variable is not guaranteed to exist in this notebook.
print(model.history.history)
#model.get_weights()

In [None]:
#Plot model
# Draw the layer graph top-to-bottom into model.png, with tensor shapes and without
# layer names.
tf.keras.utils.plot_model(
    model,
    to_file="model.png",
    show_shapes=True,
    show_layer_names=False,
    rankdir="TB",
    dpi=80,
    layer_range=None,
)

### Predict & run

In [None]:
# Predict class probabilities for the held-out split.
# The comma after verbose was missing, which made this cell a SyntaxError. The remaining
# arguments only repeated default values and are kept commented out, as in the other cells.
pred = model.predict(
    x = X_te,
    batch_size=None,
    verbose="auto",

    # Misc.
    # steps=None,
    # callbacks=None,
    # max_queue_size=10,
    # workers=1,
    # use_multiprocessing=False
)
# Compare predicted and true class indices for the first five samples.
pred = np.argmax(pred, axis = 1)[:5] 
label = np.argmax(y_test,axis = 1)[:5] 

print(pred)
print(label)

## Sonstige
- [ ] Normalisieren:
    - https://keras.io/api/layers/preprocessing_layers/numerical/normalization/
    - https://keras.io/api/layers/normalization_layers/batch_normalization/
- [ ] Rebalance: https://keras.io/examples/structured_data/imbalanced_classification/
- [ ] Schnitttechnik der Daten (nur R-Peaks cutten)
- [ ] Alternative Architekturen (notfalls einfach auch nur Code durchgehen, um zu ergänzen):
    - https://keras.io/examples/timeseries/timeseries_classification_from_scratch/
    - https://keras.io/examples/timeseries/timeseries_classification_transformer/
    - https://keras.io/api/data_loading/timeseries/