#### Train a Convolutional Neural Network model for predicting cardiac arrhythmias.

#### Import necessary libraries.

In [1]:

import gc
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal as signal
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from keras.callbacks import EarlyStopping
from keras.models import load_model
from keras.models import Sequential
from keras.layers import BatchNormalization
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.optimizers import Adam
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler


#### Define a function to load the dataset.

In [2]:

def load_dataset(lead_name, csv_path = "../dataset/csv_files/ecg_sph_dataset.csv"):
    # Loads the dataset using only the chosen lead.
    # Only the columns "idx", "ecg_id", <lead_name> and "arrhythmia_code" are read
    # from the "|"-separated CSV file; the lead is stored as float16 and the
    # arrhythmia code as int16 to keep the memory footprint small.
    # Parameters:
    #    lead_name: Lead to be used (name of the CSV column holding that lead).
    #    csv_path: Path of the CSV file (defaults to the project dataset file).
    # Return:
    #    Dataframe loaded (Pandas dataframe), or None when the file could not be
    #    loaded (the error is printed instead of raised, so callers must check for None).
    df = None
    column_names = ["idx", "ecg_id", lead_name, "arrhythmia_code"]
    dtypes = {"ecg_id": "str", lead_name : "float16", "arrhythmia_code" : "int16"}
    try:
        print("\nStart loading CSV file...")
        df = pd.read_csv(csv_path, sep = "|", dtype = dtypes, usecols = column_names)
        print("Finish loading CSV file.")
    except Exception as e:
        # Best-effort load: report the problem and fall through to return None.
        print("\nFail to load CSV file.")
        print("Error: {}".format(e))
    return df


#### Build helper functions that convert the records to the format required for time series processing.

In [3]:

# Shape constants shared by the preprocessing functions and the CNN definition.
number_of_steps = 1250     # time steps per record fed to the model (input_shape of the first Conv1D)
number_of_features = 1     # features (leads) per time step
number_of_classes = 32     # size of the softmax output layer

def get_new_columns_order(column_names_array):
    # Computes the integer labels used to reorder the pivoted columns.
    # The pivoted table is laid out feature by feature (position f * N + s, where
    # N = number_of_steps * 4 raw samples per record); the label written at that
    # position is s * number_of_features + f, so that sorting the labels yields a
    # step-major (interleaved) layout. With a single feature this is 0..N-1.
    # Parameters:
    #    column_names_array: current column names of the pivoted table (only its length is used).
    # Return:
    #    New integer column labels (np.array of int), one per input column.
    # Raises:
    #    ValueError when the number of columns is not N * number_of_features
    #    (otherwise labels would be missing or duplicated).
    raw_steps = number_of_steps * 4
    expected_columns = raw_steps * number_of_features
    if len(column_names_array) != expected_columns:
        raise ValueError("Expected {} columns but got {}.".format(expected_columns, len(column_names_array)))
    # Element [s, f] of the grid is s * F + f; transposing and flattening places it at f * N + s.
    label_grid = np.arange(expected_columns, dtype = int).reshape(raw_steps, number_of_features)
    return label_grid.T.ravel()

def build_time_window_structure(df, lead_name):
    # Splits the dataset into "time windows" to be used as a time series.
    # The long-format rows are pivoted into one row per "ecg_id" holding
    # number_of_steps * 4 raw samples, which are then low-pass filtered and
    # downsampled from 500 Hz to 125 Hz (number_of_steps samples per record).
    # Side effect: the "idx" column of df is overwritten in place with the
    # position of each sample inside its record (done in place to avoid copying
    # the whole dataframe).
    # Parameters:
    #    df: Dataframe to be splitted (columns "idx", "ecg_id", <lead_name>, "arrhythmia_code").
    #    lead_name: Lead to be used.
    # Return:
    #    All time windows (np.array)
    #    All target values (np.array), in the same row order as the time windows
    print("\nStarting build_time_window_structure function...")
    fs_original = 500 # Original frequency (Hz)
    fs_new = 125 # New frequency (Hz)
    downsampling_factor = int(fs_original / fs_new)
    samples_per_record = number_of_steps * downsampling_factor
    df["idx"] = df["idx"] % samples_per_record
    df_aux = df.pivot_table(index = "ecg_id", columns = "idx", values = [lead_name], aggfunc = "sum")
    new_columns = get_new_columns_order(df_aux.columns.values)
    df_aux.columns = list(new_columns)
    sorted_columns = sorted(df_aux.columns)
    df_modified = df_aux[sorted_columns]
    X_array = df_modified.values
    # pivot_table orders its rows by "ecg_id", which is not guaranteed to be the
    # file order. Take the label from the first row of every record and align it
    # explicitly with the pivoted index instead of relying on a positional stride.
    first_row_of_record = ~df["ecg_id"].duplicated()
    labels_by_record = df.loc[first_row_of_record, ["ecg_id", "arrhythmia_code"]].set_index("ecg_id")["arrhythmia_code"]
    y_array = labels_by_record.reindex(df_modified.index).values
    # Resample sample frequency to 125 hz.
    nyquist_rate = fs_original / 2.0  # Nyquist rate
    cutoff_freq = fs_new / 2.0  # Cut off rate
    # 4th-order Butterworth low-pass applied forward and backward along the time axis
    # (anti-aliasing before decimation).
    b, a = signal.butter(4, cutoff_freq / nyquist_rate, btype = "low")
    X_array_filtered = signal.filtfilt(b, a, X_array, axis = 1)
    X_array_125hz = X_array_filtered[:, ::downsampling_factor]
    print("\nShape of features: ", X_array_125hz.shape)
    print("Quantity os samples (labels): ", len(y_array))
    print("\nFinishing build_time_window_structure function.")
    return X_array_125hz, y_array


#### Define a function to remove classes with fewer than 6 samples.

In [4]:

def remove_classes_with_less_samples(X_array, y_array, rare_codes = (31, 37, 84, 87, 102, 143, 148, 152)):
    # Remove classes with less than 6 samples and renumber the remaining
    # diagnostic codes as consecutive class ids (0, 1, 2, ... in ascending code order).
    # Parameters:
    #    X_array: array of features.
    #    y_array: array of targets (diagnostic codes).
    #    rare_codes: diagnostic codes to drop. The default lists the codes
    #                31, 37, 84, 87, 102, 143, 148, and 152, which have less than
    #                6 samples in this dataset (SMOTE restriction).
    # Return:
    #    Array of features (np.array)
    #    Array of targets (np.array) holding class ids instead of diagnostic codes

    print("\nRemove classes with less than 6 samples.")
    removed_idx = np.where(np.isin(y_array, list(rare_codes)))[0]
    X_array = np.delete(X_array, removed_idx, axis = 0)
    y_array = np.delete(y_array, removed_idx, axis = 0)
    # Generate a class number for each diagnostic code and replace y_array values.
    # The number of classes is derived from the data, so any number of remaining
    # codes is handled; return_inverse gives, for every sample, the position of
    # its code in the sorted list of unique codes.
    sorted_codes, class_ids, samples_per_class = np.unique(y_array, return_inverse = True, return_counts = True)
    y_array = class_ids.reshape(-1).astype(int)
    dict_aux = {class_idx: code for class_idx, code in enumerate(sorted_codes)}
    print("\nShow classes identification:")
    for key, value in dict_aux.items():
        print(f"Class: {key} - Arrhythmia code: {value}")
    # Check for dataset balance.
    total_samples = np.sum(samples_per_class)
    percentage_by_class = [(qty * 100 / total_samples) for qty in samples_per_class]
    category_count = sorted(zip(range(len(sorted_codes)), samples_per_class, percentage_by_class),
                            key = lambda item: item[1], reverse = True)
    print("\nCheck for dataset balance:")
    for class_id, qty, percentage in category_count:
        print(f"Class = {class_id:3.0f}   Qty = {qty:8.0f}   Percentage = {percentage:2.2f} %")
    return X_array, y_array


#### Define a function for training a CNN model.

In [5]:

def train_cnn_model(cnn_model, X_train, y_train, X_test, y_test, num_epochs, batch_size, validation_split, model_cfg_file):
    # Train a CNN model, save it, and print its evaluation metrics.
    # Parameters:
    #    cnn_model (Sequential): model to be trained.
    #    X_train (np.array): array of features values.
    #    y_train (np.array): array of target values.
    #    X_test (np.array): array of features values.
    #    y_test (np.array): array of target values.
    #    num_epochs (int): number of epochs of training.
    #    batch_size (int): batch size.
    #    validation_split (float): percentage of instances for validation set.
    #    model_cfg_file (str): file name (inside "../modelconfig/") to save the trained model.
    # Returns:
    #    history (History object): history of training metrics.

    model_dir = "../modelconfig/"
    # Train the CNN model and evaluate it.
    start_time = time.time()
    print("\nStarting training at: ", time.strftime("%H:%M:%S", time.localtime()))
    # Stop when the validation loss has not improved for 30 consecutive epochs.
    # NOTE(review): restore_best_weights is left at its default, so the saved model
    # presumably holds the weights of the last epoch rather than the best one - confirm intent.
    es = EarlyStopping(monitor = "val_loss", mode = "min", verbose = 1, patience = 30)
    with tf.device('/cpu:0'):
        history = cnn_model.fit(X_train, y_train, validation_split = validation_split, epochs = num_epochs, batch_size = batch_size,
                                verbose = 1, callbacks = [es])
        # Measure right after fit so the reported time covers training only,
        # not saving or evaluating the model.
        elapsed_seconds = time.time() - start_time
        # Make sure the target directory exists so a long training run is not lost at save time.
        os.makedirs(model_dir, exist_ok = True)
        cnn_model.save(model_dir + model_cfg_file)
        _, train_accuracy = cnn_model.evaluate(X_train, y_train, verbose = 0)
        _, test_accuracy = cnn_model.evaluate(X_test, y_test, verbose = 0)
        print("\nTime taken for training: ", time.strftime("%H:%M:%S", time.gmtime(elapsed_seconds)))
        print("\nTrain Accuracy: {:.2f} %".format(train_accuracy * 100))
        print("Test Accuracy: {:.2f} %".format(test_accuracy * 100))
        print("\nEvaluate other metrics:")
        # The predicted class is the index of the highest softmax output.
        pred_classes = np.argmax(cnn_model.predict(X_test), axis = 1)
        truth_classes = y_test
        print(classification_report(truth_classes, pred_classes, zero_division = 0))
    return history


#### Define a function to build a version 1 of CNN model.

In [6]:

def create_v1():
    # Build and compile version 1 of the CNN model.
    # Architecture: two Conv1D blocks (8 filters / kernel 3, then 16 filters / kernel 5),
    # each made of two Conv1D + BatchNormalization pairs followed by max pooling,
    # then a 512-unit dense layer and a softmax layer with number_of_classes outputs.
    # The input has shape (number_of_steps, number_of_features).
    # Return:
    #    Compiled model (Sequential)
    shared_args = dict(activation = "relu", kernel_initializer = "he_uniform")
    layer_stack = [
        Conv1D(filters = 8, kernel_size = 3, input_shape = (number_of_steps, number_of_features), **shared_args),
        BatchNormalization(),
        Conv1D(filters = 8, kernel_size = 3, **shared_args),
        BatchNormalization(),
        MaxPooling1D(pool_size = 2),
        Conv1D(filters = 16, kernel_size = 5, **shared_args),
        BatchNormalization(),
        Conv1D(filters = 16, kernel_size = 5, **shared_args),
        BatchNormalization(),
        MaxPooling1D(pool_size = 2),
        Flatten(),
        Dense(512, **shared_args),
        BatchNormalization(),
        Dense(number_of_classes, activation = 'softmax'),
    ]
    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    optimizer = Adam(learning_rate = 0.001)
    network.summary()
    # Targets are integer class ids, hence the sparse variant of the cross-entropy loss.
    network.compile(loss = "sparse_categorical_crossentropy", optimizer = optimizer, metrics = ["accuracy"])
    return network


#### Define a function that uses the SMOTE technique to balance the classes by upsampling.

In [7]:

def apply_smote(X_array, y_array, nr_samples_per_class = 6250):
    # Apply the SMOTE technique to generate upsampling, then randomly
    # undersample every class to the same fixed number of samples.
    # Parameters:
    #    X_array: array of features.
    #    y_array: array of targets.
    #    nr_samples_per_class: number of samples kept per class after resampling.
    # Return:
    #    Array of features (np.array)
    #    Array of targets (np.array)
    diagnostic_classes, count = np.unique(y_array, return_counts = True)
    print("\nGenerating upsampling through SMOTE...")
    # SMOTE interpolates between a sample and its 5 nearest neighbours of the same
    # class, which is why every class needs at least 6 samples.
    smote = SMOTE(sampling_strategy = "auto", k_neighbors = 5, random_state = 42)
    X_array_res, y_array_res = smote.fit_resample(X_array, y_array)
    # With the "auto" strategy every class is brought up to the size of the largest
    # class, and under-sampling cannot ask for more samples than a class holds,
    # so the per-class target is capped at that size.
    largest_class_size = int(count.max())
    target_per_class = min(nr_samples_per_class, largest_class_size)
    if target_per_class < nr_samples_per_class:
        print("Only {} samples per class are available; using that instead of {}.".format(target_per_class, nr_samples_per_class))
    dict_samples_per_class = {class_id: target_per_class for class_id in diagnostic_classes}
    rus = RandomUnderSampler(sampling_strategy = dict_samples_per_class, random_state = 42)
    X_array_res, y_array_res = rus.fit_resample(X_array_res, y_array_res)
    print("{} samples after upsampling.".format(len(y_array_res)))
    print("Finishing upsampling.")
    return X_array_res, y_array_res


#### Define a function to split the dataset for training.

In [8]:

def split_dataset_for_training(X_array, y_array, test_size = 0.2, random_state = 42):
    # Split dataset for training
    # The data is split first (stratified by class) and the scaler is fitted on
    # the training part only, so no statistics of the test set leak into the
    # features used for training.
    # Parameters:
    #    X_array: array of features, one row per record.
    #    y_array: array of targets.
    #    test_size: fraction of the records assigned to the test set.
    #    random_state: seed of the split.
    # Return:
    #    X_train (np.array)
    #    X_test (np.array)
    #    y_train (np.array)
    #    y_test (np.array)

    print("\nStarting dataset split...")
    # Split train and test sets.
    X_train, X_test, y_train, y_test = train_test_split(X_array, y_array, test_size = test_size, stratify = y_array,
                                                        random_state = random_state)
    # Scale features using statistics that are robust to outliers.
    rb_scaler = RobustScaler()
    X_train = rb_scaler.fit_transform(X_train)
    X_test = rb_scaler.transform(X_test)
    # Reshape the structure data to be compatible with the pattern [samples, timesteps, features].
    X_train = X_train.reshape((X_train.shape[0], number_of_steps, number_of_features))
    X_test = X_test.reshape((X_test.shape[0], number_of_steps, number_of_features))
    print("Finishing dataset split.\n")
    return X_train, X_test, y_train, y_test


#### Lead I

In [9]:

# Lead I pipeline: load the "lead1" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("lead1")
X_array, y_array = build_time_window_structure(new_df, "lead1")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_lead1.h5")



Start loading CSV file...

Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Cla

#### Lead II

In [9]:

# Lead II pipeline: load the "lead2" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("lead2")
X_array, y_array = build_time_window_structure(new_df, "lead2")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_lead2.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### Lead III

In [10]:

# Lead III pipeline: load the "lead3" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("lead3")
X_array, y_array = build_time_window_structure(new_df, "lead3")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_lead3.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### aVR

In [9]:

# aVR pipeline: load the "aVR" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("aVR")
X_array, y_array = build_time_window_structure(new_df, "aVR")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_aVR.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### aVL

In [9]:

# aVL pipeline: load the "aVL" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("aVL")
X_array, y_array = build_time_window_structure(new_df, "aVL")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_aVL.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### aVF

In [9]:

# aVF pipeline: load the "aVF" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("aVF")
X_array, y_array = build_time_window_structure(new_df, "aVF")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_aVF.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### V1

In [9]:

# V1 pipeline: load the "V1" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("V1")
X_array, y_array = build_time_window_structure(new_df, "V1")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_V1.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### V2

In [9]:

# V2 pipeline: load the "V2" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("V2")
X_array, y_array = build_time_window_structure(new_df, "V2")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_V2.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### V3

In [9]:

# V3 pipeline: load the "V3" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("V3")
X_array, y_array = build_time_window_structure(new_df, "V3")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_V3.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### V4

In [9]:

# V4 pipeline: load the "V4" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("V4")
X_array, y_array = build_time_window_structure(new_df, "V4")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_V4.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### V5

In [9]:

# V5 pipeline: load the "V5" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("V5")
X_array, y_array = build_time_window_structure(new_df, "V5")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_V5.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas

#### V6

In [9]:

# V6 pipeline: load the "V6" column, reshape it into one row per ECG record,
# drop the rare classes, rebalance the classes, split the data and train the v1 CNN.
# Build a "time window" structure to handle the dataset as a time series.
new_df = load_dataset("V6")
X_array, y_array = build_time_window_structure(new_df, "V6")

# Try to release memory: the long-format dataframe is not used after this point.
del new_df
gc.collect()

# NOTE(review): the classes are resampled before the train/test split, so synthetic
# training samples may be derived from records that end up in the test set and the
# reported test accuracy is likely optimistic - consider resampling the training part only.
X_array, y_array = remove_classes_with_less_samples(X_array, y_array)
X_array, y_array = apply_smote(X_array, y_array)
X_train, X_test, y_train, y_test = split_dataset_for_training(X_array, y_array)

# Train the CNN model: at most 300 epochs (early stopping may end sooner),
# batch size 32, 10 % of the training set held out for validation.
v1_model = create_v1()
v1_num_epochs = 300
v1_batch_size = 32
v1_validation_split = 0.1

# The trained model is saved under the file name given as last argument.
training_history_v1 = train_cnn_model(v1_model, X_train, y_train, X_test, y_test, v1_num_epochs, v1_batch_size, v1_validation_split, "v1_model_V6.h5")



Start loading CSV file...
Finish loading CSV file.

Starting build_time_window_structure function...

Shape of features:  (25770, 1250)
Quantity os samples (labels):  25770

Finishing build_time_window_structure function.

Remove classes with less than 6 samples.

Show classes identification:
Class: 0 - Arrhythmia code: 1
Class: 1 - Arrhythmia code: 21
Class: 2 - Arrhythmia code: 22
Class: 3 - Arrhythmia code: 23
Class: 4 - Arrhythmia code: 30
Class: 5 - Arrhythmia code: 36
Class: 6 - Arrhythmia code: 50
Class: 7 - Arrhythmia code: 51
Class: 8 - Arrhythmia code: 54
Class: 9 - Arrhythmia code: 60
Class: 10 - Arrhythmia code: 80
Class: 11 - Arrhythmia code: 82
Class: 12 - Arrhythmia code: 83
Class: 13 - Arrhythmia code: 88
Class: 14 - Arrhythmia code: 101
Class: 15 - Arrhythmia code: 104
Class: 16 - Arrhythmia code: 105
Class: 17 - Arrhythmia code: 106
Class: 18 - Arrhythmia code: 108
Class: 19 - Arrhythmia code: 120
Class: 20 - Arrhythmia code: 121
Class: 21 - Arrhythmia code: 125
Clas