## Setup models

In [12]:
!git clone https://github.com/HemaxiN/DL_ECG_Classification.git

Cloning into 'DL_ECG_Classification'...
remote: Enumerating objects: 811, done.[K
remote: Counting objects: 100% (259/259), done.[K
remote: Compressing objects: 100% (127/127), done.[K
remote: Total 811 (delta 162), reused 204 (delta 127), pack-reused 552 (from 1)[K
Receiving objects: 100% (811/811), 14.00 MiB | 16.55 MiB/s, done.
Resolving deltas: 100% (501/501), done.


In [2]:
!pip install -U -r DL_ECG_Classification/requirements.txt

Defaulting to user installation because normal site-packages is not writeable
Collecting argon2-cffi==21.3.0 (from -r DL_ECG_Classification/requirements.txt (line 1))
  Using cached argon2_cffi-21.3.0-py3-none-any.whl.metadata (5.4 kB)
Collecting argon2-cffi-bindings==21.2.0 (from -r DL_ECG_Classification/requirements.txt (line 2))
  Using cached argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting asttokens==2.0.5 (from -r DL_ECG_Classification/requirements.txt (line 3))
  Using cached asttokens-2.0.5-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting attrs==21.4.0 (from -r DL_ECG_Classification/requirements.txt (line 4))
  Using cached attrs-21.4.0-py2.py3-none-any.whl.metadata (9.8 kB)
Collecting certifi==2021.10.8 (from -r DL_ECG_Classification/requirements.txt (line 8))
  Using cached certifi-2021.10.8-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting charset-normalizer==2.0.12 (from -r DL_ECG_Classification/requirements.

In [3]:
!pip install -q tifffile pyts wfdb


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


## Setup PTB-XL dataset (v1.0.3)

In [1]:
!wget https://physionet.org/static/published-projects/ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3.zip

--2025-05-13 11:52:08--  https://physionet.org/static/published-projects/ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3.zip
Resolving physionet.org (physionet.org)... 18.18.42.54
Connecting to physionet.org (physionet.org)|18.18.42.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1839504686 (1.7G) [application/zip]
Saving to: ‘ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3.zip’


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [None]:
!unzip ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3.zip -d p

In [32]:
import pandas as pd
import numpy as np
import wfdb
import ast
import pickle
import os

def load_raw_data(df, sampling_rate, path):
    if sampling_rate == 100:
        data = [wfdb.rdsamp(path+f) for f in df.filename_lr]
    else:
        data = [wfdb.rdsamp(path+f) for f in df.filename_hr]
    data = np.array([signal for signal, meta in data])
    return data

path = 'ptb-xl-1.0.3/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3/'
sampling_rate=100

# load and convert annotation data
Y = pd.read_csv(path+'ptbxl_database.csv', index_col='ecg_id')
Y.scp_codes = Y.scp_codes.apply(lambda x: ast.literal_eval(x))

# Load raw signal data
X = load_raw_data(Y, sampling_rate, path)

# Load scp_statements.csv for diagnostic aggregation
agg_df = pd.read_csv(path+'scp_statements.csv', index_col=0)

def aggregate_diagnostic(y_dic):
    tmp = []
    for key in y_dic.keys():
        if key in agg_df.index:
            if key in ['CLBBB', 'ILBBB']:
                key = 'LBBB'
            elif key in ['CRBBB', 'IRBBB']:
                key = 'RBBB'
            elif key == '1AVB':
                key = '1dAVB'
            if key in ['AFIB', 'AFLT', '1dAVb', 'RBBB', 'LBBB']:
                tmp.append(key)
    return list(set(tmp))

# Apply diagnostic superclass
Y['subclass'] = Y.scp_codes.apply(aggregate_diagnostic)
Y = Y[Y.subclass.apply(lambda x: len(x) > 0)]

# Split data into train and test (https://physionet.org/content/ptb-xl/1.0.1/)
test_fold = 10
dev_fold = 9
# Train: 2844 records
X_train = X[np.where((Y.strat_fold != test_fold) & (Y.strat_fold != dev_fold))]
y_train = Y[(Y.strat_fold != test_fold) & (Y.strat_fold != dev_fold)].subclass
# Validation: 357 records
X_dev = X[np.where(Y.strat_fold == dev_fold)]
y_dev = Y[(Y.strat_fold == dev_fold)].subclass
# Test: 362 records
X_test = X[np.where(Y.strat_fold == test_fold)]
y_test = Y[Y.strat_fold == test_fold].subclass

In [34]:
# Conditions covered
set([','.join(r) for r in y_train])

{'AFIB',
 'AFIB,AFLT',
 'AFIB,LBBB',
 'AFIB,RBBB',
 'AFLT',
 'AFLT,RBBB',
 'LBBB',
 'LBBB,AFLT',
 'LBBB,RBBB',
 'RBBB'}

In [35]:
preprocessed_dir = 'Processed'
os.makedirs(preprocessed_dir, exist_ok=True)

pickle_out = open(os.path.join(preprocessed_dir, "X.pickle"),"wb")
pickle.dump(X, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "y.pickle"),"wb")
pickle.dump(Y, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "X_train.pickle"),"wb")
pickle.dump(X_train, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "y_train.pickle"),"wb")
pickle.dump(y_train, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "X_dev.pickle"),"wb")
pickle.dump(X_dev, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "y_dev.pickle"),"wb")
pickle.dump(y_dev, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "X_test.pickle"),"wb")
pickle.dump(X_test, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "y_test.pickle"),"wb")
pickle.dump(y_test, pickle_out)
pickle_out.close()

In [36]:
def labelstovector(X,y):
  '''
  Convert the labels in y into vectors:
  Multi-label problem:
  AFIB: [0,0,0,0]
  AFLT: [1,0,0,0]
  1dAVb: [0,1,0,0]
  RBBB: [0,0,1,0]
  LBBB: [0,0,0,1]
  Combination example:
  AFLT and LBBB: [1,0,0,1]
  LBBB and RBBB and 1dAVB: [0,1,1,1]
  -----------------------------------------------------------
  Args: X (number of examples, signal length, number of leads)
        y (number of examples, )
  '''
  y_list = []
  X_list = []
  for label, ecg in zip(y,X):
    if len(label)!=0: #ignore examples with label = []
      aux_vec = np.zeros(4)
      if 'AFLT' in label:
        aux_vec[0] = 1
      if '1dAVB' in label:
        aux_vec[1] = 1
      if 'RBBB' in label:
        aux_vec[2] = 1
      if 'LBBB' in label:
        aux_vec[3] = 1

      y_list.append(aux_vec)
      X_list.append(ecg)	

  return X_list, y_list

In [37]:
X_train_processed, y_train_processed = labelstovector(X_train, y_train)
X_dev_processed, y_dev_processed = labelstovector(X_dev, y_dev)
X_test_processed, y_test_processed = labelstovector(X_test, y_test)

pickle_out = open(os.path.join(preprocessed_dir, "X_train_processed.pickle"),"wb")
pickle.dump(X_train_processed, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "y_train_processed.pickle"),"wb")
pickle.dump(y_train_processed, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "X_dev_processed.pickle"),"wb")
pickle.dump(X_dev_processed, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "y_dev_processed.pickle"),"wb")
pickle.dump(y_dev_processed, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "X_test_processed.pickle"),"wb")
pickle.dump(X_test_processed, pickle_out)
pickle_out.close()

pickle_out = open(os.path.join(preprocessed_dir, "y_test_processed.pickle"),"wb")
pickle.dump(y_test_processed, pickle_out)
pickle_out.close()

print(X_train.shape)
print(X_dev.shape)
print(X_test.shape)

(2844, 1000, 12)
(357, 1000, 12)
(362, 1000, 12)


In [50]:
# Prepare 2D dataset
!mkdir -p Images/dev/labels
!mkdir -p Images/dev/images
!mkdir -p Images/train/labels
!mkdir -p Images/train/images
!mkdir -p Images/test/labels
!mkdir -p Images/test/images
# Save processed dataset at
!mkdir -p Processed/model_specific/X_rnn_train
!mkdir -p Processed/model_specific/X_rnn_dev
!mkdir -p Processed/model_specific/X_rnn_test
!mkdir -p Processed/model_specific/X_cnn_train
!mkdir -p Processed/model_specific/X_cnn_dev
!mkdir -p Processed/model_specific/X_cnn_test
!mkdir -p Processed/model_specific/labels_train
!mkdir -p Processed/model_specific/labels_dev
!mkdir -p Processed/model_specific/labels_test

# Modify `create_dataset.py` appropriately to
# specify dataset path and save directory path
!python DL_ECG_Classification/Dataset/create_dataset.py

[0m

## Training
Update code for specific models to ensure dataset length is correctly specified:
```python
# _examples_ = [17111,2156,2163]
_examples_ = [2844,357,362]
```

Matrix printed at the end contains the column: TP, FN, FP, TN

In [None]:
# 1D models
## RNN
!python3 DL_ECG_Classification/rnn.py -data 'Processed/model_specific/' -epochs 100 -batch_size 256 -path_save_model 'saved_models/' -gpu_id 0 -learning_rate 0.01
## LSTM
!python3 DL_ECG_Classification/lstm.py -data 'Processed/model_specific/' -epochs 100 -batch_size 256 -path_save_model 'saved_models/' -gpu_id 0 -learning_rate 0.01
## GRU
!python3 DL_ECG_Classification/gru.py -data 'Processed/model_specific/' -epochs 100 -batch_size 256 -path_save_model 'saved_models/' -gpu_id 0 -learning_rate 0.01

In [58]:
# 2D models
## AlexNet
!python3 DL_ECG_Classification/AlexNet.py -data 'Processed/model_specific/' -epochs 2 -batch_size 256 -path_save_model 'saved_models/' -gpu_id 0 -learning_rate 0.01
# ## VGGNet
# !python3 DL_ECG_Classification/vggnet.py -data 'Processed/model_specific/' -epochs 100 -batch_size 256 -path_save_model 'saved_models/' -gpu_id 0 -learning_rate 0.01
# ## ResNet
# !python3 DL_ECG_Classification/resnet.py -data 'Processed/model_specific/' -epochs 100 -batch_size 256 -path_save_model 'saved_models/' -gpu_id 0 -learning_rate 0.01

Loading data...
Training epoch 1
  return F.conv2d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Training loss: 2.4530
Training epoch 2
Training loss: 2.4164
Final Test Results:
  return F.conv2d(input, weight, bias, self.stride,
[[  0.   7.   0. 355.]
 [  0.   0. 362.   0.]
 [  0. 166.   0. 196.]
 [ 62.   0. 300.   0.]]


In [74]:
import os
import numpy as np

# View distribution
train_dir = 'Processed/model_specific/labels_train/'
dev_dir = 'Processed/model_specific/labels_dev/'
test_dir = 'Processed/model_specific/labels_test/'

for path in [train_dir, dev_dir, test_dir]:
    counter = dict()
    files = os.listdir(path)
    for file in files:
        vector = np.load(os.path.join(path, file))
        encoding = ''.join(vector.astype(int).astype(str))
        if encoding not in counter:
            counter[encoding] = 0
        counter[encoding] += 1
    print(path.split('/')[-2].split('_')[-1])
    print(counter)
    print()

train
{'0010': 1323, '0001': 487, '0000': 974, '1000': 55, '1001': 2, '1010': 2, '0011': 1}

dev
{'0010': 166, '0000': 123, '0001': 61, '1000': 7}

test
{'0010': 166, '0000': 127, '0001': 62, '1000': 7}

