In [None]:
#%pip uninstall importlib_metadata -y
#%pip install importlib_metadata --force-reinstall

In [None]:
#%pip install keras

In [None]:
#%env SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True

In [None]:
#%pip install --force-reinstall -v "wfdb==1.3.9"

In [None]:
#%pip install --force-reinstall -v "wfdb==4.1.2"

In [None]:
%pip install wfdb

In [3]:
import glob
import os
import pandas as pd
import matplotlib.pyplot as plt 
import tensorflow as tf
from sklearn.model_selection import train_test_split
import wfdb

1. Data Collection:
·Datasets Used:
·CHF-RR Dataset: Congestive heart failure RR interval database. It comprised patients with heart failure. This dataset contained records of 29 patients from CHF201 to CHF229.
·NSR-RR Dataset: Normal sinus rhythm RR interval database. This dataset had 54 normal sinus rhythm recordings ranging in age from 28 to 76. Records from NSR001 through NSR054 of 54 patients were available.
·Both datasets were provided by PhysioBank.
2. Data Preprocessing:
·Conversion: Datasets were converted to CSV files or directly imported into Python.
·Library Used: The wfdblibrary was employed to read, write, and process WFDB signals and annotations. The library can be accessed at wfdb library.
·Grouping Approach: Among various grouping methods for the TSC algorithm, intervals were chosen over whole series. Data was normalized and divided into intervals of 5sec, 20sec, 40sec, 60sec, and 80sec.
·Data Formatting: Data was formatted into univariate UCR time series.
3. Data Augmentation:
·Data Augmentation Strategy: To combat issues of underfitting and overfitting, data augmentation was mentioned as a strategy. However, specific augmentation techniques were not detailed in the provided information.

# Downloading Data

In [None]:
dbs = wfdb.get_dbs()
display(dbs)

In [None]:
def download(database):
    cwd = os.getcwd()
    dl_dir = os.path.join(cwd, database)
    wfdb.dl_database(database, dl_dir=dl_dir)
    display(os.listdir(dl_dir))

In [None]:
download('nsrdb')

In [None]:
download('chfdb')

# Converting Data

In [4]:
def wfdb_to_dataframe(record):
    #df = record.to_dataframe()
    df = pd.DataFrame(record.p_signal, columns=record.sig_name)
    return df
 
def load_record(uri):
    record = wfdb.rdrecord(uri)
    return wfdb_to_dataframe(record)

In [None]:
a = load_record('nsrdb/16265')
plt.plot(a['ECG1'][:212])
plt.show()

In [None]:
plt.plot(a['ECG2'][:212])

In [None]:
load_record('chfdb/chf03')

In [6]:
nsrdb_list = glob.glob("nsrdb/*.hea")
len(nsrdb_list)

18

In [7]:
chfdb_list = glob.glob("chfdb/*.hea")
len(chfdb_list)

15

In [8]:
def create_db(file_list, record_class, record_len, record_split=1):
    df = pd.DataFrame()
    for file in file_list:
        record_df = load_record(file.removesuffix(".hea"))['ECG1'][:record_len]
        split = record_len//record_split
        i=0
        while i+split <= record_len:
            split_df = record_df[i:i+split].reset_index()
            i = i + split
            
            if df.empty:
                df = split_df
            else:
                df = pd.concat([df, split_df],axis=1, ignore_index=True, sort=False)
            
    df = df.transpose()
    df["class"] = [record_class]*df.shape[0]
    return df

In [116]:
record_len = 100

In [117]:
nsrdb_df = create_db(nsrdb_list, 0, record_len)
nsrdb_df.shape

(36, 101)

In [118]:
chfdb_df = create_db(chfdb_list, 1, record_len)
chfdb_df.shape

(30, 101)

In [119]:
full_df = pd.concat([nsrdb_df, chfdb_df],axis=0, ignore_index=True, sort=False)
full_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,class
0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,0
1,-0.165,-0.155,-0.195,-0.205,-0.185,-0.155,-0.135,-0.095,-0.075,-0.065,...,-0.155,-0.105,-0.125,-0.135,-0.125,-0.115,-0.105,-0.135,-0.135,0
2,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,0
3,-0.095,-0.085,-0.085,-0.085,-0.065,-0.045,-0.045,-0.045,-0.045,-0.025,...,-0.025,-0.015,-0.025,-0.025,-0.045,-0.055,-0.085,-0.115,-0.135,0
4,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,0


# Split Data

In [120]:
y_df = full_df.pop("class")
x_df = full_df

In [121]:
X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.30, random_state=42)

In [122]:
X_train, X_validate, y_train, y_validate = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

In [123]:
print("Training Data:", X_train.shape)
print("Validation Data:", X_validate.shape)
print("Testing Data:", X_test.shape)

Training Data: (36, 100)
Validation Data: (10, 100)
Testing Data: (20, 100)


# Model

In [124]:
input_shape = (X_train.shape[1], 1, )

The first convolutional layer will consist of 20 filters, followed by a max-pooling layer of length 2.
 
The second convolutional layer will have 80 filters with a max-pooling layer of length 4.
 
There will be 32 neurons used in the fully connected convolution layers to feed into the softmax classification layer.

In [130]:
#TODO: Finish model implimentation
model = tf.keras.Sequential(
    [
        tf.keras.Input(shape=input_shape),
        tf.keras.layers.Conv1D(
            20,
            5,
            ),
        tf.keras.layers.MaxPooling1D(
            2,
            ),
        tf.keras.layers.Conv1D(
            80,
            5),
        tf.keras.layers.MaxPooling1D(
            4, 
            ),
        tf.keras.layers.Dense(
            32),
        tf.keras.layers.Softmax(),
    ]
)

In [131]:
model.summary()

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_39 (Conv1D)          (None, 96, 20)            120       
                                                                 
 max_pooling1d_37 (MaxPoolin  (None, 48, 20)           0         
 g1D)                                                            
                                                                 
 conv1d_40 (Conv1D)          (None, 44, 80)            8080      
                                                                 
 max_pooling1d_38 (MaxPoolin  (None, 11, 80)           0         
 g1D)                                                            
                                                                 
 dense_18 (Dense)            (None, 11, 32)            2592      
                                                                 
 softmax_18 (Softmax)        (None, 11, 32)          

# Model Training

In [132]:
batch_size = 128
epochs = 15

In [133]:
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [134]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

Epoch 1/15


ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1268, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1249, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1051, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1109, in compute_loss
        return self.compiled_loss(
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/usr/local/lib/python3.10/dist-packages/keras/losses.py", line 142, in __call__
        losses = call_fn(y_true, y_pred)
    File "/usr/local/lib/python3.10/dist-packages/keras/losses.py", line 268, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/usr/local/lib/python3.10/dist-packages/keras/losses.py", line 1984, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/usr/local/lib/python3.10/dist-packages/keras/backend.py", line 5559, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 11, 32) are incompatible


# Model Evaluation

In [98]:
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

NameError: name 'x_test' is not defined