In [None]:
import os

from google.colab import drive

# Make Google Drive available inside the Colab runtime.
drive.mount('/content/drive')

# Switch to the designated project directory so that the relative paths used
# further down in the notebook (e.g. 'data/...') resolve against it.
os.chdir('/content/drive/MyDrive/DLH Final Project/')

In [None]:
import csv
import numpy as np
import os as os
import pandas as pd
import scipy.io
import skvideo.io
import tensorflow as tf

ECG model training attempt using the resources provided by DeepHeartBeat. (We were not able to reproduce the results with the provided code because of dependency conflicts: the code depends heavily on TF 2.2.0.)

In [None]:
# physio_data processing
# Build a dict that maps each record name (the .mat file name without its
# extension) to the first row of the file's 'val' array plus a fixed frequency.
physio_dir = 'data/physio_training/'
physio_data = dict()

# os.listdir returns entries in arbitrary order. Sorting keeps the dict order,
# and therefore the seeded train/validation split and the sequential window
# split performed later in the notebook, identical from run to run.
for filename in sorted(os.listdir(physio_dir)):
  if filename.endswith('.mat'):
    mat_data = scipy.io.loadmat(physio_dir + filename)
    physio_data[filename[:-4]] = {
        'measurements': mat_data['val'][0],
        # Hard-coded for every record; presumably the sampling rate in Hz -- TODO confirm.
        'frequency': 300
    }

In [None]:
from sklearn.model_selection import train_test_split
from models.ecg import ECGModel

In [None]:
# Report how many Physionet ECG records were loaded into physio_data
print('%i subjects loaded' % len(physio_data))

# Train-validation split: 80/20 over the record names, with a fixed seed
ids = np.array(list(physio_data))
train_ids, val_ids = train_test_split(ids, random_state=38, test_size=0.2)

# Look up the record dicts for each side of the split, preserving split order
train_data, val_data = (
    [physio_data[subject_id] for subject_id in split_ids]
    for split_ids in (train_ids, val_ids)
)

In [None]:

# The same location is passed to the model as log_dir and used as the target
# of save_weights once fitting has finished.
trained_model_path = './self_trained_models/physionet'

# ECGModel comes from models.ecg; hyperparameters are spelled out one per line.
model = ECGModel(
    latent_space_dim=8,
    batch_size=64,
    hidden_dim=128,
    learning_rate=5e-4,
    log_dir=trained_model_path,
)
model.fit(train_data, val_data)
model.save_weights(trained_model_path)

ECG Model (self reproduction with help of LLM)

*  Ablation using a window_data processor; however, even after this processing and after reducing the number of epochs, the results are still not ideal.



In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# Windowing: number of samples per window, and the step between window starts
# (a stride smaller than the window size makes consecutive windows overlap).
WINDOW_SIZE, STRIDE = 256, 128

# Training: number of passes over the data and samples per gradient step,
# both forwarded to model.fit below.
EPOCHS, BATCH_SIZE = 10, 16

def window_data(data_dict, window_size, stride):
    """Cut every recording into fixed-length windows.

    Args:
        data_dict: mapping whose values are dicts with a 'measurements'
            sequence.
        window_size: number of samples in each window.
        stride: offset between the starts of consecutive windows.

    Returns:
        The result of tf.ragged.constant (dtype float32) over all windows, in
        the iteration order of data_dict. Recordings shorter than window_size
        contribute no windows; a trailing partial window is never emitted.
    """
    all_windows = [
        sample['measurements'][offset:offset + window_size]
        for sample in data_dict.values()
        for offset in range(0, len(sample['measurements']) - window_size + 1, stride)
    ]
    return tf.ragged.constant(all_windows, dtype=tf.float32)

# Window every loaded recording and report how many windows were produced.
windows = window_data(data_dict=physio_data, window_size=WINDOW_SIZE, stride=STRIDE)
print(f"Total windows: {windows.shape[0]}")

def split_train_test(ragged_tensor, train_frac=0.8):
    """Split rows into a leading training part and a trailing test part.

    The split is positional (no shuffling): the first
    int(n_rows * train_frac) rows form the training set and the remaining
    rows form the test set. Each part is converted with .to_tensor().

    Args:
        ragged_tensor: object exposing .shape, row slicing and .to_tensor().
        train_frac: fraction of rows assigned to the training part.

    Returns:
        (train, test) tuple of the two converted parts.
    """
    cut = int(ragged_tensor.shape[0] * train_frac)
    head, tail = ragged_tensor[:cut], ragged_tensor[cut:]
    return head.to_tensor(), tail.to_tensor()

# Positional split of the windows using split_train_test's default fraction.
train_x, test_x = split_train_test(ragged_tensor=windows)

def build_model(input_shape):
    """Assemble the (uncompiled) Keras Sequential model used for the windows.

    The input is reshaped to add a trailing channel axis, passed through two
    Conv1D layers (16 and 8 filters, kernel size 3, ReLU, 'same' padding) with
    a max-pool of 2 in between, collapsed by global average pooling, and then
    projected by a Dense layer to input_shape[0] values.

    Args:
        input_shape: shape of one sample; only its first entry is used for
            the layer sizes.

    Returns:
        A models.Sequential instance.
    """
    n_samples = input_shape[0]
    layer_stack = [
        layers.Input(shape=input_shape),
        # Conv1D needs an explicit channel axis.
        layers.Reshape((n_samples, 1)),
        layers.Conv1D(16, 3, activation='relu', padding='same'),
        layers.MaxPooling1D(2),
        layers.Conv1D(8, 3, activation='relu', padding='same'),
        layers.GlobalAveragePooling1D(),
        # Project back to one value per input sample.
        layers.Dense(n_samples),
        layers.Reshape((n_samples,)),
    ]
    return models.Sequential(layer_stack)

# Standardise the windows before training. Fed in unscaled, the raw
# measurements give an MSE in the tens of thousands that barely moves between
# epochs. The statistics come from the training split only, so nothing about
# the held-out windows leaks into the preprocessing.
signal_mean = tf.reduce_mean(train_x)
signal_std = tf.maximum(tf.math.reduce_std(train_x), 1e-8)  # avoid dividing by zero
train_x_norm = (train_x - signal_mean) / signal_std
test_x_norm = (test_x - signal_mean) / signal_std

# The model is trained to reproduce its own input (input and target are the
# same tensor), with MSE as the loss and MAE tracked as an extra metric.
model = build_model(train_x.shape[1:])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

history = model.fit(
    train_x_norm, train_x_norm,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(test_x_norm, test_x_norm)
)

# Note: both metrics are now reported in standardised units, not raw signal units.
eval_loss, eval_mae = model.evaluate(test_x_norm, test_x_norm)
print(f"\nTest MSE: {eval_loss:.4f}, Test MAE: {eval_mae:.4f}")

Total windows: 50834
Epoch 1/10
[1m2542/2542[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - loss: 54188.5742 - mae: 120.7465 - val_loss: 50506.8672 - val_mae: 118.5531
Epoch 2/10
[1m2542/2542[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - loss: 47858.4258 - mae: 117.4641 - val_loss: 50540.6016 - val_mae: 117.9903
Epoch 3/10
[1m2542/2542[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - loss: 49336.3086 - mae: 117.9712 - val_loss: 50430.1758 - val_mae: 118.3711
Epoch 4/10
[1m2542/2542[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - loss: 50040.8711 - mae: 119.0928 - val_loss: 50394.4688 - val_mae: 118.6937
Epoch 5/10
[1m2542/2542[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3ms/step - loss: 48966.3750 - mae: 117.9886 - val_loss: 50394.2227 - val_mae: 117.5844
Epoch 6/10
[1m2542/2542[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step - loss: 51864.8711 - mae: 118.9607 - val_loss: 50249.51

EchocardioModel training attempt (we were not able to succeed because of limitations in data processing: the dataset ships as an 8 GB zip archive, and even after multiple attempts at unzipping it, a large portion of the files listed in FileList.csv is still missing)

In [None]:
# Convert every video listed in FileList.csv into a cached .npz file (first
# colour channel of each frame plus per-frame timestamps) and record the cache
# path of each video in `files`, keyed by video ID.
video_cache_folder = './cache/EchoNet-Dynamic/Videos'

# exist_ok makes the cell safe to re-run.
os.makedirs(video_cache_folder, exist_ok=True)

data_info = pd.read_csv('./data/EchoNet-Dynamic/FileList.csv')
# Strip a trailing '.avi' only when it is actually present; cutting four
# characters unconditionally would truncate IDs that are listed without an
# extension. NOTE(review): confirm which form this copy of FileList.csv uses.
data_info['globalID'] = data_info['FileName'].apply(
    lambda s: s[:-4] if s.endswith('.avi') else s
).astype('string')
data_info = data_info.set_index('globalID')

files = dict()
for index, row in data_info.iterrows():
    # Relative to the project directory (the notebook has already chdir'ed
    # there), matching how FileList.csv is located above. The previous
    # './content/drive/...' prefix pointed at a non-existent nested folder.
    filepath = './data/EchoNet-Dynamic/Videos/' + index + '.avi'
    filepath_cached = video_cache_folder + '/' + index + '.npz'

    # Decode only videos that have not been cached yet.
    if not os.path.exists(filepath_cached):

        # Keep channel 0 of every frame.
        frames = skvideo.io.vread(filepath)[:, :, :, 0]

        # Timestamp of frame i is i / FPS, with FPS taken from the file list.
        time_base = 1/row['FPS']
        times = [i*time_base for i in range(len(frames))]

        np.savez(filepath_cached, frames=frames, times=times)

    files[index] = filepath_cached

In [None]:
# 5-fold cross-validated training of EchocardioModel on the cached videos.
# Both names below were used without ever being imported in the notebook.
from sklearn.model_selection import KFold
# NOTE(review): module path assumed by analogy with `from models.ecg import ECGModel`
# -- confirm against the DeepHeartBeat sources.
from models.echo import EchocardioModel

# Pool the predefined TRAIN and VAL splits; the folds are drawn from their union.
echonet_train_ids = data_info[data_info.Split == 'TRAIN'].index.values
echonet_val_ids = data_info[data_info.Split == 'VAL'].index.values
ids = list(echonet_train_ids) + list(echonet_val_ids)

# Keep the `files` dict intact and store the array under its own name, so the
# cell can be re-run without the lookup failing on an already-converted array.
echonet_files = np.array([files[video_id] for video_id in ids])
kf = KFold(n_splits=5, shuffle=True, random_state=230)
for i, (train_index, val_index) in enumerate(kf.split(echonet_files)):

    train_files = echonet_files[train_index]
    val_files = echonet_files[val_index]

    # Same output folder as the ECG model (the folder name was previously misspelled).
    trained_model_path = './self_trained_models/echonet_dynamic_' + str(i)

    model = EchocardioModel(latent_space_dim=128, batch_size=32, hidden_dim=128, log_dir=trained_model_path)
    model.fit(train_files, val_files)

    model.save_weights(trained_model_path)

Ablation in rem