In [3]:
%env CUDA_HOME=/usr/lib/cuda  # Replace with your actual CUDA path
%env CUDA_PATH=/usr/lib/cuda

env: CUDA_HOME=/usr/lib/cuda  # Replace with your actual CUDA path
env: CUDA_PATH=/usr/lib/cuda


In [4]:
# Set CUDA_VISIBLE_DEVICES to 0 in this kernel's environment
# (by CUDA convention this exposes only the first GPU to libraries loaded afterwards).
%env CUDA_VISIBLE_DEVICES=0 

env: CUDA_VISIBLE_DEVICES=0


In [None]:
import os
import glob
import h5py
import matplotlib.pyplot as plt
import numpy as np
import neurokit2 as nk
from scipy.signal import butter, filtfilt
from sklearn.cluster import KMeans
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.decomposition import PCA

In [1]:
# NOTE(review): `!` runs the command in a separate shell process, so this `module load`
# presumably does not change the environment of the already-running kernel — the
# torch.cuda.is_available() check further down printing False is consistent with that.
# Confirm, and if so load the module before starting Jupyter instead.
!module load PyTorch/1.12.1-foss-2022a-CUDA-11.7.0

In [6]:
import torch

# Ask PyTorch whether a CUDA device is usable from this kernel and print the answer.
cuda_available = torch.cuda.is_available()
print(cuda_available)

False


In [None]:
# Notebook-wide configuration for data loading and training.
RECORDS_FOLDER = "/mnt/iridia/sehlalou/thesis/ECGs"  # folder searched for *.h5 record files
# Presumably the ECG sampling frequency in Hz — TODO confirm. Not referenced by the
# visible cells: bandpass_filter relies on its own fs=200 default instead.
SAMPLING_RATE = 200
WINDOW_SIZE = 8192  # Number of samples per ECG segment
STEP_SIZE = 4096    # 50% overlap between consecutive segments
BATCH_SIZE = 128  # number of segments per batch fed to the model
EPOCHS = 50  # number of passes over the dataset during training

## Segmentation du signal ECG en fenêtres de taille fixe

In [None]:
def segment_signal(signal, window_size, step_size):
    """Split a 1-D signal into fixed-length, possibly overlapping windows.

    Windows start at 0, step_size, 2 * step_size, ... and every window that
    fits entirely inside the signal is kept, including the one that ends
    exactly on the last sample.

    Args:
        signal: 1-D sequence or array of samples.
        window_size: Number of samples per window.
        step_size: Offset between the starts of consecutive windows.

    Returns:
        Array of shape (n_windows, window_size). When the signal is shorter
        than window_size, no window fits and an empty array is returned.
    """
    # Last index at which a full window still fits; the "+ 1" below makes this
    # start inclusive so the final window is not dropped.
    last_start = len(signal) - window_size
    segments = [
        signal[start : start + window_size]
        for start in range(0, last_start + 1, step_size)
    ]
    return np.array(segments)

## Filtrage passe-bande

In [None]:
def bandpass_filter(signal, lowcut=0.5, highcut=50, fs=200, order=2):
    """Band-pass a signal with a Butterworth filter applied forward and backward.

    Args:
        signal: Samples to filter.
        lowcut: Lower band edge, in the same unit as fs.
        highcut: Upper band edge, in the same unit as fs.
        fs: Sampling frequency of the signal.
        order: Order passed to the Butterworth design.

    Returns:
        The filtered signal, as returned by scipy.signal.filtfilt.
    """
    nyquist = 0.5 * fs
    # butter() takes the band edges as fractions of the Nyquist frequency.
    normalized_band = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = butter(order, normalized_band, btype='band')
    return filtfilt(numerator, denominator, signal)

## Chargement en mémoire des records (à la volée)

In [None]:
def _plot_filtering(raw, filtered):
    """Show the raw signal, the filtered signal and their difference in three stacked panels."""
    fig, axes = plt.subplots(3, 1, figsize=(12, 6))

    axes[0].plot(raw, color='gray', alpha=0.7, label="ECG brut (avec bruit)")
    axes[0].legend()

    axes[1].plot(filtered, color='blue', label="ECG filtré")
    axes[1].legend()

    # What the band-pass filter removed from the raw signal.
    axes[2].plot(raw - filtered, color='red', label="Bruit supprimé")
    axes[2].legend()

    axes[2].set_xlabel("Temps (échantillons)")
    fig.tight_layout()
    plt.show()
    plt.close(fig)  # release the figure so repeated calls do not accumulate open figures


def data_generator(records_folder, plot=False, skip_samples=6000):
    """Yield (input, target) ECG windows from every ``*.h5`` record in a folder.

    For each record, column 0 of the ``"ecg"`` dataset is read, the first
    ``skip_samples`` samples are dropped, the remainder is band-pass filtered
    with ``bandpass_filter`` and cut into windows of ``WINDOW_SIZE`` samples
    every ``STEP_SIZE`` samples with ``segment_signal``.

    Args:
        records_folder: Directory containing the ``*.h5`` record files.
        plot: When True, show a raw / filtered / removed-noise figure for each
            record. Off by default because the generator is re-run on every
            pass over the dataset (each training epoch and each predict call),
            which would otherwise produce one figure per record per pass.
        skip_samples: Number of leading samples discarded from each record.

    Yields:
        Tuples ``(segment, segment)`` of float32 arrays with shape
        ``(WINDOW_SIZE, 1)``; the target equals the input (autoencoder setup).
    """
    record_files = sorted(glob.glob(os.path.join(records_folder, "*.h5")))
    for record_file in record_files:
        # Read only the column that is used and close the file right away, so
        # no HDF5 handle stays open while the generator is suspended at a yield.
        with h5py.File(record_file, "r") as f:
            raw_ecg_lead1 = f["ecg"][skip_samples:, 0]

        # A record shorter than one window cannot produce any segment, and a
        # very short one would make filtfilt raise; skip it instead.
        if len(raw_ecg_lead1) < WINDOW_SIZE:
            continue

        filtered_ecg_lead1 = bandpass_filter(raw_ecg_lead1)

        if plot:
            _plot_filtering(raw_ecg_lead1, filtered_ecg_lead1)

        segments = segment_signal(filtered_ecg_lead1, WINDOW_SIZE, STEP_SIZE)
        for seg in segments:
            # Add a trailing channel axis for Conv1D and match the declared dtype.
            seg = np.expand_dims(seg, axis=-1).astype(np.float32)
            yield seg, seg  # (input, target)


In [None]:
# Input and target share the same element spec: one float32 window of shape (WINDOW_SIZE, 1).
segment_spec = tf.TensorSpec(shape=(WINDOW_SIZE, 1), dtype=tf.float32)

# `output_signature` replaces the deprecated `output_types` / `output_shapes` arguments.
# The lambda lets tf.data create a fresh generator each time the dataset is iterated.
dataset = tf.data.Dataset.from_generator(
    lambda: data_generator(RECORDS_FOLDER),
    output_signature=(segment_spec, segment_spec),
)
dataset = dataset.batch(BATCH_SIZE)

## Design of the autoencoder

In [None]:
# The decoder upsamples by 2 twice, so the window length must be a multiple of 4
# for the reconstruction to have exactly the same length as the input.
if WINDOW_SIZE % 4 != 0:
    raise ValueError("WINDOW_SIZE must be divisible by 4 for this architecture")

input_shape = (WINDOW_SIZE, 1)
inputs = tf.keras.Input(shape=input_shape)

# --- Encoder: two conv + pool stages, each halving the time axis ---
x = layers.Conv1D(32, 3, activation="relu", padding="same")(inputs)
x = layers.MaxPooling1D(2, padding="same")(x)
x = layers.Conv1D(16, 3, activation="relu", padding="same")(x)
x = layers.MaxPooling1D(2, padding="same")(x)
x = layers.Flatten()(x)
# Named so the bottleneck can be retrieved with autoencoder.get_layer("encoded").
encoded = layers.Dense(10, activation="relu", name="encoded")(x)  # Latent space

# --- Decoder: start at WINDOW_SIZE // 4 steps so that the two x2 upsamplings
# bring the output back to WINDOW_SIZE steps (the loss compares it to the input) ---
decoder_steps = WINDOW_SIZE // 4
x = layers.Dense(decoder_steps, activation="relu")(encoded)
x = layers.Reshape((decoder_steps, 1))(x)
x = layers.Conv1DTranspose(16, 3, activation="relu", padding="same")(x)
x = layers.UpSampling1D(2)(x)
x = layers.Conv1DTranspose(32, 3, activation="relu", padding="same")(x)
x = layers.UpSampling1D(2)(x)
# Linear output: the targets are band-pass filtered ECG windows (DC removed, so
# they take negative values), which a sigmoid bounded to (0, 1) cannot reproduce.
decoded = layers.Conv1D(1, 3, activation="linear", padding="same")(x)

## Autoencoder training

In [None]:
# Wire the input tensor to the reconstruction output, then configure training
# with the Adam optimizer and a mean-squared-error reconstruction loss.
autoencoder = models.Model(inputs=inputs, outputs=decoded)
autoencoder.compile(loss="mse", optimizer="adam")
autoencoder.summary()

In [None]:
# Train on the batched dataset; each element already carries its own target
# (the input window itself), so no separate `y` is passed.
autoencoder.fit(x=dataset, epochs=EPOCHS)


In [None]:
# Build the encoder from the bottleneck tensor itself rather than looking a layer
# up by name: the Dense bottleneck was created without name="encoded", so
# get_layer('encoded') would raise on the model as defined.
encoder = models.Model(inputs=autoencoder.input, outputs=encoded)

# One latent vector per window; predict() uses only the input half of each (input, target) pair.
latent_features = encoder.predict(dataset)

In [None]:
n_clusters = 2

# Build the K-Means estimator and fit it on the latent vectors in one step
# (fit() returns the fitted estimator); random_state pins the initialisation.
kmeans = KMeans(random_state=0, n_clusters=n_clusters).fit(latent_features)

# Cluster index assigned to each latent vector
cluster_labels = kmeans.labels_