In [3]:
import pandas as pd
import numpy as np
from scipy.signal import find_peaks
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans

# --- 1. Data Loading and Preprocessing ---
def load_and_preprocess(data_path):
    df = pd.read_csv(data_path)
    peaks, _ = find_peaks(df["MLII"], distance=150)
    heartbeats = []
    for peak in peaks:
        start = max(0, peak - 150)
        end = min(len(df), peak + 150)
        heartbeat = df["MLII"][start:end].values
        if len(heartbeat) == 300:
            heartbeats.append(heartbeat)
    heartbeats = np.array(heartbeats).reshape(-1, 300, 1)
    return heartbeats

def augment(heartbeat):
    noise = np.random.normal(0, 0.01, heartbeat.shape)
    time_shift = np.random.randint(-10, 10)
    augmented = tf.roll(heartbeat + noise, shift=time_shift, axis=0)
    return augmented

data_path = "C:/Users/abdulssekyanzi/EDA Dataset.csv/100.csv"
heartbeats = load_and_preprocess(data_path)

# --- 2. Model Architecture ---
def create_encoder(input_shape):
    model = models.Sequential([
        layers.Conv1D(64, 5, activation="relu", input_shape=input_shape),
        layers.MaxPooling1D(2),
        layers.Conv1D(128, 3, activation="relu"),
        layers.GlobalAveragePooling1D(),
        layers.Dense(128),
    ])
    return model

encoder = create_encoder((300, 1))

# NT-Xent Loss
def nt_xent_loss(embeddings, temperature=0.1):
    batch_size = tf.shape(embeddings)[0] // 2
    embeddings_a = embeddings[:batch_size]
    embeddings_b = embeddings[batch_size:]
    normalized_embeddings_a = tf.nn.l2_normalize(embeddings_a, axis=1)
    normalized_embeddings_b = tf.nn.l2_normalize(embeddings_b, axis=1)
    logits = tf.matmul(normalized_embeddings_a, tf.transpose(normalized_embeddings_b)) / temperature
    labels = tf.range(batch_size)
    loss_a = tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
    loss_b = tf.keras.losses.sparse_categorical_crossentropy(labels, tf.transpose(logits), from_logits=True)
    return (tf.reduce_mean(loss_a) + tf.reduce_mean(loss_b)) / 2

# --- 3. Training (Contrastive) ---
optimizer = optimizers.Adam(0.001)

@tf.function
def train_step(heartbeats_list):
    augmented_heartbeats_a = tf.stack([augment(hb) for hb in heartbeats_list])
    augmented_heartbeats_b = tf.stack([augment(hb) for hb in heartbeats_list])
    with tf.GradientTape() as tape:
        embeddings_a = encoder(augmented_heartbeats_a, training=True)
        embeddings_b = encoder(augmented_heartbeats_b, training=True)
        embeddings = tf.concat([embeddings_a, embeddings_b], axis=0)
        loss = nt_xent_loss(embeddings)
    gradients = tape.gradient(loss, encoder.trainable_variables)
    optimizer.apply_gradients(zip(gradients, encoder.trainable_variables))
    return loss

epochs_contrastive = 10
batch_size = 32
for epoch in range(epochs_contrastive):
    for i in range(0, len(heartbeats), batch_size):
        batch = heartbeats[i:i + batch_size]
        loss = train_step(list(batch))
    print(f"Contrastive Epoch {epoch + 1}, Loss: {loss.numpy()}")

# --- 4. Pseudo-Labeling (Clustering) ---
embeddings = encoder.predict(heartbeats)
kmeans = KMeans(n_clusters=4, random_state=42)
pseudo_labels = kmeans.fit_predict(embeddings)

# --- 5. Supervised Fine-tuning (with Pseudo-Labels) ---
X_train_pseudo, X_test_pseudo, y_train_pseudo, y_test_pseudo = train_test_split(
    heartbeats, pseudo_labels, test_size=0.2, random_state=42
)

classification_head = models.Sequential([
    encoder,  # Use trained encoder
    layers.Dense(len(np.unique(pseudo_labels)), activation="softmax")
])
classification_head.compile(
    optimizer=optimizers.Adam(0.001), 
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy']
)

epochs_finetune = 10
classification_history = classification_head.fit(
    X_train_pseudo, y_train_pseudo, epochs=epochs_finetune, batch_size=32, validation_split=0.2
)

# --- 6. Evaluation ---
loss, accuracy = classification_head.evaluate(X_test_pseudo, y_test_pseudo)
print(f"Pseudo-Test Accuracy: {accuracy}")

# ✅ **Save the Model Properly**
classification_head.save("Contrastive.h5")
print("Model saved as Contrastive.h5 ✅")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Contrastive Epoch 1, Loss: 1.28780198097229
Contrastive Epoch 2, Loss: 1.3298695087432861
Contrastive Epoch 3, Loss: 1.2333710193634033
Contrastive Epoch 4, Loss: 1.183311939239502
Contrastive Epoch 5, Loss: 1.1762003898620605
Contrastive Epoch 6, Loss: 1.1023666858673096
Contrastive Epoch 7, Loss: 0.9886217713356018
Contrastive Epoch 8, Loss: 0.9920879602432251
Contrastive Epoch 9, Loss: 1.013812780380249
Contrastive Epoch 10, Loss: 1.0290682315826416
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step   
Epoch 1/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - accuracy: 0.5773 - loss: 1.2200 - val_accuracy: 0.6146 - val_loss: 0.9552
Epoch 2/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - accuracy: 0.6231 - loss: 0.8884 - val_accuracy: 0.7390 - val_loss: 0.6108
Epoch 3/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - accuracy: 0.8142 - loss: 0.5900 - val_accuracy: 0.7805 - v



Pseudo-Test Accuracy: 0.91796875
Model saved as Contrastive.h5 ✅
