In [1]:
import pandas as pd
import numpy as np
from scipy.signal import find_peaks
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

# --- 1. Data Loading and Preprocessing ---
def load_and_preprocess(data_path):
    df = pd.read_csv(data_path)
    peaks, _ = find_peaks(df["MLII"], distance=150)
    heartbeats = []
    for peak in peaks:
        start = max(0, peak - 150)
        end = min(len(df), peak + 150)
        heartbeat = df["MLII"][start:end].values
        if len(heartbeat) == 300:
            heartbeats.append(heartbeat)
    heartbeats = np.array(heartbeats).reshape(-1, 300, 1)
    return heartbeats

def augment(heartbeat):
    noise = np.random.normal(0, 0.01, heartbeat.shape)
    time_shift = np.random.randint(-10, 10)
    augmented = tf.roll(heartbeat + noise, shift=time_shift, axis=0)
    return augmented

data_path = "C:/Users/abdulssekyanzi/EDA Dataset.csv/100.csv" #replace
heartbeats = load_and_preprocess(data_path)

# --- 2. Model Architecture ---
def create_encoder(input_shape):
    model = models.Sequential([
        layers.Conv1D(64, 5, activation="relu", input_shape=input_shape),
        layers.MaxPooling1D(2),
        layers.Conv1D(128, 3, activation="relu"),
        layers.GlobalAveragePooling1D(),
        layers.Dense(128),
    ])
    return model

encoder = create_encoder((300, 1))

# NT-Xent Loss
def nt_xent_loss(embeddings, temperature=0.1):
    batch_size = tf.shape(embeddings)[0] // 2
    embeddings_a = embeddings[:batch_size]
    embeddings_b = embeddings[batch_size:]
    normalized_embeddings_a = tf.nn.l2_normalize(embeddings_a, axis=1)
    normalized_embeddings_b = tf.nn.l2_normalize(embeddings_b, axis=1)
    logits = tf.matmul(normalized_embeddings_a, tf.transpose(normalized_embeddings_b)) / temperature
    labels = tf.range(batch_size)
    loss_a = tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
    loss_b = tf.keras.losses.sparse_categorical_crossentropy(labels, tf.transpose(logits), from_logits=True)
    return (tf.reduce_mean(loss_a) + tf.reduce_mean(loss_b)) / 2

# --- 3. Training (Contrastive) ---
optimizer = optimizers.Adam(0.001)

@tf.function
def train_step(heartbeats_list):
    augmented_heartbeats_a = tf.stack([augment(hb) for hb in heartbeats_list])
    augmented_heartbeats_b = tf.stack([augment(hb) for hb in heartbeats_list])
    with tf.GradientTape() as tape:
        embeddings_a = encoder(augmented_heartbeats_a, training=True)
        embeddings_b = encoder(augmented_heartbeats_b, training=True)
        embeddings = tf.concat([embeddings_a, embeddings_b], axis=0)
        loss = nt_xent_loss(embeddings)
    gradients = tape.gradient(loss, encoder.trainable_variables)
    optimizer.apply_gradients(zip(gradients, encoder.trainable_variables))
    return loss

epochs_contrastive = 50
batch_size = 32
for epoch in range(epochs_contrastive):
    for i in range(0, len(heartbeats), batch_size):
        batch = heartbeats[i:i + batch_size]
        loss = train_step(list(batch))
    print(f"Contrastive Epoch {epoch + 1}, Loss: {loss.numpy()}")

# --- 4. Pseudo-Labeling (Clustering) ---
embeddings = encoder.predict(heartbeats)
kmeans = KMeans(n_clusters=4, random_state=42) #Adjust number of clusters
pseudo_labels = kmeans.fit_predict(embeddings)

# --- 5. Supervised Fine-tuning (with Pseudo-Labels) ---
X_train_pseudo, X_test_pseudo, y_train_pseudo, y_test_pseudo = train_test_split(heartbeats, pseudo_labels, test_size=0.2, random_state=42)

classification_head = models.Sequential([
    encoder,
    layers.Dense(len(np.unique(pseudo_labels)), activation="softmax")
])
classification_head.compile(optimizer=optimizers.Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

epochs_finetune = 30
classification_history = classification_head.fit(X_train_pseudo, y_train_pseudo, epochs=epochs_finetune, batch_size=32, validation_split=0.2)

# --- 6. Evaluation ---
loss, accuracy = classification_head.evaluate(X_test_pseudo, y_test_pseudo)
print(f"Pseudo-Test Accuracy: {accuracy}")
import pickle

# Assuming `Contrastive` is your trained machine learning model
with open('contrastive_new.pkl', 'wb') as file:
    pickle.dump(Contrastive, file)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Contrastive Epoch 1, Loss: 1.2971758842468262
Contrastive Epoch 2, Loss: 1.2793577909469604
Contrastive Epoch 3, Loss: 1.2103981971740723
Contrastive Epoch 4, Loss: 1.161797046661377
Contrastive Epoch 5, Loss: 1.22334623336792
Contrastive Epoch 6, Loss: 1.1572153568267822
Contrastive Epoch 7, Loss: 1.1240514516830444
Contrastive Epoch 8, Loss: 1.0323011875152588
Contrastive Epoch 9, Loss: 0.9670186042785645
Contrastive Epoch 10, Loss: 0.9370958805084229
Contrastive Epoch 11, Loss: 0.9415258169174194
Contrastive Epoch 12, Loss: 0.9420788288116455
Contrastive Epoch 13, Loss: 0.9122030138969421
Contrastive Epoch 14, Loss: 0.8657336235046387
Contrastive Epoch 15, Loss: 0.8368385434150696
Contrastive Epoch 16, Loss: 0.7614505887031555
Contrastive Epoch 17, Loss: 0.7314350008964539
Contrastive Epoch 18, Loss: 0.7150031924247742
Contrastive Epoch 19, Loss: 0.706174910068512
Contrastive Epoch 20, Loss: 0.6742541790008545
Contrastive Epoch 21, Loss: 0.6208217144012451
Contrastive Epoch 22, Loss

In [4]:
import pickle

# Assuming `model` is your trained machine learning model
with open('contrastive_new.pkl', 'wb') as file:
    pickle.dump(Contrastive, file)

NameError: name 'Contrastive' is not defined