In [4]:
# Execute the preprocessing notebook before anything else in this notebook.
# NOTE(review): later cells use names that are never defined here (denoised_signals,
# allowed_patients, no_patients, no_channels, np) - presumably this run creates them; confirm.
%run "Data_PreProcessing.ipynb"

  0%|                                                                                          | 0/549 [00:00<?, ?it/s]

Filtering Database


100%|████████████████████████████████████████████████████████████████████████████████| 549/549 [01:01<00:00,  8.93it/s]
  5%|████▎                                                                           | 12/221 [00:00<00:01, 118.54it/s]

221 remaining out of 290
normalising and preproccessing signals using DWT


100%|███████████████████████████████████████████████████████████████████████████████| 221/221 [00:01<00:00, 134.65it/s]


In [28]:
#using one channel only for now

def get_diagnosis_counts(labels, diagnosis_set):
    """Count how often each diagnosis occurs in ``labels``.

    Parameters
    ----------
    labels : iterable
        Diagnosis label of every sample; materialised into a list once.
    diagnosis_set : iterable
        Diagnoses to report on. Each one becomes a key of the result, even
        when it never occurs in ``labels`` (its count is then 0).

    Returns
    -------
    dict
        Maps each diagnosis in ``diagnosis_set`` to its number of occurrences.
    """
    label_list = list(labels)
    return {name: label_list.count(name) for name in diagnosis_set}


# Diagnosis label of every patient, in patient-index order.
detailed_health_state = np.array(
    [allowed_patients.get_patients(idx).get_diagnosis() for idx in range(no_patients)]
)

# One boolean row per channel. Despite the name, True marks a signal that is
# NOT entirely NaN (i.e. one that is kept), False marks an all-NaN signal.
nan_indices = np.array(
    [
        [not np.isnan(signal).all() for signal in denoised_signals[:, channel]]
        for channel in range(no_channels)
    ]
)

# Diagnosis distribution among the signals kept on the first channel.
diag_counts = get_diagnosis_counts(
    detailed_health_state[nan_indices[0]],
    allowed_patients.get_diagnosis_list(),
)
print(diag_counts)

# These have very few appearances in the data set, so they are removed.
diagnoses_to_remove = ['Stable angina', 'Myocarditis', 'Valvular heart disease', 'Hypertrophy']

# For every channel: a keep-mask over the non-NaN samples (False for the rare
# diagnoses listed above) and the labels that survive that mask.
diagnosis_indices = []
health_state = []
for channel in range(no_channels):
    channel_states = detailed_health_state[nan_indices[channel]]
    keep_mask = [state not in diagnoses_to_remove for state in channel_states]
    diagnosis_indices.append(keep_mask)
    health_state.append(channel_states[keep_mask])

# Using one channel only for now: signals and labels of channel 0.
data = denoised_signals[:, 0][nan_indices[0]][diagnosis_indices[0]]
labels = health_state[0]

{'Myocardial infarction': 111, 'Healthy control': 44, 'Dysrhythmia': 9, 'Cardiomyopathy': 10, 'Hypertrophy': 7, 'Bundle branch block': 10, 'Valvular heart disease': 1, 'Stable angina': 1, 'Myocarditis': 3}


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np

# Embedding function: a small fully connected network
class EmbeddingNet(nn.Module):
    """Two-layer perceptron used as the embedding function.

    The layers are Linear(input_dim, hidden_dim) -> ReLU ->
    Linear(hidden_dim, output_dim), stored as ``self.network``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the embedding of ``x`` produced by ``self.network``."""
        embedding = self.network(x)
        return embedding

# Prototypical network: an encoder plus prototype / nearest-prototype helpers
class PrototypicalNetwork(nn.Module):
    """Wraps an ``EmbeddingNet`` encoder and adds prototype utilities."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.encoder = EmbeddingNet(input_dim, hidden_dim, output_dim)

    def forward(self, x):
        """Embed ``x`` with the encoder."""
        return self.encoder(x)

    def compute_prototypes(self, support_data, support_labels, n_classes):
        """Return one prototype per class id in ``range(n_classes)``.

        Each prototype is the mean (over dim 0) of the rows of ``support_data``
        whose entry in ``support_labels`` equals the class id; the prototypes
        are stacked in class-id order.
        """
        class_means = [
            support_data[support_labels == class_id].mean(dim=0)
            for class_id in range(n_classes)
        ]
        return torch.stack(class_means)

    def classify(self, prototypes, query_data):
        """Return, for every query row, the index of the closest prototype
        according to ``torch.cdist``."""
        return torch.cdist(query_data, prototypes).argmin(dim=1)

# Define a function to train the prototypical network
def train_prototypical_network(model, train_loader, optimizer, criterion, n_classes):
    """Run one training pass over episodic batches.

    Parameters
    ----------
    model : nn.Module
        Called to embed the data; must also provide
        ``compute_prototypes(embeddings, labels, n_classes)``.
    train_loader : iterable
        Yields ``(support_data, support_labels, query_data, query_labels)``.
    optimizer : torch.optim.Optimizer
        Optimizer over the model parameters.
    criterion : callable
        Loss taking ``(logits, query_labels)``, e.g. ``nn.CrossEntropyLoss``.
    n_classes : int
        Number of classes (prototypes) per episode.

    Returns
    -------
    float
        Mean loss over the processed batches (0.0 if the loader was empty).
    """
    model.train()
    total_loss = 0.0
    n_batches = 0
    for support_data, support_labels, query_data, query_labels in train_loader:
        optimizer.zero_grad()

        # Compute embeddings for support and query data
        support_embeddings = model(support_data)
        query_embeddings = model(query_data)

        # Compute prototypes
        prototypes = model.compute_prototypes(support_embeddings, support_labels, n_classes)

        # Score every query against every prototype. The loss needs
        # differentiable per-class scores, so the negative distance is used as
        # the logit (closest prototype -> largest logit). Hard argmin class
        # indices are integers: they carry no gradient and are not valid
        # logits for a cross-entropy loss.
        logits = -torch.cdist(query_embeddings, prototypes)
        loss = criterion(logits, query_labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        n_batches += 1

    return total_loss / n_batches if n_batches else 0.0

# Define a function to evaluate the prototypical network
def evaluate_prototypical_network(model, test_loader, n_classes):
    """Measure nearest-prototype accuracy over episodic batches.

    Parameters
    ----------
    model : nn.Module
        Called to embed the data; must also provide ``compute_prototypes``
        and ``classify``.
    test_loader : iterable
        Yields ``(support_data, support_labels, query_data, query_labels)``.
    n_classes : int
        Number of classes (prototypes) per episode.

    Returns
    -------
    float
        Fraction of query samples whose predicted class equals the label,
        or 0.0 when the loader yields no query samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for support_data, support_labels, query_data, query_labels in test_loader:
            # Compute embeddings for support and query data
            support_embeddings = model(support_data)
            query_embeddings = model(query_data)

            # Compute prototypes
            prototypes = model.compute_prototypes(support_embeddings, support_labels, n_classes)

            # Classify query samples
            predictions = model.classify(prototypes, query_embeddings)
            correct += (predictions == query_labels).sum().item()
            total += query_labels.size(0)

    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total

class PTBDataSet(Dataset):
    """Map-style dataset pairing each entry of ``data`` with the entry of
    ``labels`` at the same index."""

    def __init__(self, data, labels):
        self.data, self.labels = data, labels

    def __len__(self):
        """Number of samples, taken from ``data``."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at ``idx``."""
        sample = self.data[idx]
        target = self.labels[idx]
        return sample, target


# Parameters
hidden_dim = 64
output_dim = 32
n_support = 5    # support samples taken from each class
n_query = 15     # query samples per episode (batch size of the query loaders)
learning_rate = 0.001
n_epochs = 100

torch.manual_seed(0)  # reproducible weight initialisation and shuffling

# The diagnosis labels are strings, which cannot be turned into a long tensor
# directly: map every distinct diagnosis to a contiguous integer class id.
class_names, label_ids = np.unique(labels, return_inverse=True)
n_classes = len(class_names)

# NOTE(review): assumes every signal in `data` has the same shape, so that the
# signals can be stacked into one array whose last axis is the feature axis -
# confirm against the preprocessing notebook.
features = torch.tensor(np.stack(list(data)), dtype=torch.float32)
targets = torch.tensor(label_ids, dtype=torch.long)
input_dim = features.shape[-1]

# Split per class so that every class is present in the support set:
# the first n_support samples form the support set, the remaining samples are
# divided between training queries and held-out test queries.
support_idx, train_idx, test_idx = [], [], []
for class_id in range(n_classes):
    class_idx = torch.nonzero(targets == class_id).flatten()
    assert len(class_idx) > n_support + 1, (
        f"class {class_names[class_id]!r} has too few samples for "
        f"{n_support} support samples plus train/test queries"
    )
    pool = class_idx[n_support:]
    half = len(pool) // 2
    support_idx.append(class_idx[:n_support])
    train_idx.append(pool[:half])
    test_idx.append(pool[half:])
support_idx = torch.cat(support_idx)
train_idx = torch.cat(train_idx)
test_idx = torch.cat(test_idx)

support_data, support_labels = features[support_idx], targets[support_idx]
train_query_data, train_query_labels = features[train_idx], targets[train_idx]
query_data, query_labels = features[test_idx], targets[test_idx]

# Create DataLoaders over the query samples
train_query_loader = DataLoader(
    PTBDataSet(train_query_data, train_query_labels), batch_size=n_query, shuffle=True
)
test_query_loader = DataLoader(
    PTBDataSet(query_data, query_labels), batch_size=n_query, shuffle=False
)

# The train/evaluate functions unpack 4-tuples
# (support_data, support_labels, query_data, query_labels), so each query batch
# is paired with the fixed support set to form one episode.
test_loader = [
    (support_data, support_labels, batch_data, batch_labels)
    for batch_data, batch_labels in test_query_loader
]

# Initialize model, optimizer, and loss function
model = PrototypicalNetwork(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Train and evaluate the model
for epoch in range(n_epochs):
    # Rebuilt every epoch so the training queries are reshuffled.
    train_loader = [
        (support_data, support_labels, batch_data, batch_labels)
        for batch_data, batch_labels in train_query_loader
    ]
    train_prototypical_network(model, train_loader, optimizer, criterion, n_classes)
    accuracy = evaluate_prototypical_network(model, test_loader, n_classes)
    print(f'Epoch {epoch+1}/{n_epochs}, Accuracy: {accuracy*100:.2f}%')


ImportError: cannot import name 'TypeAlias' from 'typing_extensions' (C:\Users\court\Documents\anaconda\lib\site-packages\typing_extensions.py)

In [2]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.13.1-cp38-cp38-win_amd64.whl (1.9 kB)


ERROR: Could not find a version that satisfies the requirement tensorflow-intel==2.13.1; platform_system == "Windows" (from tensorflow) (from versions: 0.0.1, 2.10.0.dev20220728, 2.10.0rc0, 2.10.0rc1, 2.10.0rc2, 2.10.0rc3, 2.10.0, 2.10.1, 2.11.0rc0, 2.11.0rc1, 2.11.0rc2, 2.11.0, 2.11.1, 2.12.0rc0, 2.12.0rc1, 2.12.0, 2.12.1, 2.13.0rc0, 2.13.0rc1, 2.13.0rc2, 2.13.0)
ERROR: No matching distribution found for tensorflow-intel==2.13.1; platform_system == "Windows" (from tensorflow)


In [1]:
from jmlib.data import Data
from jmlib.data.loaders import PTBXL

from jmlib.data.processing.common import LambdaModule
from jmlib.data.writers.common import Writer
from jmlib.data.generators.fewshot import FewShotGenerator
from jmlib.data.splitters.common import ClassSplitter 

from typing import Unpack
from keras.layers import TimeDistributed, Lambda, Activation, Flatten, Dense
from keras.layers import BatchNormalization, MaxPooling1D, Conv1D
from keras.models import Sequential
from keras.optimizers.legacy import Adam
from keras.callbacks import ReduceLROnPlateau
from keras.losses import CategoricalCrossentropy

from jmlib.models.common import BaseModel, BaseModelParams
from jmlib.util.models import reduce_tensor, reshape_query, proto_dist
from jmlib.util.models import LinearFusion

from keras.metrics import AUC

import numpy as np
import matplotlib.pyplot as plt

import os

# Location of the PTB-XL download. Set the PTBXL_DIR environment variable to
# run this notebook on another machine; the fallback is the original path.
PTBXL_DIR = os.environ.get(
    "PTBXL_DIR",
    "/Users/jbthompson/Documents/final_folder/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3",
)

# Data pipeline: load PTB-XL, split it into train/val/test, and create the
# few-shot generators (5-way, 5-shot, 5 queries).
ptbxl = Data(name="raw_PTBXL", verbose=True)
ptbxl.add(
    PTBXL(data_dir=PTBXL_DIR),
    ClassSplitter({"train": 0.6, "val": 0.2, "test": 0.2}),
    FewShotGenerator(way=5, shot=5, query=5, batch_size=100)
)
ptbxl.run()

# Two kernel sizes, one per parallel CNN branch; each is passed to Conv1D as
# its kernel size, so the elements are ints (the model default is (3, 7)).
KERNEL_TUPLE = tuple[int, int]
EPOCHS = 100

class PMCNN2022(BaseModel):
    """Zicong Li et al., 2022 - https://doi.org/10.1109/BHI56158.2022.9926948.

    Implementation by Joe McMahon. Adapted from code provided by
    Zicong Li. Summary from paper: "a parallel multi-scale CNN (PM-CNN) based
    prototypical network for arrhythmia classification".
    This implementation supports categorical classification.
    Designed for use on the CPSC-2018 dataset.

    Parameters
    ----------
    lr : float, default=0.001
        Learning rate.
    depth : int, default=4
        Number of convolutional stages in each prototype CNN.
    fd : int, default=512
        Width of the dense layer that ends each prototype CNN; also passed to
        the fusion layers.
    kernels : pair of ints, default=(3, 7)
        Kernel sizes of the two parallel CNN branches. Only the first two
        entries are used.
    filters : int, default=64
        Number of filters in convolutional layers.
    **kwargs
        Keyword arguments forwarded to the ``BaseModel`` constructor.
    """

    LR: float
    DEPTH: int
    FD: int
    KERNELS: KERNEL_TUPLE
    FILTERS: int

    def __init__(self,
                 lr: float = 0.001,
                 depth: int = 4,
                 fd: int = 512,
                 kernels: KERNEL_TUPLE = (3, 7),
                 filters: int = 64,
                 **kwargs: Unpack[BaseModelParams]):
        # Hyper-parameters are stored before the base constructor runs.
        self.LR, self.DEPTH, self.FD = lr, depth, fd
        self.KERNELS, self.FILTERS = kernels, filters
        super().__init__(**kwargs)

    def _layers(self, X):
        """Build the graph from the ``(support, query)`` input pair.

        Both inputs go through the two parallel prototype CNNs, the two branch
        outputs are fused, and the fused support/query tensors are passed to
        ``proto_dist``.
        """
        support_in, query_in = X
        # NOTE(review): the counts are read from axis -4, so both inputs are
        # presumably at least rank 4 - confirm against the generator output.
        n_shot = support_in.shape[-4]
        n_query = query_in.shape[-4]

        branch3 = TimeDistributed(
            self._proto_model(self.KERNELS[0]), name="Prototype_CNN_3"
        )
        branch7 = TimeDistributed(
            self._proto_model(self.KERNELS[1]), name="Prototype_CNN_7"
        )

        support3 = branch3(support_in)
        query3 = branch3(query_in)

        support7 = branch7(support_in)
        query7 = branch7(query_in)

        fused_support = LinearFusion(n_shot, self.FD)(support7, support3)
        fused_query = LinearFusion(n_query, self.FD)(query7, query3)

        reduced_support = Lambda(reduce_tensor, name="Reduce_Support")(fused_support)
        reshaped_query = Lambda(reshape_query, name="Reshape_Query")(fused_query)

        return Lambda(proto_dist, name="Prototype_Distance")(
            [reduced_support, reshaped_query]
        )

    def _proto_model(self, k) -> Sequential:
        """Return one CNN branch using kernel size ``k``.

        ``DEPTH`` stages of Conv1D -> BatchNormalization -> ReLU ->
        MaxPooling1D, followed by Flatten and a Dense layer of width ``FD``.
        """
        cnn = Sequential()
        for _ in range(self.DEPTH):  # type: ignore
            stage = (
                Conv1D(self.FILTERS, k, padding='same'),
                BatchNormalization(),
                Activation('relu'),
                MaxPooling1D(),
            )
            for layer in stage:
                cnn.add(layer)

        for head_layer in (Flatten(), Dense(self.FD)):
            cnn.add(head_layer)
        return cnn

    @property
    def optimizer(self):
        """Adam Optimizer."""
        return Adam(learning_rate=self.LR)  # type: ignore

    @property
    def loss(self):
        """Categorical Crossentropy Loss."""
        return CategoricalCrossentropy()

    @property
    def callbacks(self):
        """Callbacks."""
        # Multiply the learning rate by 0.4 when val_loss has not improved for
        # 2 epochs, never going below 1e-8.
        return [
            ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.4,
                patience=2,
                cooldown=2,
                min_lr=1e-8,  # type: ignore
            )
        ]
    
# Build the model from the data pipeline's input tensors and compile it.
mymodel = PMCNN2022(input_tensors=ptbxl.tensor)
mymodel.model.compile(optimizer=mymodel.optimizer, loss=mymodel.loss, metrics=['categorical_accuracy'])
mymodel.model.summary()

# fit() is called on the underlying Keras model directly, so the callbacks
# defined on PMCNN2022 (ReduceLROnPlateau) must be passed explicitly;
# otherwise the learning-rate schedule is never applied.
results = mymodel.model.fit(
    ptbxl.generators['train'],
    epochs=EPOCHS,
    validation_data=ptbxl.generators['val'],
    callbacks=mymodel.callbacks,
    verbose=2,
)
test_results = mymodel.model.evaluate(ptbxl.generators['test'], verbose=2, return_dict=True)
print(test_results)


def plot_history(epochs, history, metric, label, filename):
    """Plot the training and validation curve of one metric and save it.

    Parameters
    ----------
    epochs : array-like
        X values (epoch numbers), one per recorded epoch.
    history : dict
        Keras ``History.history`` mapping; must contain ``metric`` and
        ``'val_' + metric``.
    metric : str
        Key of the training metric, e.g. ``'loss'``.
    label : str
        Human-readable metric name used in the legend, title and y axis.
    filename : str
        Passed to ``plt.savefig``.
    """
    plt.figure()
    plt.plot(epochs, history[metric], label=f'Training {label}')
    plt.plot(epochs, history[f'val_{metric}'], label=f'Validation {label}')
    plt.title(f'Training and Validation {label}')
    plt.xlabel('Epoch')
    plt.ylabel(label)
    plt.legend()
    plt.savefig(filename)
    plt.show()


# One x value per recorded epoch; taken from the history itself so the plots
# still work if training stops before EPOCHS epochs.
x_data = np.arange(1, len(results.history['loss']) + 1)

plot_history(x_data, results.history, 'categorical_accuracy', 'Accuracy', 'accuracy')
plot_history(x_data, results.history, 'loss', 'Loss', 'loss')

# Overall F1 and AUC on the test generator.
from sklearn.metrics import f1_score, roc_auc_score
# Build the ground-truth labels by repeating one label array from the test generator 100 times.
# NOTE(review): assumes ['data'][1] holds one-hot query labels and that 100 matches the number
# of repetitions needed to line up with the rows returned by predict() (the generator was
# created with batch_size=100) - confirm both against jmlib's FewShotGenerator.
repeated_array = np.tile(ptbxl.generators['test']['data'][1], (100, 1))
# Convert the (presumably one-hot) rows to integer class ids.
true_labels_categorical = np.argmax(repeated_array, axis=1)

# Predictions come from running the model on the test generator (not from `test_results`).
predictions = mymodel.model.predict(ptbxl.generators['test'])
true_labels = true_labels_categorical

# Take the highest-scoring class of every row as the predicted label.
predicted_labels = predictions.argmax(axis=1)

# Calculate F1 score (weighted average over classes)
f1 = f1_score(true_labels, predicted_labels, average='weighted')

# Calculate AUC (one-vs-rest).
# NOTE(review): multi-class roc_auc_score expects per-class probability scores - confirm that
# the model output is normalised that way.
auc = roc_auc_score(true_labels, predictions, multi_class='ovr')

print("Overall F1 Score:", f1)
print('Overall AUC Score:', auc)

ModuleNotFoundError: No module named 'jmlib'