<a href="https://colab.research.google.com/github/sreesriavvaru123/MI/blob/main/MI_detection_v1_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Google Colab-compatible notebook for myocardial infarction (MI) detection on 12-lead PTB-XL ECGs, using a 60/20/20 hold-out split (a 5-fold cross-validation cell is included but disabled)



In [None]:
# Install wfdb into the running kernel's environment. The explicit %pip magic is
# used instead of relying on automagic, and --quiet keeps the install log out of
# the saved notebook.
%pip install --quiet wfdb

Collecting wfdb
  Downloading wfdb-4.3.0-py3-none-any.whl.metadata (3.8 kB)
Collecting pandas>=2.2.3 (from wfdb)
  Downloading pandas-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading wfdb-4.3.0-py3-none-any.whl (163 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/163.8 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pandas-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m114.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pandas, wfdb
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalled pandas-2.2.2
[31mERROR: pip's dependency resolver does not currently take into accoun

## STEP 1: Install necessary packages



In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install --quiet wfdb scikit-learn matplotlib tensorflow


## STEP 2: Import required libraries


In [None]:
import ast
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import wfdb
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed
from tensorflow.keras.utils import to_categorical

## STEP 3: Load metadata and diagnostic class mapping

In [None]:
def load_ptbxl_metadata(path):
    """Load the PTB-XL record metadata table.

    Args:
        path: Directory that contains ``ptbxl_database.csv``.

    Returns:
        pandas.DataFrame with one row per CSV line. The ``scp_codes`` column is
        converted from its string form into Python objects with
        ``ast.literal_eval``.
    """
    metadata_file = os.path.join(path, 'ptbxl_database.csv')
    records = pd.read_csv(metadata_file)
    # The CSV stores each record's SCP codes as the text of a Python literal.
    parsed_codes = records.scp_codes.apply(ast.literal_eval)
    records['scp_codes'] = parsed_codes
    return records

# Load SCP statements
def load_diagnostic_class_mapping(path):
    """Load the SCP statement table.

    Args:
        path: Directory that contains ``scp_statements.csv``.

    Returns:
        pandas.DataFrame read from the CSV, indexed by its first column.
    """
    statements_file = os.path.join(path, 'scp_statements.csv')
    statements = pd.read_csv(statements_file, index_col=0)
    return statements

## STEP 4: Apply MI vs Non-MI filter

In [None]:
def apply_diagnostic_filter(data, class_map):
    """Label each record with its diagnostic classes and an MI flag.

    Adds two columns to ``data`` in place:

    * ``diagnostic_class``: sorted list of the distinct ``diagnostic_class``
      values that the record's SCP codes map to in ``class_map``.
    * ``is_MI``: 1 if ``'MI'`` is among those classes, otherwise 0.

    Args:
        data: DataFrame with an ``scp_codes`` column whose values are iterables
            of SCP codes (e.g. dicts keyed by code).
        class_map: DataFrame indexed by SCP code with a ``diagnostic_class``
            column. Rows whose ``diagnostic_class`` is null are ignored, so the
            unfiltered statement table can be passed directly.

    Returns:
        The same ``data`` object, with the two columns added.
    """
    # Build the code -> class lookup once instead of calling .loc per code, and
    # drop null classes so NaN never ends up in a record's class list.
    class_lookup = class_map['diagnostic_class'].dropna().to_dict()

    def _classes_for(codes):
        # Sorting makes the list order deterministic (set order is not).
        return sorted({class_lookup[code] for code in codes if code in class_lookup})

    data['diagnostic_class'] = data.scp_codes.apply(_classes_for)
    data['is_MI'] = data.diagnostic_class.apply(lambda classes: 1 if 'MI' in classes else 0)
    return data


## STEP 5: Load ECG signals

In [None]:
def load_raw_data(df, sampling_rate, path):
    """Read the waveform of every record listed in ``df``.

    Args:
        df: DataFrame with ``filename_lr`` and ``filename_hr`` columns holding
            record paths relative to ``path``.
        sampling_rate: ``100`` selects the ``filename_lr`` records; any other
            value selects the ``filename_hr`` records.
        path: Root directory the record paths are joined onto.

    Returns:
        numpy.ndarray built from the list of per-record signal arrays returned
        by ``wfdb.rdsamp``, in the row order of ``df``.
    """
    # The column choice does not depend on the row, so decide it once and
    # iterate over that column directly instead of building a row Series with
    # df.iloc[i] for every record.
    filename_column = 'filename_lr' if sampling_rate == 100 else 'filename_hr'

    signals = []
    for record_name in df[filename_column]:
        # rdsamp returns (signal, metadata); only the signal is kept.
        signal, _ = wfdb.rdsamp(os.path.join(path, record_name))
        signals.append(signal)
    return np.array(signals)

## STEP 6: Define the 1D CNN model (a plain convolutional stack; it has no residual connections)

In [None]:
def build_model(input_shape):
    """Build the (uncompiled) 1D convolutional classifier.

    The network is three Conv1D stages (64, 128 and 256 filters), the first two
    followed by batch normalization and max pooling, then global average
    pooling, a 64-unit dense layer with dropout, and a 2-way softmax output.

    Args:
        input_shape: Shape of one sample, without the batch dimension.

    Returns:
        A Keras ``Sequential`` model. The caller is responsible for compiling it.
    """
    model = Sequential()
    # Declare the input with an explicit Input layer rather than passing
    # input_shape= to the first Conv1D, which Keras warns against for
    # Sequential models.
    model.add(Input(shape=input_shape))
    model.add(Conv1D(64, kernel_size=7, activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(128, kernel_size=5, activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(256, kernel_size=3, activation='relu', padding='same'))
    # Collapse the time axis so the dense head sees one vector per sample.
    model.add(GlobalAveragePooling1D())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    # Two softmax units, matching the one-hot (to_categorical) labels.
    model.add(Dense(2, activation='softmax'))
    return model

## STEP 7: Main pipeline

In [None]:
from google.colab import files

# Upload kaggle.json. The result is bound to a name on purpose: files.upload()
# returns a dict of file name -> file bytes, and leaving the call as the cell's
# last expression would echo the API credentials into the saved notebook output.
uploaded = files.upload()
print(f"Uploaded: {sorted(uploaded)}")

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [None]:
# Put the uploaded API token where the kaggle CLI looks for it, readable only by
# the current user.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Install kaggle CLI if not already (%pip targets the kernel's environment)
%pip install -q kaggle

# Download the PTB-XL dataset
!kaggle datasets download -d khyeh0719/ptb-xl-dataset

# Unzip the dataset. -q suppresses the per-file listing (thousands of lines);
# -n skips files that already exist, so re-running the cell is safe.
!unzip -q -n ptb-xl-dataset.zip -d ptb-xl


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/records500/19000/19338_hr.hea  
  inflating: ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/records500/19000/19339_hr.dat  
  inflating: ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/records500/19000/19339_hr.hea  
  inflating: ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/records500/19000/19340_hr.dat  
  inflating: ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/records500/19000/19340_hr.hea  
  inflating: ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/records500/19000/19341_hr.dat  
  inflating: ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/records500/19000/19341_hr.hea  
  inflating: ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/records

In [None]:
ptbxl_path = '/content/ptb-xl/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1'


# Prepare dataset: load the record metadata and the SCP statement table, keep
# only the statements that carry a diagnostic class, then label every record
# with the distinct diagnostic classes of its SCP codes and an MI flag.
df = load_ptbxl_metadata(ptbxl_path)
scp_map = load_diagnostic_class_mapping(ptbxl_path)
scp_map = scp_map[scp_map.diagnostic_class.notnull()]
df['diagnostic_superclass'] = df.scp_codes.apply(
    lambda codes: list(set(scp_map.loc[code].diagnostic_class for code in codes if code in scp_map.index)))
df['MI'] = df.diagnostic_superclass.apply(lambda classes: 1 if 'MI' in classes else 0)

# NOTE: 'MI' is always 0 or 1 (see the line above), so this mask keeps every
# row. It does not select records by lead count or duration.
df_filtered = df[df['MI'].isin([0, 1])]


# Load ECG data (100Hz, 12 leads, 10 seconds)
X = load_raw_data(df_filtered, sampling_rate=100, path=ptbxl_path)
y = df_filtered['MI'].values

# Convert labels to categorical
y_cat = to_categorical(y)

# Split row indices (60% train, 20% val, 20% test). Splitting indices rather
# than the signal array lets the normalization statistics be computed from the
# training rows before any normalized split is materialized.
row_idx = np.arange(len(X))
train_idx, temp_idx, y_train, y_temp = train_test_split(
    row_idx, y_cat, test_size=0.4, random_state=42, stratify=y_cat)
val_idx, test_idx, y_val, y_test = train_test_split(
    temp_idx, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# Normalize with the mean/std of the TRAINING rows only, so no information
# from the validation or test records leaks into preprocessing.
X_fit = X[train_idx]
train_mean, train_std = np.mean(X_fit), np.std(X_fit)
del X_fit  # free the temporary copy before allocating the normalized array
X = (X - train_mean) / train_std

X_train, X_temp = X[train_idx], X[temp_idx]
X_val, X_test = X[val_idx], X[test_idx]

## Model setup and Training

In [None]:
# Training hyperparameters, collected here so they are easy to find and tune.
SEED = 42
LEARNING_RATE = 0.001
EPOCHS = 100
BATCH_SIZE = 32
EARLY_STOPPING_PATIENCE = 10

# Seed the Python, NumPy and backend random generators BEFORE the model is
# built, so weight initialization, dropout and batch shuffling are repeatable.
# (GPU kernels may still introduce small run-to-run differences.)
set_random_seed(SEED)

# Model setup
model = build_model(X_train.shape[1:])
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE), loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks
# NOTE: the checkpoint tracks val_accuracy while early stopping tracks val_loss,
# so the weights saved in best_model.h5 and the weights restored into `model`
# when training stops can come from different epochs.
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_loss', patience=EARLY_STOPPING_PATIENCE, restore_best_weights=True)

# Train model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[checkpoint, early_stopping],
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273ms/step - accuracy: 0.7816 - loss: 0.4870



[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 300ms/step - accuracy: 0.7817 - loss: 0.4869 - val_accuracy: 0.8463 - val_loss: 0.3613
Epoch 2/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 289ms/step - accuracy: 0.8368 - loss: 0.3803



[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 314ms/step - accuracy: 0.8368 - loss: 0.3803 - val_accuracy: 0.8580 - val_loss: 0.3252
Epoch 3/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 297ms/step - accuracy: 0.8583 - loss: 0.3400 - val_accuracy: 0.8525 - val_loss: 0.3557
Epoch 4/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 270ms/step - accuracy: 0.8688 - loss: 0.3141



[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 295ms/step - accuracy: 0.8688 - loss: 0.3141 - val_accuracy: 0.8727 - val_loss: 0.3055
Epoch 5/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step - accuracy: 0.8816 - loss: 0.2904



[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 311ms/step - accuracy: 0.8816 - loss: 0.2904 - val_accuracy: 0.8734 - val_loss: 0.2937
Epoch 6/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295ms/step - accuracy: 0.8819 - loss: 0.2819



[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 320ms/step - accuracy: 0.8819 - loss: 0.2819 - val_accuracy: 0.8750 - val_loss: 0.2959
Epoch 7/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 292ms/step - accuracy: 0.8884 - loss: 0.2716



[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 342ms/step - accuracy: 0.8884 - loss: 0.2716 - val_accuracy: 0.8773 - val_loss: 0.2979
Epoch 8/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 315ms/step - accuracy: 0.8826 - loss: 0.2778 - val_accuracy: 0.8713 - val_loss: 0.2898
Epoch 9/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 291ms/step - accuracy: 0.8934 - loss: 0.2573



[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 341ms/step - accuracy: 0.8934 - loss: 0.2573 - val_accuracy: 0.8862 - val_loss: 0.2775
Epoch 10/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 313ms/step - accuracy: 0.8933 - loss: 0.2546 - val_accuracy: 0.8736 - val_loss: 0.2965
Epoch 11/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 319ms/step - accuracy: 0.8956 - loss: 0.2533 - val_accuracy: 0.8834 - val_loss: 0.2869
Epoch 12/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 311ms/step - accuracy: 0.8909 - loss: 0.2487 - val_accuracy: 0.8741 - val_loss: 0.3073
Epoch 13/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 312ms/step - accuracy: 0.9023 - loss: 0.2331 - val_accuracy: 0.8857 - val_loss: 0.2780
Epoch 14/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 310ms/step - accuracy: 0.9005 - loss: 0.2277 - val_accuracy: 0.8860 - val_loss: 0.2641
Epoch



[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 309ms/step - accuracy: 0.9101 - loss: 0.2139 - val_accuracy: 0.8885 - val_loss: 0.2772
Epoch 21/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 305ms/step - accuracy: 0.9158 - loss: 0.1931 - val_accuracy: 0.8848 - val_loss: 0.3046
Epoch 22/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 307ms/step - accuracy: 0.9194 - loss: 0.1877 - val_accuracy: 0.8837 - val_loss: 0.2997
Epoch 23/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 307ms/step - accuracy: 0.9226 - loss: 0.1831 - val_accuracy: 0.8750 - val_loss: 0.2988
Epoch 24/100
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 308ms/step - accuracy: 0.9188 - loss: 0.1848 - val_accuracy: 0.8711 - val_loss: 0.3797


## Testing the Model

In [None]:
# Evaluate on test set: turn the per-class scores and the one-hot targets into
# class indices by taking the position of the largest entry in each row.
y_pred = model.predict(X_test)
y_pred_labels = y_pred.argmax(axis=1)
y_true_labels = y_test.argmax(axis=1)

# Confusion Matrix and Classification Report
test_report = classification_report(y_true_labels, y_pred_labels)
test_confusion = confusion_matrix(y_true_labels, y_pred_labels)
print("Classification Report:", test_report, sep="\n")
print("Confusion Matrix:", test_confusion, sep="\n")

# Save final model
model.save("mi_detection_resnet_model.h5")

[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 139ms/step




Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.96      0.93      3271
           1       0.84      0.70      0.76      1097

    accuracy                           0.89      4368
   macro avg       0.87      0.83      0.85      4368
weighted avg       0.89      0.89      0.89      4368

Confusion Matrix:
[[3124  147]
 [ 332  765]]


## STEP 8: 5-Fold Cross-Validation

In [None]:
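# NOTE: this cross-validation cell is disabled and will not run as written if uncommented:
#   - StratifiedKFold and ConfusionMatrixDisplay are not imported in the import cell;
#   - build_resnet_model is not defined in the cells shown (the model factory is build_model);
#   - build_model ends in Dense(2, activation='softmax'), whereas this loop trains on the
#     1-D labels `y` with binary_crossentropy and thresholds the predictions at 0.5.
# skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)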
# fold = 1
# histories, reports = [], []

# for train_index, test_index in skf.split(X, y):
#     print(f"\n--- Fold {fold} ---")
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]

#     model = build_resnet_model(X.shape[1:])
#     model.compile(optimizer=Adam(1e-3), loss='binary_crossentropy', metrics=['accuracy'])
#     early_stop = EarlyStopping(patience=5, restore_best_weights=True)

#     history = model.fit(X_train, y_train, epochs=30, batch_size=32,
#                         validation_split=0.2, callbacks=[early_stop], verbose=0)
#     histories.append(history)

#     y_pred = (model.predict(X_test) > 0.5).astype(int)
#     print(classification_report(y_test, y_pred))
#     cm = confusion_matrix(y_test, y_pred)
#     ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Non-MI', 'MI']).plot()
#     plt.title(f"Fold {fold} Confusion Matrix")
#     plt.show()
#     reports.append(classification_report(y_test, y_pred, output_dict=True))

#     # Save model from one fold for future testing
#     if fold == 1:
#         model.save('/content/mi_detector_resnet_fold1.h5')

#     fold += 1