In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [104]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization,GlobalAveragePooling1D
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report
from tensorflow.keras.optimizers import Adam



In [62]:
# Load the dataset
dataset_path = '/content/drive/MyDrive/MIT-BIH Supraventricular Arrhythmia Database.csv'
df = pd.read_csv(dataset_path)

In [63]:
# Drop the 'record' column
df = df.drop(columns=['record'])

In [64]:
# Encode the class labels
label_encoder = LabelEncoder()
df['type'] = label_encoder.fit_transform(df['type'])

In [65]:
# Create a mapping of labels to encoded values
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

print("Class Label to Digit Mapping:")
for class_label, encoded_value in label_mapping.items():
    print(f"{class_label}: {encoded_value}")

Class Label to Digit Mapping:
F: 0
N: 1
Q: 2
SVEB: 3
VEB: 4


In [66]:
df.shape

(184428, 33)

In [67]:
df.head(184428)

Unnamed: 0,type,0_pre-RR,0_post-RR,0_pPeak,0_tPeak,0_rPeak,0_sPeak,0_qPeak,0_qrs_interval,0_pq_interval,...,1_qPeak,1_qrs_interval,1_pq_interval,1_qt_interval,1_st_interval,1_qrs_morph0,1_qrs_morph1,1_qrs_morph2,1_qrs_morph3,1_qrs_morph4
0,1,168.0,167.0,-0.000659,-0.076691,1.079977,-0.958842,-0.009849,10,1,...,-0.072295,7,3,14,4,-0.072295,-0.055469,0.003311,0.077157,0.060548
1,1,167.0,169.0,-0.007237,-0.080968,1.117509,-0.993945,-0.037873,14,6,...,-0.011806,1,2,6,3,-0.011806,-0.011806,-0.011806,-0.011806,-0.011806
2,1,169.0,170.0,-0.015322,-0.065713,1.091163,-0.984797,-0.047307,15,5,...,-0.009710,0,2,4,2,-0.009710,-0.009710,-0.009710,-0.009710,-0.009710
3,1,170.0,166.0,-0.011573,-0.067132,1.075468,-0.912288,-0.018548,11,2,...,-0.060843,9,3,16,4,-0.060843,-0.055684,0.011955,0.039944,0.066575
4,1,166.0,169.0,-0.004269,-0.081436,1.163276,-0.991680,-0.013624,10,2,...,-0.029659,2,3,8,3,-0.029659,-0.029659,-0.029659,-0.005076,-0.005076
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184423,1,102.0,96.0,0.011782,-0.702667,2.537575,-1.717832,-0.019031,15,2,...,-0.366454,2,6,12,4,-0.366454,-0.366454,-0.366454,-0.489696,-0.489696
184424,1,96.0,27.0,0.151691,-1.074162,2.549375,-1.553742,0.028627,14,2,...,-0.489704,2,6,12,4,-0.489704,-0.489704,-0.489704,-0.710598,-0.710598
184425,1,79.0,96.0,0.037697,0.402691,2.431716,-1.536314,0.032153,14,2,...,-0.225756,2,1,6,3,-0.225756,-0.225756,-0.225756,-0.505484,-0.505484
184426,1,96.0,59.0,-0.021873,-0.301590,2.621748,-1.815166,-0.025689,17,1,...,-0.367450,3,4,11,4,-0.367450,-0.367450,-0.326364,-0.326364,-0.423273


In [68]:
# Normalize the input features
scaler = StandardScaler()
X = df.drop(columns=['type']).values
X_scaled = scaler.fit_transform(X)


In [69]:
# Extract the target variable
y = df['type'].values

In [84]:

# Encode the target variable if it's categorical
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [80]:
# Get the maximum number of samples in any class
class_distribution = df['type'].value_counts()
max_samples_per_class = class_distribution.max()


In [85]:
# Adjust SMOTE configuration to match the maximum samples per class
smote = SMOTE(sampling_strategy={0: max_samples_per_class, 1: max_samples_per_class, 2: max_samples_per_class, 3: max_samples_per_class, 4: max_samples_per_class}, random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y_encoded)

In [86]:
# Check the number of samples per class after SMOTE
resampled_class_counts = pd.Series(y_resampled).value_counts()
print("Number of samples per class after SMOTE:")
print(resampled_class_counts)

Number of samples per class after SMOTE:
1    162195
3    162195
4    162195
0    162195
2    162195
Name: count, dtype: int64


In [96]:
# Combine resampled data into a DataFrame for easy handling
resampled_df = pd.DataFrame(X_resampled)
resampled_df['type'] = y_resampled

# Take exactly 20,000 samples per class
desired_samples_per_class = 20000
balanced_df = resampled_df.groupby('type').apply(lambda x: x.sample(n=desired_samples_per_class, random_state=42)).reset_index(drop=True)

In [97]:
# Separate features and target
X_balanced = balanced_df.drop(columns=['type']).values
y_balanced = balanced_df['type'].values

In [98]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.3, random_state=42)


In [99]:
# Reshape the input data to fit the CNN model
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [100]:
# Convert the labels to categorical (one-hot encoding)
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [101]:
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', padding='same', input_shape=(X_train.shape[1], 1)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=256, kernel_size=3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=512, kernel_size=3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(GlobalAveragePooling1D())

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

In [105]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [106]:
# Train the model
history = model.fit(X_train, y_train_categorical, validation_data=(X_test, y_test_categorical), epochs=25, batch_size=32)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [108]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical)
print(f'Test Accuracy: {test_accuracy:.4f}')

# Evaluate the model on the training set
train_loss, train_accuracy = model.evaluate(X_train, y_train_categorical)
print(f'Train Accuracy: {train_accuracy:.4f}')

Test Accuracy: 0.9791
Train Accuracy: 0.9916


In [110]:
# Generate predictions
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# Decode the true and predicted labels
y_test_classes = y_test_categorical.argmax(axis=1)

# Convert label_encoder.classes_ to a list of strings
class_names = list(map(str, label_encoder.classes_))

report = classification_report(y_test_classes, y_pred_classes, target_names=class_names)
print("Classification Report:")
print(report)

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5978
           1       0.97      0.96      0.97      6060
           2       1.00      1.00      1.00      5994
           3       0.96      0.95      0.95      5982
           4       0.97      0.98      0.98      5986

    accuracy                           0.98     30000
   macro avg       0.98      0.98      0.98     30000
weighted avg       0.98      0.98      0.98     30000



In [112]:
# Save the model
model.save('/content/drive/MyDrive/ecg_cnn_model.h5')