In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

# a. Load and preprocess ECG dataset
data = pd.read_csv('/Users/akshay/Downloads/demo_dl/datasets1/ECGdataset(Ass4)/ecg_autoencoder_dataset.csv', header=None)

# Last column (140) is the class label, rest are features
X = data.iloc[:, :-1].values  # All columns except last
y = data.iloc[:, -1].values   # Last column is the label (0=anomaly, 1=normal)

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Separate normal data (Class = 1) for training the autoencoder
x_train = X_scaled[y == 1]  # Train only on normal heartbeats

# Test data (normal + anomalies)
x_test = X_scaled
y_test = y

print(f"Training samples (normal, class=1): {x_train.shape[0]}")
print(f"Test samples (total): {x_test.shape[0]}")
print(f"  - Normal (class=1): {np.sum(y_test == 1)}")
print(f"  - Anomaly (class=0): {np.sum(y_test == 0)}")
print(f"Feature dimensions: {x_train.shape[1]}")

# b. Build Autoencoder model
input_dim = x_train.shape[1]  # 140 features
inp = Input((input_dim,))
enc = Dense(64, activation='relu')(inp)
lat = Dense(32, activation='relu')(enc)
dec = Dense(64, activation='relu')(lat)
out = Dense(input_dim, activation='linear')(dec)
model = Model(inp, out)

# c. Compile & Train
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
history = model.fit(x_train, x_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# d. Reconstruction on test data
reconstructions = model.predict(x_test)

# e. Calculate reconstruction error
mse = np.mean(np.power(x_test - reconstructions, 2), axis=1)

# f. Set threshold for anomaly detection (using 95th percentile of training errors)
train_reconstructions = model.predict(x_train)
train_mse = np.mean(np.power(x_train - train_reconstructions, 2), axis=1)
threshold = np.percentile(train_mse, 95)
print(f"\nThreshold (95th percentile of training errors): {threshold:.6f}")

# g. Predict anomalies (mse > threshold means anomaly, so predict 0)
y_pred = (mse > threshold).astype(int)  # 1 if mse > threshold (anomaly)
y_pred = 1 - y_pred  # Flip: 0=anomaly, 1=normal to match original labels

# h. Evaluation
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
print(cm)
print("\nConfusion Matrix explanation:")
print(f"True Anomalies correctly identified: {cm[0,0]}")
print(f"True Anomalies incorrectly labeled as Normal: {cm[0,1]}")
print(f"True Normals incorrectly labeled as Anomaly: {cm[1,0]}")
print(f"True Normals correctly identified: {cm[1,1]}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Anomaly (0)', 'Normal (1)']))

print(f"\nAccuracy: {accuracy_score(y_test, y_pred):.4f}")

# Additional metrics
from sklearn.metrics import precision_score, recall_score, f1_score
print(f"Precision (for anomaly detection): {precision_score(y_test, y_pred, pos_label=0):.4f}")
print(f"Recall (for anomaly detection): {recall_score(y_test, y_pred, pos_label=0):.4f}")
print(f"F1-Score (for anomaly detection): {f1_score(y_test, y_pred, pos_label=0):.4f}")

Training samples (normal, class=1): 2919
Test samples (total): 4998
  - Normal (class=1): 2919
  - Anomaly (class=0): 2079
Feature dimensions: 140
Epoch 1/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5430 - mae: 0.5227 - val_loss: 0.2653 - val_mae: 0.3550
Epoch 2/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 947us/step - loss: 0.1981 - mae: 0.3061 - val_loss: 0.1672 - val_mae: 0.2757
Epoch 3/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1397 - mae: 0.2550 - val_loss: 0.1299 - val_mae: 0.2390
Epoch 4/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 952us/step - loss: 0.1090 - mae: 0.2241 - val_loss: 0.1093 - val_mae: 0.2175
Epoch 5/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 905us/step - loss: 0.0940 - mae: 0.2073 - val_loss: 0.0996 - val_mae: 0.2068
Epoch 6/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 880us/step - loss: 0.0