In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, confusion_matrix

# Run on the GPU when PyTorch reports CUDA as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [None]:
import kagglehub
# Download the "shayanfazeli/heartbeat" dataset through kagglehub and keep the
# returned location in `path`; the CSV file paths are built from it further down.
path = kagglehub.dataset_download("shayanfazeli/heartbeat")
print("Path to dataset files:", path)

Using Colab cache for faster access to the 'heartbeat' dataset.
Path to dataset files: /kaggle/input/heartbeat


In [None]:
class ECGDataset(Dataset):
    """In-memory dataset of standardized rows read from a header-less CSV file.

    Every column except the last is loaded as a float32 feature and the last
    column is loaded as the int64 class label. Features are standardized
    column-wise as ``(X - mean) / std`` and stored as a float32 tensor.

    Args:
        csv_path: Path of the CSV file (read with ``header=None``).
        mean: Per-feature mean to standardize with. Ignored when ``fit_stats``
            is True.
        std: Per-feature standard deviation to standardize with. Ignored when
            ``fit_stats`` is True.
        fit_stats: When True, compute ``mean`` and ``std`` from this file
            (intended for the training split only) instead of using the
            arguments.

    Attributes:
        mean, std: The statistics that were actually applied, so they can be
            reused for another split.
        X: Float32 tensor of standardized features, one row per sample.
        y: Int64 tensor of labels.

    Raises:
        ValueError: If ``fit_stats`` is False and ``mean`` or ``std`` is missing.
    """

    def __init__(self, csv_path, mean=None, std=None, fit_stats=False):
        data = pd.read_csv(csv_path, header=None).values
        X = data[:, :-1].astype(np.float32)   # all columns but the last are features
        y = data[:, -1].astype(np.int64)      # last column is the class label

        if fit_stats:
            # Compute normalization stats from this split (training data only).
            self.mean = X.mean(axis=0, keepdims=True)
            self.std = X.std(axis=0, keepdims=True) + 1e-8  # epsilon avoids division by zero
        else:
            if mean is None or std is None:
                # Fail early with a clear message instead of a TypeError on `X - None`.
                raise ValueError(
                    "ECGDataset needs both `mean` and `std` when fit_stats=False"
                )
            self.mean = mean
            self.std = std

        # float64 statistics would promote X to float64; keep float32 so the
        # tensors match the default dtype of the models' parameters.
        X = ((X - self.mean) / self.std).astype(np.float32, copy=False)

        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]


In [None]:
train_csv = os.path.join(path, "mitbih_train.csv")
test_csv  = os.path.join(path, "mitbih_test.csv")

# Fit the normalization statistics while loading the training split, so the
# training CSV is parsed once and the mean/std formula lives only in ECGDataset.
train_dataset = ECGDataset(train_csv, fit_stats=True)
train_mean, train_std = train_dataset.mean, train_dataset.std

# The test split reuses the training statistics so no test information leaks
# into the normalization.
test_dataset = ECGDataset(test_csv, mean=train_mean, std=train_std, fit_stats=False)

batch_size = 256

# Only the training loader is shuffled; the test loader keeps file order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False, drop_last=False)

num_features = train_dataset.X.shape[1]   # should be 187
num_classes = len(torch.unique(train_dataset.y))  # should be 5


In [None]:
# Address class imbalance with per-class loss weights: minority classes get
# higher weights and majority classes lower ones (inverse class frequency).
labels_np = train_dataset.y.numpy()
class_counts = np.bincount(labels_np)
class_weights = 1.0 / (class_counts + 1e-8)   # epsilon guards against a zero count
# Rescale so that the weights sum to the number of classes.
class_weights = class_weights * (len(class_counts) / class_weights.sum())
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)
print("Class counts:", class_counts)
print("Class weights:", class_weights)


Class counts: [72471  2223  5788   641  6431]
Class weights: [0.02933416 0.95630948 0.36729025 3.31649917 0.33056694]


In [None]:
# Feed-forward baseline model
class ECGFNN(nn.Module):
    """Fully connected classifier: three hidden ReLU layers followed by a logit layer.

    Hidden widths are 256, 128 and 64, each followed by dropout
    (p = 0.3, 0.3 and 0.2 respectively).

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()

        # (hidden width, dropout probability) for each hidden layer, in order.
        hidden_plan = ((256, 0.3), (128, 0.3), (64, 0.2))

        layers = []
        width_in = input_dim
        for width_out, drop_p in hidden_plan:
            layers += [nn.Linear(width_in, width_out), nn.ReLU(), nn.Dropout(drop_p)]
            width_in = width_out
        layers.append(nn.Linear(width_in, num_classes))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, num_classes)`` logits."""
        return self.net(x)

# FNN training objects: the network on the selected device, a class-weighted
# cross-entropy loss, and an Adam optimizer over the network's parameters.
model = ECGFNN(input_dim=num_features, num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)  # drop `weight=` for an unweighted baseline
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
# ================== Training & Evaluation Loops ==================
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over ``loader`` and return sample-averaged metrics.

    Args:
        model: Module to train; it is switched to training mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss called as ``criterion(logits, targets)``.
        optimizer: Optimizer stepped once per batch.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``, where the loss is the batch loss weighted by
        batch size and divided by the number of samples seen, and accuracy is
        the fraction of samples whose arg-max logit equals the target.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for inputs, targets in loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # The criterion returns a per-batch value; weight it by the batch size
        # so the epoch figure is a per-sample average.
        loss_sum += batch_loss.item() * inputs.size(0)
        hits = logits.max(dim=1).indices == targets
        n_correct += hits.sum().item()
        n_seen += targets.size(0)

    return loss_sum / n_seen, n_correct / n_seen

@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss called as ``criterion(logits, targets)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy, labels, predictions)``. The loss is the batch
        loss weighted by batch size and divided by the number of samples seen;
        ``labels`` and ``predictions`` are NumPy arrays concatenated in loader
        order.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    label_chunks, pred_chunks = [], []

    for inputs, targets in loader:
        inputs, targets = inputs.to(device), targets.to(device)

        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        guesses = logits.max(dim=1).indices

        loss_sum += batch_loss.item() * inputs.size(0)
        n_correct += (guesses == targets).sum().item()
        n_seen += targets.size(0)

        # Move per-batch results to host memory so they can be handed to NumPy consumers.
        label_chunks.append(targets.cpu().numpy())
        pred_chunks.append(guesses.cpu().numpy())

    avg_loss = loss_sum / n_seen
    acc = n_correct / n_seen
    return avg_loss, acc, np.concatenate(label_chunks), np.concatenate(pred_chunks)


In [None]:
num_epochs = 20

# Train the feed-forward network; after each epoch, score it on the test loader
# and log both sets of metrics.
for epoch in range(1, num_epochs + 1):
    train_loss_fnn, train_acc_fnn = train_one_epoch(
        model=model,
        loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    # evaluate() also returns labels and predictions; only the scalars are logged here.
    val_loss_fnn, val_acc_fnn, _, _ = evaluate(
        model=model, loader=test_loader, criterion=criterion, device=device
    )

    print(f"Epoch {epoch:02d}")
    print(f"  [FNN] Train Loss: {train_loss_fnn:.4f}, Train Acc: {train_acc_fnn:.4f} | "
          f"Test Loss: {val_loss_fnn:.4f}, Test Acc: {val_acc_fnn:.4f}")

Epoch 01
  [FNN] Train Loss: 0.7134, Train Acc: 0.6276 | Test Loss: 0.5716, Test Acc: 0.8155
Epoch 02
  [FNN] Train Loss: 0.4893, Train Acc: 0.7776 | Test Loss: 0.5020, Test Acc: 0.8480
Epoch 03
  [FNN] Train Loss: 0.4270, Train Acc: 0.8072 | Test Loss: 0.3411, Test Acc: 0.8953
Epoch 04
  [FNN] Train Loss: 0.3956, Train Acc: 0.8324 | Test Loss: 0.3377, Test Acc: 0.8973
Epoch 05
  [FNN] Train Loss: 0.3702, Train Acc: 0.8372 | Test Loss: 0.4859, Test Acc: 0.8325
Epoch 06
  [FNN] Train Loss: 0.3416, Train Acc: 0.8458 | Test Loss: 0.3239, Test Acc: 0.9000
Epoch 07
  [FNN] Train Loss: 0.3294, Train Acc: 0.8538 | Test Loss: 0.3739, Test Acc: 0.8716
Epoch 08
  [FNN] Train Loss: 0.3204, Train Acc: 0.8602 | Test Loss: 0.3393, Test Acc: 0.8864
Epoch 09
  [FNN] Train Loss: 0.3008, Train Acc: 0.8639 | Test Loss: 0.3732, Test Acc: 0.8791
Epoch 10
  [FNN] Train Loss: 0.3027, Train Acc: 0.8657 | Test Loss: 0.3427, Test Acc: 0.8965
Epoch 11
  [FNN] Train Loss: 0.2908, Train Acc: 0.8713 | Test Loss: 0.

In [None]:
# CNN model
class ECGCNN1D(nn.Module):
    """1D convolutional classifier for single-channel sequences.

    Three Conv1d -> BatchNorm1d -> ReLU stages (32, 64 and 128 channels), with
    max-pooling after the first two, are followed by adaptive average pooling
    to a fixed number of time steps and a two-layer fully connected head.

    Args:
        num_classes: Number of output logits.
        input_length: Nominal sequence length (187 by default). It is not used
            to size any layer, because the adaptive pooling fixes the temporal
            length seen by the classifier; it is kept so existing callers that
            pass it keep working.
    """

    # Width of the last convolutional stage and the number of time steps kept
    # by the adaptive pooling; their product is the classifier's input width.
    _LAST_CHANNELS = 128
    _POOLED_STEPS = 23

    def __init__(self, num_classes, input_length=187):
        super().__init__()

        self.features = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=5, padding=2),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),   # 187 -> 93 for the default input length

            nn.Conv1d(32, 64, kernel_size=5, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),   # 93 -> 46

            nn.Conv1d(64, self._LAST_CHANNELS, kernel_size=3, padding=1),
            nn.BatchNorm1d(self._LAST_CHANNELS),
            nn.ReLU(),                     # length stays 46
        )

        # Adaptive pooling always emits _POOLED_STEPS time steps (46 -> 23 for
        # the default input), so the classifier width does not depend on the
        # input length.
        self.global_pool = nn.AdaptiveAvgPool1d(self._POOLED_STEPS)

        # 128 channels * 23 time steps = 2944 flattened features.
        flat_features = self._LAST_CHANNELS * self._POOLED_STEPS
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Map a ``(batch, length)`` tensor to ``(batch, num_classes)`` logits."""
        x = x.unsqueeze(1)       # (batch, length) -> (batch, 1, length)
        x = self.features(x)     # (batch, 128, length // 4)
        x = self.global_pool(x)  # (batch, 128, 23)
        return self.classifier(x)  # (batch, num_classes)

# CNN training objects: same class-weighted loss and Adam settings as the FNN,
# but bound to the CNN's own parameters.
model2 = ECGCNN1D(num_classes=num_classes).to(device)
criterion2 = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer2 = torch.optim.Adam(params=model2.parameters(), lr=1e-3)

In [None]:
num_epochs = 20

# Train the CNN; after each epoch, score it on the test loader and log both
# sets of metrics.
for epoch in range(1, num_epochs + 1):
    train_loss_cnn, train_acc_cnn = train_one_epoch(
        model=model2,
        loader=train_loader,
        criterion=criterion2,
        optimizer=optimizer2,
        device=device,
    )
    # evaluate() also returns labels and predictions; only the scalars are logged here.
    val_loss_cnn, val_acc_cnn, _, _ = evaluate(
        model=model2, loader=test_loader, criterion=criterion2, device=device
    )

    print(f"Epoch {epoch:02d}")
    print(f"  [CNN] Train Loss: {train_loss_cnn:.4f}, Train Acc: {train_acc_cnn:.4f} | "
          f"Test Loss: {val_loss_cnn:.4f}, Test Acc: {val_acc_cnn:.4f}")

Epoch 01
  [CNN] Train Loss: 0.4984, Train Acc: 0.7824 | Test Loss: 0.5525, Test Acc: 0.8270
Epoch 02
  [CNN] Train Loss: 0.3202, Train Acc: 0.8619 | Test Loss: 0.2211, Test Acc: 0.9374
Epoch 03
  [CNN] Train Loss: 0.2804, Train Acc: 0.8832 | Test Loss: 0.2457, Test Acc: 0.9216
Epoch 04
  [CNN] Train Loss: 0.2315, Train Acc: 0.8994 | Test Loss: 0.2820, Test Acc: 0.9051
Epoch 05
  [CNN] Train Loss: 0.2061, Train Acc: 0.9102 | Test Loss: 0.1375, Test Acc: 0.9587
Epoch 06
  [CNN] Train Loss: 0.1966, Train Acc: 0.9165 | Test Loss: 0.2215, Test Acc: 0.9240
Epoch 07
  [CNN] Train Loss: 0.1774, Train Acc: 0.9194 | Test Loss: 0.1921, Test Acc: 0.9356
Epoch 08
  [CNN] Train Loss: 0.1709, Train Acc: 0.9256 | Test Loss: 0.1527, Test Acc: 0.9530
Epoch 09
  [CNN] Train Loss: 0.1478, Train Acc: 0.9298 | Test Loss: 0.1480, Test Acc: 0.9514
Epoch 10
  [CNN] Train Loss: 0.1368, Train Acc: 0.9347 | Test Loss: 0.1776, Test Acc: 0.9437
Epoch 11
  [CNN] Train Loss: 0.1290, Train Acc: 0.9378 | Test Loss: 0.

In [None]:
# ================== Final Evaluation: FNN ==================
# Score the trained FNN once more on the test loader, this time keeping the
# labels and predictions for the per-class report and the confusion matrix.
fnn_test_loss, fnn_test_acc, fnn_y_true, fnn_y_pred = evaluate(
    model=model, loader=test_loader, criterion=criterion, device=device
)

print("\n===== FNN Results =====")
print("Final Test Loss (FNN):", fnn_test_loss)
print("Final Test Accuracy (FNN):", fnn_test_acc)

print("\n[FNN] Classification Report:")
print(classification_report(y_true=fnn_y_true, y_pred=fnn_y_pred, digits=4))

print("[FNN] Confusion Matrix:")
print(confusion_matrix(y_true=fnn_y_true, y_pred=fnn_y_pred))





===== FNN Results =====
Final Test Loss (FNN): 0.2882586213830357
Final Test Accuracy (FNN): 0.9005116024118399

[FNN] Classification Report:
              precision    recall  f1-score   support

           0     0.9935    0.8922    0.9401     18118
           1     0.2729    0.8453    0.4126       556
           2     0.8220    0.9344    0.8746      1448
           3     0.2668    0.9074    0.4123       162
           4     0.9272    0.9826    0.9541      1608

    accuracy                         0.9005     21892
   macro avg     0.6565    0.9124    0.7188     21892
weighted avg     0.9536    0.9005    0.9195     21892

[FNN] Confusion Matrix:
[[16164  1225   262   352   115]
 [   65   470    15     2     4]
 [   29    13  1353    48     5]
 [    5     3     7   147     0]
 [    6    11     9     2  1580]]


In [None]:
# ================== Final Evaluation: CNN ==================
# Score the trained CNN once more on the test loader, this time keeping the
# labels and predictions for the per-class report and the confusion matrix.
cnn_test_loss, cnn_test_acc, cnn_y_true, cnn_y_pred = evaluate(
    model=model2, loader=test_loader, criterion=criterion2, device=device
)

print("\n===== CNN Results =====")
print("Final Test Loss (CNN):", cnn_test_loss)
print("Final Test Accuracy (CNN):", cnn_test_acc)

print("\n[CNN] Classification Report:")
print(classification_report(y_true=cnn_y_true, y_pred=cnn_y_pred, digits=4))

print("[CNN] Confusion Matrix:")
print(confusion_matrix(y_true=cnn_y_true, y_pred=cnn_y_pred))


===== CNN Results =====
Final Test Loss (CNN): 0.11860908842214213
Final Test Accuracy (CNN): 0.9624977160606615

[CNN] Classification Report:
              precision    recall  f1-score   support

           0     0.9936    0.9662    0.9797     18118
           1     0.6789    0.8291    0.7466       556
           2     0.9102    0.9378    0.9238      1448
           3     0.3202    0.9506    0.4790       162
           4     0.9815    0.9900    0.9858      1608

    accuracy                         0.9625     21892
   macro avg     0.7769    0.9348    0.8230     21892
weighted avg     0.9743    0.9625    0.9669     21892

[CNN] Confusion Matrix:
[[17506   210   116   262    24]
 [   78   461    12     4     1]
 [   18     7  1358    60     5]
 [    5     1     2   154     0]
 [   11     0     4     1  1592]]


In [None]:
# ===== Softer class weights for LSTM only =====
# Same idea as the inverse-frequency weights, but the square root compresses
# the spread between the rarest and the most common class.
labels_np = train_dataset.y.numpy()
class_counts = np.bincount(labels_np)

inv_freq = 1.0 / (class_counts + 1e-8)   # epsilon guards against a zero count
class_weights_lstm = np.sqrt(inv_freq)

# Rescale so that the weights sum to the number of classes.
class_weights_lstm = class_weights_lstm * (len(class_counts) / class_weights_lstm.sum())

class_weights_lstm_tensor = torch.tensor(class_weights_lstm, dtype=torch.float32).to(device)

print("Class counts:", class_counts)
print("LSTM class weights:", class_weights_lstm)

Class counts: [72471  2223  5788   641  6431]
LSTM class weights: [0.20628726 1.17783593 0.72994522 2.19343877 0.69249282]


In [None]:
class ECGLSTM(nn.Module):
    """LSTM classifier that reads each sample as a sequence of scalar time steps.

    The LSTM output sequence is summarized by concatenating its mean and its
    maximum over time, and that summary is fed to a two-layer fully connected
    head.

    Args:
        num_classes: Number of output logits.
        input_size: Features per time step (1 by default).
        hidden_size: LSTM hidden width per direction.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        dropout: Dropout between stacked LSTM layers; forced to 0.0 when there
            is only one layer.
    """

    def __init__(
        self,
        num_classes,
        input_size=1,
        hidden_size=64,
        num_layers=2,
        bidirectional=True,
        dropout=0.3
    ):
        super().__init__()

        self.bidirectional = bidirectional

        # Inter-layer dropout is only passed on when there is more than one layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,          # input is (batch, seq_len, features)
            bidirectional=bidirectional,
            dropout=inter_layer_dropout,
        )

        # Each time step yields hidden_size values per direction; mean-pool and
        # max-pool are concatenated, doubling the width again.
        n_directions = 2 if bidirectional else 1
        pooled_width = 2 * hidden_size * n_directions

        self.classifier = nn.Sequential(
            nn.Linear(pooled_width, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Map a ``(batch, seq_len)`` tensor to ``(batch, num_classes)`` logits."""
        steps = x.unsqueeze(-1)                 # (batch, seq_len) -> (batch, seq_len, 1)
        out, _ = self.lstm(steps)               # (batch, seq_len, hidden * dirs)

        # Summarize the sequence with its temporal mean and temporal maximum.
        summary = torch.cat(
            [torch.mean(out, dim=1), out.max(dim=1).values],
            dim=1,
        )                                       # (batch, 2 * hidden * dirs)

        return self.classifier(summary)         # (batch, num_classes)

num_classes = len(torch.unique(train_dataset.y))

model3 = ECGLSTM(
    num_classes=num_classes,
    hidden_size=64,
    num_layers=2,
    bidirectional=True,
    dropout=0.3
).to(device)

# The LSTM uses the softer (square-root) class weights.
criterion3 = nn.CrossEntropyLoss(weight=class_weights_lstm_tensor)
# The optimizer must own model3's parameters: binding it to `model` (the FNN)
# would leave the LSTM's weights untouched by every optimizer step.
optimizer3 = torch.optim.Adam(model3.parameters(), lr=1e-3)

In [None]:
num_epochs = 20

# Train the LSTM; after each epoch, score it on the test loader and log both
# sets of metrics on a single line.
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = train_one_epoch(
        model=model3,
        loader=train_loader,
        criterion=criterion3,
        optimizer=optimizer3,
        device=device,
    )
    # evaluate() also returns labels and predictions; only the scalars are logged here.
    val_loss, val_acc, _, _ = evaluate(
        model=model3, loader=test_loader, criterion=criterion3, device=device
    )

    print(f"[LSTM] Epoch {epoch:02d} | "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
          f"Test Loss: {val_loss:.4f}, Test Acc: {val_acc:.4f}")

[LSTM] Epoch 01 | Train Loss: 0.9415, Train Acc: 0.8488 | Test Loss: 0.3818, Test Acc: 0.9057
[LSTM] Epoch 02 | Train Loss: 0.5130, Train Acc: 0.9131 | Test Loss: 0.2157, Test Acc: 0.9389
[LSTM] Epoch 03 | Train Loss: 0.3871, Train Acc: 0.9321 | Test Loss: 0.1931, Test Acc: 0.9505
[LSTM] Epoch 04 | Train Loss: 0.3416, Train Acc: 0.9396 | Test Loss: 0.1651, Test Acc: 0.9549
[LSTM] Epoch 05 | Train Loss: 0.2996, Train Acc: 0.9490 | Test Loss: 0.1577, Test Acc: 0.9577
[LSTM] Epoch 06 | Train Loss: 0.2766, Train Acc: 0.9529 | Test Loss: 0.1400, Test Acc: 0.9627
[LSTM] Epoch 07 | Train Loss: 0.2622, Train Acc: 0.9537 | Test Loss: 0.1557, Test Acc: 0.9591
[LSTM] Epoch 08 | Train Loss: 0.2508, Train Acc: 0.9540 | Test Loss: 0.1460, Test Acc: 0.9604
[LSTM] Epoch 09 | Train Loss: 0.2346, Train Acc: 0.9582 | Test Loss: 0.1183, Test Acc: 0.9684
[LSTM] Epoch 10 | Train Loss: 0.2109, Train Acc: 0.9616 | Test Loss: 0.1327, Test Acc: 0.9616
[LSTM] Epoch 11 | Train Loss: 0.2105, Train Acc: 0.9612 | Te

In [None]:
# Final LSTM evaluation: score model3 with its own loss (criterion3).
# Passing `model`/`criterion` here would report the FNN under the LSTM heading.
test_loss, test_acc, y_true, y_pred = evaluate(
    model3, test_loader, criterion3, device
)

print("\n===== LSTM Results =====")
print("Final Test Loss:", test_loss)
print("Final Test Accuracy:", test_acc)

print("\n[LSTM] Classification Report:")
print(classification_report(y_true, y_pred, digits=4))

print("[LSTM] Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))


===== LSTM Results =====
Final Test Loss: 0.09216902880129116
Final Test Accuracy: 0.9749223460624886

[LSTM] Classification Report:
              precision    recall  f1-score   support

           0     0.9890    0.9855    0.9873     18118
           1     0.8316    0.7284    0.7766       556
           2     0.9013    0.9523    0.9261      1448
           3     0.6119    0.8272    0.7034       162
           4     0.9794    0.9757    0.9776      1608

    accuracy                         0.9749     21892
   macro avg     0.8626    0.8938    0.8742     21892
weighted avg     0.9757    0.9749    0.9751     21892

[LSTM] Confusion Matrix:
[[17856    76   115    45    26]
 [  135   405    11     2     3]
 [   27     3  1379    36     3]
 [   17     0    10   134     1]
 [   19     3    15     2  1569]]


In [None]:
class ECGCNNLSTM(nn.Module):
    """Hybrid classifier: a 1D CNN front end, an LSTM over its output, and a pooled head.

    The CNN turns a single-channel sequence into a shorter sequence of feature
    vectors, the LSTM reads that sequence, and the LSTM outputs are summarized
    by concatenating their mean and maximum over time before a two-layer fully
    connected head.

    Args:
        num_classes: Number of output logits.
        conv_channels: Output channels of the three convolutional stages.
        lstm_hidden: LSTM hidden width per direction.
        lstm_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        lstm_dropout: Dropout between stacked LSTM layers; forced to 0.0 when
            there is only one layer.
        fc_hidden: Width of the hidden layer in the classifier head.
    """

    def __init__(
        self,
        num_classes,
        conv_channels=(32, 64, 128),
        lstm_hidden=64,
        lstm_layers=1,
        bidirectional=True,
        lstm_dropout=0.2,
        fc_hidden=128
    ):
        super().__init__()

        def conv_stage(in_ch, out_ch, kernel_size, pool):
            """Conv1d -> BatchNorm1d -> ReLU, optionally followed by MaxPool1d(2)."""
            stage = [
                nn.Conv1d(in_ch, out_ch, kernel_size=kernel_size, padding=kernel_size // 2),
                nn.BatchNorm1d(out_ch),
                nn.ReLU(),
            ]
            if pool:
                stage.append(nn.MaxPool1d(kernel_size=2))
            return stage

        c1, c2, c3 = conv_channels[0], conv_channels[1], conv_channels[2]

        # ---------- 1D CNN feature extractor ----------
        # Input is (batch, 1, length); for length 187 the two poolings give 187 -> 93 -> 46.
        self.cnn = nn.Sequential(
            *conv_stage(1, c1, kernel_size=5, pool=True),
            *conv_stage(c1, c2, kernel_size=5, pool=True),
            *conv_stage(c2, c3, kernel_size=3, pool=False),
        )

        # ---------- LSTM over the CNN feature sequence ----------
        self.bidirectional = bidirectional
        self.lstm_input_size = c3               # CNN channels become per-step features
        self.lstm_hidden = lstm_hidden

        # Inter-layer dropout is only passed on when there is more than one layer.
        inter_layer_dropout = lstm_dropout if lstm_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=self.lstm_input_size,
            hidden_size=lstm_hidden,
            num_layers=lstm_layers,
            batch_first=True,                   # (batch, seq_len, features)
            bidirectional=bidirectional,
            dropout=inter_layer_dropout,
        )

        # Mean-pool and max-pool over time are concatenated: 2 * hidden * directions.
        n_directions = 2 if bidirectional else 1
        pooled_width = 2 * lstm_hidden * n_directions

        # ---------- Classifier ----------
        self.classifier = nn.Sequential(
            nn.Linear(pooled_width, fc_hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden, num_classes),
        )

    def forward(self, x):
        """Map a ``(batch, length)`` tensor to ``(batch, num_classes)`` logits."""
        maps = self.cnn(x.unsqueeze(1))         # (batch, 1, length) -> (batch, C, L)

        # The LSTM expects (batch, seq_len, features), so swap channel and time axes.
        out, _ = self.lstm(maps.permute(0, 2, 1))   # (batch, L, hidden * dirs)

        # Global temporal pooling: mean and max over the L steps, concatenated.
        summary = torch.cat(
            [torch.mean(out, dim=1), out.max(dim=1).values],
            dim=1,
        )                                       # (batch, 2 * hidden * dirs)

        return self.classifier(summary)         # (batch, num_classes)


num_classes = len(torch.unique(train_dataset.y))

# CNN-LSTM training objects: the hybrid network on the selected device, the
# inverse-frequency class-weighted loss, and Adam over the hybrid's parameters.
hybrid_config = dict(
    conv_channels=(32, 64, 128),
    lstm_hidden=64,
    lstm_layers=1,
    bidirectional=True,
    lstm_dropout=0.2,
    fc_hidden=128,
)
model_hybrid = ECGCNNLSTM(num_classes=num_classes, **hybrid_config).to(device)

criterion_hybrid = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer_hybrid = torch.optim.Adam(params=model_hybrid.parameters(), lr=1e-3)

In [None]:
num_epochs = 20

# Train the CNN-LSTM hybrid; after each epoch, score it on the test loader and
# log both sets of metrics on a single line.
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = train_one_epoch(
        model=model_hybrid,
        loader=train_loader,
        criterion=criterion_hybrid,
        optimizer=optimizer_hybrid,
        device=device,
    )
    # evaluate() also returns labels and predictions; only the scalars are logged here.
    val_loss, val_acc, _, _ = evaluate(
        model=model_hybrid, loader=test_loader, criterion=criterion_hybrid, device=device
    )

    print(f"[CNN-LSTM] Epoch {epoch:02d} | "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
          f"Test Loss: {val_loss:.4f}, Test Acc: {val_acc:.4f}")

[CNN-LSTM] Epoch 01 | Train Loss: 0.6325, Train Acc: 0.6778 | Test Loss: 0.3551, Test Acc: 0.9027
[CNN-LSTM] Epoch 02 | Train Loss: 0.3548, Train Acc: 0.8422 | Test Loss: 0.3174, Test Acc: 0.9011
[CNN-LSTM] Epoch 03 | Train Loss: 0.3014, Train Acc: 0.8686 | Test Loss: 0.2280, Test Acc: 0.9420
[CNN-LSTM] Epoch 04 | Train Loss: 0.2656, Train Acc: 0.8806 | Test Loss: 0.3670, Test Acc: 0.8848
[CNN-LSTM] Epoch 05 | Train Loss: 0.2451, Train Acc: 0.8893 | Test Loss: 0.3616, Test Acc: 0.8727
[CNN-LSTM] Epoch 06 | Train Loss: 0.2337, Train Acc: 0.8960 | Test Loss: 0.1933, Test Acc: 0.9388
[CNN-LSTM] Epoch 07 | Train Loss: 0.1939, Train Acc: 0.9088 | Test Loss: 0.1293, Test Acc: 0.9635
[CNN-LSTM] Epoch 08 | Train Loss: 0.1837, Train Acc: 0.9146 | Test Loss: 0.2433, Test Acc: 0.9234
[CNN-LSTM] Epoch 09 | Train Loss: 0.1970, Train Acc: 0.9113 | Test Loss: 0.2225, Test Acc: 0.9268
[CNN-LSTM] Epoch 10 | Train Loss: 0.1730, Train Acc: 0.9199 | Test Loss: 0.3153, Test Acc: 0.8892
[CNN-LSTM] Epoch 11 

In [None]:
# Final CNN-LSTM evaluation: score the hybrid on the test loader, keeping the
# labels and predictions for the per-class report and the confusion matrix.
test_loss, test_acc, y_true, y_pred = evaluate(
    model=model_hybrid, loader=test_loader, criterion=criterion_hybrid, device=device
)

print("\n===== CNN-LSTM Results =====")
print("Final Test Loss:", test_loss)
print("Final Test Accuracy:", test_acc)
print("\n[CNN-LSTM] Classification Report:")
print(classification_report(y_true=y_true, y_pred=y_pred, digits=4))
print("[CNN-LSTM] Confusion Matrix:")
print(confusion_matrix(y_true=y_true, y_pred=y_pred))


===== CNN-LSTM Results =====
Final Test Loss: 0.1307317849276555
Final Test Accuracy: 0.959208843413119

[CNN-LSTM] Classification Report:
              precision    recall  f1-score   support

           0     0.9931    0.9622    0.9774     18118
           1     0.5066    0.8237    0.6274       556
           2     0.9113    0.9434    0.9270      1448
           3     0.4833    0.8951    0.6277       162
           4     0.9767    0.9925    0.9846      1608

    accuracy                         0.9592     21892
   macro avg     0.7742    0.9234    0.8288     21892
weighted avg     0.9704    0.9592    0.9631     21892

[CNN-LSTM] Confusion Matrix:
[[17434   438   108   108    30]
 [   75   458    12    10     1]
 [   33     6  1366    36     7]
 [    7     1     9   145     0]
 [    6     1     4     1  1596]]
