In [None]:
import pandas as pd


In [None]:
# Load the FINAL_DATASET_2000 CSV and print its column / dtype / memory summary.
# NOTE(review): `data` is rebound to a different CSV in a later cell, so this
# frame is only used for the inspection below.
data = pd.read_csv('../dataset/FINAL_DATASET_2000.csv')

data.info()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, precision_score, f1_score, matthews_corrcoef, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import xgboost as xgb
import time
import os
import warnings
import gc
from tqdm import tqdm
import torch.nn.functional as F

In [None]:
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Set environment variables for stability
# NOTE(review): these are assigned after numpy / torch have already been
# imported; confirm the single-thread limits actually take effect.
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

# Set font family globally
plt.rcParams['font.family'] = 'Times New Roman'

# Base directory
# All result files (CSV, figures) are written below this directory; it is
# created on first run and reused afterwards.
BASE_DIR = "Results"
os.makedirs(BASE_DIR, exist_ok=True)

In [None]:
# Device setup: prefer Apple MPS, then CUDA, otherwise stay on the CPU.
device = torch.device("cpu")
try:
    # torch builds without the MPS backend do not expose torch.backends.mps;
    # look it up defensively so that CUDA is still considered on those builds.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        device = torch.device("mps")
        print("MPS device detected, using MPS")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
        print("CUDA device detected, using CUDA")
    else:
        print("Using CPU")
    print(f"Device: {device}")
except Exception:
    # Only ordinary errors are absorbed here; KeyboardInterrupt / SystemExit
    # propagate. Reset the device so it matches the message printed below.
    device = torch.device("cpu")
    print("Using CPU (default)")

print("=" * 70)
print("LOADING AND PREPARING DATA")
print("=" * 70)

In [None]:
# Load dataset
# This rebinds `data`, replacing the frame loaded in the earlier inspection cell.
data = pd.read_csv('../dataset/Combined_Common_Genes_With_Target_ML.csv')
print(f"Dataset shape: {data.shape}")
print(f"Columns: {list(data.columns[-5:])}")  # Show last 5 columns to confirm 'target'

# Prepare features and target
# Every column except 'target' becomes a feature.
# NOTE(review): assumes all remaining columns are numeric — TODO confirm.
X = data.drop('target', axis=1).values
y = data['target'].values

# Ensure y is 1D
y = y.ravel()

In [None]:
# Summarise the feature matrix and label vector before splitting.
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"Classes: {np.unique(y)}")
# np.bincount only accepts non-negative integer labels.
print(f"Class distribution: {np.bincount(y) / len(y)}")

# Module-level globals read by the model constructors and calculate_metrics.
num_classes = len(np.unique(y))
num_features = X.shape[1]

# Split data
# 20% is held out for testing, then 25% of the remaining 80% becomes the
# validation set, i.e. roughly 60/20/20 train/val/test. Both splits are
# stratified on the labels and use a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train_sub, X_val, y_train_sub, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42,
                                                          stratify=y_train)

print(f"Train subset: {X_train_sub.shape}, {y_train_sub.shape}")
print(f"Validation: {X_val.shape}, {y_val.shape}")
print(f"Test: {X_test.shape}, {y_test.shape}")

In [None]:
# ==================== NEURAL NETWORK MODELS ====================

class CNN(nn.Module):
    """1-D convolutional classifier over a feature vector treated as a sequence.

    Three convolution + ReLU + max-pool stages are followed by a two-layer
    fully connected head that produces raw class logits.

    Args:
        num_classes: Number of output logits.
        input_features: Length of the feature axis fed to the network; it
            determines the input width of the first fully connected layer.
    """

    def __init__(self, num_classes, input_features):
        super().__init__()
        # The paddings keep the sequence length unchanged through each
        # convolution, so only the pooling layers shorten it.
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=5, padding=2)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.dropout = nn.Dropout(p=0.5)

        # Three successive halvings (each rounding down) leave
        # input_features // 8 positions per channel.
        pooled_length = input_features // 8
        self.fc1 = nn.Linear(16 * pooled_length, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return logits for ``x`` laid out as [batch, features, 1]."""
        x = x.transpose(1, 2)  # -> [batch, 1, features]
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(torch.relu(conv(x)))
        x = torch.flatten(x, start_dim=1)
        x = self.dropout(torch.relu(self.fc1(x)))
        return self.fc2(x)

In [None]:
# SIMPLE BUT IMPROVED AttBiLSTM
class SimpleImprovedAttBiLSTM(nn.Module):
    """Two-layer bidirectional LSTM with additive attention pooling.

    Each time step carries a single scalar input. The LSTM outputs are
    combined into one vector per sample using softmax attention weights over
    the sequence axis, and a small MLP maps that vector to class logits.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        hidden_size = 128
        lstm_out_dim = 2 * hidden_size  # forward + backward directions

        self.lstm = nn.LSTM(
            input_size=1,
            hidden_size=hidden_size,
            num_layers=2,
            dropout=0.3,
            bidirectional=True,
            batch_first=True,
        )

        # One attention score per time step.
        self.attention = nn.Linear(lstm_out_dim, 1)

        self.classifier = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(lstm_out_dim, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, x):
        """Return logits for ``x`` laid out as [batch, seq_len, 1]."""
        sequence, _ = self.lstm(x)  # [batch, seq_len, 256]

        # Normalise the scores across the sequence axis.
        scores = self.attention(sequence)  # [batch, seq_len, 1]
        weights = F.softmax(scores, dim=1)

        # Attention-weighted sum over time steps -> [batch, 256]
        pooled = (sequence * weights).sum(dim=1)

        return self.classifier(pooled)

In [None]:

class FixedAttBiLSTM(nn.Module):
    """Stacked bidirectional LSTM with attention pooling and an MLP head.

    Every scalar input is first projected to 64 dimensions, passed through
    two bidirectional LSTM layers (hidden sizes 128 and 64), pooled over the
    sequence axis with learned attention weights, and classified by a
    three-layer MLP with batch normalisation.

    Args:
        num_classes: Number of output logits.
        input_features: Expected sequence length; stored for reference only,
            the layers themselves do not depend on it.
    """

    def __init__(self, num_classes, input_features):
        super(FixedAttBiLSTM, self).__init__()

        self.input_features = input_features

        # Input projection to reduce dimensionality and add non-linearity
        self.input_projection = nn.Sequential(
            nn.Linear(1, 64),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        # BiLSTM layers.
        # nn.LSTM only applies its `dropout` argument between stacked layers,
        # so on these single-layer modules it had no effect beyond emitting a
        # warning; it is therefore omitted.
        self.lstm1 = nn.LSTM(64, 128, batch_first=True, bidirectional=True)
        self.lstm2 = nn.LSTM(256, 64, batch_first=True, bidirectional=True)

        # Attention: score each time step, then normalise over the sequence axis
        self.attention = nn.Sequential(
            nn.Linear(128, 64),
            nn.Tanh(),
            nn.Linear(64, 1),
            nn.Softmax(dim=1)
        )

        # Classification head
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.4),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise weight matrices and zero all biases.

        One-dimensional ``weight`` parameters are the BatchNorm scale
        factors. They keep their default value of one: drawing them from a
        small range around zero would shrink (or flip the sign of) the
        normalised activations.
        """
        for name, param in self.named_parameters():
            if 'weight' in name:
                if param.dim() >= 2:
                    nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)

    def forward(self, x):
        """Return logits for ``x`` laid out as [batch, seq_len, 1].

        Raises:
            ValueError: If ``x`` is not a 3-D tensor.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape [batch, seq_len, 1], got {tuple(x.shape)}"
            )

        # Project input features
        x = self.input_projection(x)  # [batch, seq_len, 64]

        # First LSTM layer
        lstm_out1, _ = self.lstm1(x)  # [batch, seq_len, 256]

        # Second LSTM layer
        lstm_out2, _ = self.lstm2(lstm_out1)  # [batch, seq_len, 128]

        # Attention mechanism
        attention_weights = self.attention(lstm_out2)  # [batch, seq_len, 1]

        # Apply attention
        context_vector = torch.sum(lstm_out2 * attention_weights, dim=1)  # [batch, 128]

        # Classification
        return self.classifier(context_vector)

In [None]:
def calculate_metrics(y_true, y_pred, y_prob=None):
    """Compute classification metrics and ROC curve data.

    The number of classes is read from the module-level ``num_classes``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        y_prob: Optional per-class probabilities indexed as
            ``y_prob[:, class]``. When omitted, AUC is reported as 0.0 and no
            ROC data is returned.

    Returns:
        ``(results, roc_data)`` where ``results`` maps 'ACC', 'PRE', 'SP',
        'SN', 'F1', 'MCC' and 'AUC' to values, and ``roc_data`` is a list of
        ``(fpr, tpr, auc)`` tuples: one entry for binary problems, one per
        class (one-vs-rest) otherwise.
    """

    def _safe_ratio(numerator, denominator):
        # Rate that falls back to 0 when the denominator is empty.
        return numerator / denominator if denominator > 0 else 0

    binary = num_classes == 2

    if binary:
        # Confusion-matrix cells with class 1 as the positive class.
        tp = np.sum((y_true == 1) & (y_pred == 1))
        tn = np.sum((y_true == 0) & (y_pred == 0))
        fp = np.sum((y_true == 0) & (y_pred == 1))
        fn = np.sum((y_true == 1) & (y_pred == 0))

        specificity = _safe_ratio(tn, tn + fp)
        sensitivity = _safe_ratio(tp, tp + fn)
    else:
        # One-vs-rest rates per class, averaged without weighting.
        specificities = []
        sensitivities = []
        for label in range(num_classes):
            tp = np.sum((y_true == label) & (y_pred == label))
            tn = np.sum((y_true != label) & (y_pred != label))
            fp = np.sum((y_true != label) & (y_pred == label))
            fn = np.sum((y_true == label) & (y_pred != label))

            specificities.append(_safe_ratio(tn, tn + fp))
            sensitivities.append(_safe_ratio(tp, tp + fn))

        specificity = np.mean(specificities)
        sensitivity = np.mean(sensitivities)

    results = {
        'ACC': accuracy_score(y_true, y_pred),
        'PRE': precision_score(y_true, y_pred, average='macro', zero_division=0),
        'SP': specificity,
        'SN': sensitivity,
        'F1': f1_score(y_true, y_pred, average='macro', zero_division=0),
        'MCC': matthews_corrcoef(y_true, y_pred),
    }

    # Without probabilities there is nothing to build a ROC curve from.
    if y_prob is None:
        results['AUC'] = 0.0
        return results, []

    if binary:
        fpr, tpr, _ = roc_curve(y_true, y_prob[:, 1])
        results['AUC'] = auc(fpr, tpr)
        return results, [(fpr, tpr, results['AUC'])]

    roc_data = []
    for label in range(num_classes):
        fpr, tpr, _ = roc_curve(y_true == label, y_prob[:, label])
        roc_data.append((fpr, tpr, auc(fpr, tpr)))
    results['AUC'] = np.mean([class_auc for _, _, class_auc in roc_data])

    return results, roc_data

In [None]:
def fixed_train_attbilstm(model, X_train, y_train, X_val, y_val, epochs=80, batch_size=32):
    """Train an attention BiLSTM with feature scaling and early stopping.

    Features are standardised with a ``StandardScaler`` fitted on the
    training data, then reshaped so each feature becomes one time step.
    Training uses label-smoothed cross-entropy, AdamW, cosine warm restarts
    and gradient-norm clipping. The weights from the last epoch that improved
    either validation loss or validation accuracy are restored at the end.

    Args:
        model: Network accepting [batch, features, 1] float tensors.
        X_train, y_train: Training features (2-D) and integer labels.
        X_val, y_val: Validation features and integer labels.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size for both loaders.

    Returns:
        ``(model, scaler)``: the trained model and the fitted scaler, which
        must be applied to any data passed to the model later.
    """
    print("Training Fixed AttBiLSTM...")

    model.to(device)

    # Normalize data - CRITICAL for LSTM
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # Reshape for LSTM: treat each feature as a time step
    X_train_tensor = torch.FloatTensor(X_train_scaled).unsqueeze(-1)  # [batch, features, 1]
    X_val_tensor = torch.FloatTensor(X_val_scaled).unsqueeze(-1)
    y_train_tensor = torch.LongTensor(y_train)
    y_val_tensor = torch.LongTensor(y_val)

    # Data loaders
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

    # drop_last=True discards a trailing partial training batch.
    # NOTE(review): a training set smaller than batch_size therefore yields
    # zero batches and the per-epoch averages below divide by zero.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, drop_last=False)

    # Improved optimizer setup
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # Label smoothing for better generalization
    optimizer = optim.AdamW(model.parameters(), lr=0.002, weight_decay=1e-4)  # AdamW instead of Adam
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    # Early stopping
    best_val_loss = float('inf')
    best_val_acc = 0.0
    patience = 20
    patience_counter = 0
    best_model_state = None

    print(f"Starting training for {epochs} epochs...")
    print(f"Train batches: {len(train_loader)}, Val batches: {len(val_loader)}")

    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()

            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)

            # Backward pass with gradient clipping
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            train_total += y_batch.size(0)
            train_correct += (predicted == y_batch).sum().item()

            # Print progress every 10 batches
            if batch_idx % 10 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Batch {batch_idx}/{len(train_loader)}, "
                      f"Loss: {loss.item():.4f}")

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += y_batch.size(0)
                val_correct += (predicted == y_batch).sum().item()

        # Calculate metrics (losses are averaged per batch, accuracies per sample)
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        train_acc = 100.0 * train_correct / train_total
        val_acc = 100.0 * val_correct / val_total

        # Learning rate scheduling
        scheduler.step()
        current_lr = optimizer.param_groups[0]['lr']

        # Print epoch results
        print(f"Epoch {epoch + 1}/{epochs}:")
        print(f"  Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"  Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%")
        print(f"  Learning Rate: {current_lr:.6f}")
        print("-" * 50)

        # Early stopping based on validation loss and accuracy
        if avg_val_loss < best_val_loss or val_acc > best_val_acc:
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
            if val_acc > best_val_acc:
                best_val_acc = val_acc

            # state_dict() hands back references to the live parameter
            # tensors, and dict.copy() is shallow, so each tensor has to be
            # cloned; otherwise later optimizer steps would silently
            # overwrite the "best" snapshot.
            best_model_state = {
                key: value.detach().clone()
                for key, value in model.state_dict().items()
            }
            patience_counter = 0
            print(f"  >>> New best model! Val Loss: {best_val_loss:.4f}, Val Acc: {best_val_acc:.2f}%")
        else:
            patience_counter += 1
            print(f"  No improvement. Patience: {patience_counter}/{patience}")

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch + 1}")
            break

    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"Loaded best model with Val Loss: {best_val_loss:.4f}, Val Acc: {best_val_acc:.2f}%")

    return model, scaler

In [None]:
def fixed_evaluate_attbilstm(model, scaler, X_test, y_test, batch_size=32):
    """Run inference with a trained AttBiLSTM and time it.

    Args:
        model: Trained network accepting [batch, features, 1] tensors.
        scaler: Fitted scaler exposing ``transform``; it must be the one
            returned by the training function.
        X_test: Test features (2-D).
        y_test: Test labels; only used to build the dataset, not for scoring.
        batch_size: Inference batch size.

    Returns:
        ``(predicted_classes, probabilities, test_time)``: arg-max class per
        sample, softmax probabilities of shape [samples, classes], and the
        wall-clock seconds spent in the inference loop.
    """
    model.eval()

    # Apply same scaling and reshaping
    X_test_scaled = scaler.transform(X_test)
    X_test_tensor = torch.FloatTensor(X_test_scaled).unsqueeze(-1)
    test_dataset = TensorDataset(X_test_tensor, torch.LongTensor(y_test))
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # Only probabilities are kept; the raw logits were never consumed.
    all_probs = []
    start_time = time.time()

    with torch.no_grad():
        for X_batch, _ in test_loader:
            X_batch = X_batch.to(device)
            outputs = model(X_batch)
            all_probs.append(torch.softmax(outputs, dim=1).cpu().numpy())

    test_time = time.time() - start_time

    # Combine results
    all_probs = np.vstack(all_probs)
    predicted_classes = np.argmax(all_probs, axis=1)

    return predicted_classes, all_probs, test_time

In [None]:
# Updated training function to use the fixed version
def train_fixed_attbilstm():
    """Train and evaluate ``FixedAttBiLSTM`` on the module-level data splits.

    Returns:
        ``(results, roc_data)`` from ``calculate_metrics``, with
        'Training Time' and 'Testing Time' (seconds) added to ``results``.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("TRAINING FIXED ATTBILSTM")
    print(rule)

    # Training time covers model construction as well as fitting.
    train_started = time.time()
    model, fitted_scaler = fixed_train_attbilstm(
        FixedAttBiLSTM(num_classes, num_features),
        X_train_sub, y_train_sub, X_val, y_val,
        epochs=80, batch_size=32,
    )
    train_time = time.time() - train_started

    y_pred, y_prob, test_time = fixed_evaluate_attbilstm(model, fitted_scaler, X_test, y_test)

    results, roc_data = calculate_metrics(y_test, y_pred, y_prob)
    results['Training Time'] = train_time
    results['Testing Time'] = test_time

    print(f"Training time: {train_time:.4f}s")
    print(f"Testing time: {test_time:.4f}s")
    for label, key in (("Accuracy", 'ACC'), ("AUC", 'AUC'), ("F1-Score", 'F1'), ("MCC", 'MCC')):
        print(f"{label}: {results[key]:.4f}")

    # Drop the model and release cached accelerator memory before the next run.
    del model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif torch.backends.mps.is_available():
        torch.mps.empty_cache()
    gc.collect()

    return results, roc_data

In [None]:
def train_pytorch_model(model, X_train, y_train, X_val, y_val, epochs=50, batch_size=32):
    """Fit a PyTorch classifier (used for the CNN) for a fixed number of epochs.

    Features are fed unscaled and reshaped to [batch, features, 1]. Training
    uses cross-entropy with Adam; the learning rate is reduced when the mean
    validation loss plateaus. There is no early stopping.

    Args:
        model: Network accepting [batch, features, 1] float tensors.
        X_train, y_train: Training features (2-D) and integer labels.
        X_val, y_val: Validation features and integer labels.
        epochs: Number of epochs to run.
        batch_size: Mini-batch size for both loaders.

    Returns:
        The trained model (same object that was passed in).
    """
    model.to(device)

    def _as_dataset(features, labels):
        # Append a trailing axis so every feature is a length-1 vector.
        return TensorDataset(torch.FloatTensor(features).unsqueeze(-1),
                             torch.LongTensor(labels))

    train_loader = DataLoader(_as_dataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(_as_dataset(X_val, y_val), batch_size=batch_size)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

    for epoch in range(epochs):
        # --- optimisation pass ---
        model.train()
        running_train_loss = 0
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()

            running_train_loss += batch_loss.item()

        # --- validation pass ---
        model.eval()
        running_val_loss = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                running_val_loss += criterion(model(inputs), targets).item()

        # The scheduler is driven by the mean per-batch validation loss.
        val_loss = running_val_loss / len(val_loader)
        scheduler.step(val_loss)

        # Report every tenth epoch only.
        if epoch % 10 == 0:
            mean_train_loss = running_train_loss / len(train_loader)
            print(
                f"Epoch {epoch + 1}/{epochs} - Train Loss: {mean_train_loss:.4f}, Val Loss: {val_loss:.4f}")

    return model

In [None]:
def simple_train_attbilstm(model, X_train, y_train, X_val, y_val, epochs=60, batch_size=64):
    """Train an AttBiLSTM with feature scaling and loss-based early stopping.

    Features are standardised with a ``StandardScaler`` fitted on the
    training data and reshaped to [batch, features, 1]. Training uses
    cross-entropy with Adam and a step learning-rate decay. The weights from
    the epoch with the lowest mean validation loss are restored at the end.

    Args:
        model: Network accepting [batch, features, 1] float tensors.
        X_train, y_train: Training features (2-D) and integer labels.
        X_val, y_val: Validation features and integer labels.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size for both loaders.

    Returns:
        ``(model, scaler)``: the trained model and the fitted scaler, which
        must be applied to any data passed to the model later.
    """
    model.to(device)

    # MOST IMPORTANT: Normalize the data!
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # Create tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).unsqueeze(-1)
    X_val_tensor = torch.FloatTensor(X_val_scaled).unsqueeze(-1)
    y_train_tensor = torch.LongTensor(y_train)
    y_val_tensor = torch.LongTensor(y_val)

    # Data loaders
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # Simple but effective training setup
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # Early stopping
    best_val_loss = float('inf')
    patience = 15
    patience_counter = 0
    best_model_state = None

    print(f"Training Simple AttBiLSTM for {epochs} epochs...")

    # Training loop
    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                val_loss += criterion(outputs, y_batch).item()

        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)

        scheduler.step()

        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # state_dict() hands back references to the live parameter
            # tensors, and dict.copy() is shallow, so each tensor has to be
            # cloned; otherwise later optimizer steps would silently
            # overwrite the "best" snapshot.
            best_model_state = {
                key: value.detach().clone()
                for key, value in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1

        if epoch % 10 == 0:
            print(f"Epoch {epoch + 1}/{epochs} - Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch + 1}")
            break

    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model, scaler

In [None]:
def evaluate_pytorch_model(model, X_test, y_test, batch_size=32):
    """Run inference with a trained PyTorch classifier (used for the CNN).

    Args:
        model: Trained network accepting [batch, features, 1] tensors.
        X_test: Test features (2-D), fed unscaled.
        y_test: Test labels; only used to build the dataset, not for scoring.
        batch_size: Inference batch size.

    Returns:
        ``(predicted_classes, probabilities, test_time)``: arg-max class per
        sample, softmax probabilities of shape [samples, classes], and the
        wall-clock seconds spent in the inference loop.
    """
    model.eval()

    features = torch.FloatTensor(X_test).unsqueeze(-1)
    labels = torch.LongTensor(y_test)
    loader = DataLoader(TensorDataset(features, labels), batch_size=batch_size)

    # Only the forward passes are timed; post-processing happens afterwards.
    logit_chunks = []
    started = time.time()
    with torch.no_grad():
        for inputs, _ in loader:
            logit_chunks.append(model(inputs.to(device)).cpu().numpy())
    test_time = time.time() - started

    logits = np.vstack(logit_chunks)
    probabilities = torch.softmax(torch.tensor(logits), dim=1).numpy()
    predicted_classes = probabilities.argmax(axis=1)

    return predicted_classes, probabilities, test_time

In [None]:
def simple_evaluate_attbilstm(model, scaler, X_test, y_test, batch_size=64):
    """Run inference with a trained AttBiLSTM on scaled test features.

    Args:
        model: Trained network accepting [batch, features, 1] tensors.
        scaler: Fitted scaler exposing ``transform``; it must be the one
            returned by the training function.
        X_test: Test features (2-D).
        y_test: Test labels; only used to build the dataset, not for scoring.
        batch_size: Inference batch size.

    Returns:
        ``(predicted_classes, probabilities, test_time)``: arg-max class per
        sample, softmax probabilities of shape [samples, classes], and the
        wall-clock seconds spent in the inference loop.
    """
    model.eval()

    # Reuse the training-time scaling, then add the trailing feature axis.
    features = torch.FloatTensor(scaler.transform(X_test)).unsqueeze(-1)
    labels = torch.LongTensor(y_test)
    loader = DataLoader(TensorDataset(features, labels), batch_size=batch_size)

    logit_chunks = []
    started = time.time()
    with torch.no_grad():
        for inputs, _ in loader:
            logit_chunks.append(model(inputs.to(device)).cpu().numpy())
    test_time = time.time() - started

    logits = np.vstack(logit_chunks)
    probabilities = torch.softmax(torch.tensor(logits), dim=1).numpy()
    predicted_classes = probabilities.argmax(axis=1)

    return predicted_classes, probabilities, test_time

In [None]:
def train_random_forest():
    """Fit a random forest on the training subset and score it on the test set.

    Reads the module-level data splits.

    Returns:
        ``(results, roc_data)`` from ``calculate_metrics``, with
        'Training Time' and 'Testing Time' (seconds) added to ``results``.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("TRAINING RANDOM FOREST")
    print(rule)

    # Training time covers estimator construction as well as fitting.
    tic = time.time()
    forest = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)
    forest.fit(X_train_sub, y_train_sub)
    train_time = time.time() - tic

    # Testing time covers both label and probability prediction.
    tic = time.time()
    y_pred = forest.predict(X_test)
    y_prob = forest.predict_proba(X_test)
    test_time = time.time() - tic

    results, roc_data = calculate_metrics(y_test, y_pred, y_prob)
    results.update({'Training Time': train_time, 'Testing Time': test_time})

    print(f"Training time: {train_time:.4f}s")
    print(f"Testing time: {test_time:.4f}s")
    print(f"Accuracy: {results['ACC']:.4f}")
    print(f"AUC: {results['AUC']:.4f}")

    return results, roc_data

In [None]:
def train_svm():
    """Fit an RBF-kernel SVM on the training subset and score it on the test set.

    Reads the module-level data splits. Probability estimates are enabled so
    that ROC/AUC can be computed.

    Returns:
        ``(results, roc_data)`` from ``calculate_metrics``, with
        'Training Time' and 'Testing Time' (seconds) added to ``results``.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("TRAINING SVM")
    print(rule)

    # Training time covers estimator construction as well as fitting.
    tic = time.time()
    classifier = SVC(kernel='rbf', C=1.0, probability=True, random_state=42)
    classifier.fit(X_train_sub, y_train_sub)
    train_time = time.time() - tic

    # Testing time covers both label and probability prediction.
    tic = time.time()
    y_pred = classifier.predict(X_test)
    y_prob = classifier.predict_proba(X_test)
    test_time = time.time() - tic

    results, roc_data = calculate_metrics(y_test, y_pred, y_prob)
    results.update({'Training Time': train_time, 'Testing Time': test_time})

    print(f"Training time: {train_time:.4f}s")
    print(f"Testing time: {test_time:.4f}s")
    print(f"Accuracy: {results['ACC']:.4f}")
    print(f"AUC: {results['AUC']:.4f}")

    return results, roc_data

In [None]:
def train_xgboost():
    """Fit XGBoost with early stopping, falling back to a minimal configuration.

    The primary attempt trains a CPU histogram-based classifier with the
    validation split as the early-stopping evaluation set. If anything in
    that attempt raises, a smaller classifier without early stopping is
    tried. If that fails too, a zero-filled result with an 'Error' entry is
    returned instead of raising.

    Returns:
        ``(results, roc_data)`` from ``calculate_metrics`` with timing
        entries added, or ``(zeroed_results, [])`` when both attempts fail.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("TRAINING XGBOOST")
    print(rule)

    def _score_and_report(fitted, train_time, headline=None):
        # Time prediction, compute metrics and print the summary lines.
        tic = time.time()
        y_pred = fitted.predict(X_test)
        y_prob = fitted.predict_proba(X_test)
        test_time = time.time() - tic

        results, roc_data = calculate_metrics(y_test, y_pred, y_prob)
        results['Training Time'] = train_time
        results['Testing Time'] = test_time

        if headline is not None:
            print(headline)
        print(f"Training time: {train_time:.4f}s")
        print(f"Testing time: {test_time:.4f}s")
        print(f"Accuracy: {results['ACC']:.4f}")
        print(f"AUC: {results['AUC']:.4f}")

        return results, roc_data

    try:
        # Force CPU usage for XGBoost to avoid MPS conflicts
        # NOTE(review): this mutates the process environment and is never
        # restored; confirm it cannot hide GPUs from the PyTorch models that
        # are trained afterwards.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''

        tic = time.time()

        # Binary and multi-class runs differ only in objective and metric.
        is_binary = num_classes == 2
        xgb_model = xgb.XGBClassifier(
            objective='binary:logistic' if is_binary else 'multi:softprob',
            eval_metric='logloss' if is_binary else 'mlogloss',
            max_depth=6,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            n_estimators=100,
            early_stopping_rounds=10,
            random_state=42,
            verbosity=0,
            tree_method='hist',  # Use histogram method for stability
            device='cpu'  # Force CPU
        )

        # The validation split drives early stopping.
        xgb_model.fit(
            X_train_sub, y_train_sub,
            eval_set=[(X_val, y_val)],
            verbose=False
        )
        train_time = time.time() - tic

        return _score_and_report(xgb_model, train_time)

    except Exception as e:
        print(f"XGBoost training failed: {e}")
        print("Attempting fallback with basic parameters...")

        try:
            # Minimal configuration: fewer, shallower trees, no early stopping.
            tic = time.time()
            xgb_model = xgb.XGBClassifier(
                n_estimators=50,
                max_depth=3,
                learning_rate=0.1,
                random_state=42,
                verbosity=0,
                device='cpu'
            )
            xgb_model.fit(X_train_sub, y_train_sub)
            train_time = time.time() - tic

            return _score_and_report(xgb_model, train_time, headline="Fallback successful!")

        except Exception as e2:
            print(f"XGBoost fallback also failed: {e2}")
            return {
                'ACC': 0.0, 'AUC': 0.0, 'PRE': 0.0, 'SP': 0.0, 'SN': 0.0,
                'F1': 0.0, 'MCC': 0.0, 'Training Time': 0.0, 'Testing Time': 0.0,
                'Error': str(e2)
            }, []

In [None]:
def train_cnn():
    """Train and evaluate the ``CNN`` model on the module-level data splits.

    Returns:
        ``(results, roc_data)`` from ``calculate_metrics``, with
        'Training Time' and 'Testing Time' (seconds) added to ``results``.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("TRAINING CNN")
    print(rule)

    # Training time covers model construction as well as fitting.
    train_started = time.time()
    network = train_pytorch_model(
        CNN(num_classes, num_features),
        X_train_sub, y_train_sub, X_val, y_val,
        epochs=50,
    )
    train_time = time.time() - train_started

    y_pred, y_prob, test_time = evaluate_pytorch_model(network, X_test, y_test)

    results, roc_data = calculate_metrics(y_test, y_pred, y_prob)
    results.update({'Training Time': train_time, 'Testing Time': test_time})

    print(f"Training time: {train_time:.4f}s")
    print(f"Testing time: {test_time:.4f}s")
    print(f"Accuracy: {results['ACC']:.4f}")
    print(f"AUC: {results['AUC']:.4f}")

    # Drop the model and release cached accelerator memory before the next run.
    del network
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif torch.backends.mps.is_available():
        torch.mps.empty_cache()
    gc.collect()

    return results, roc_data

In [None]:
def train_attbilstm():
    """Train and evaluate ``SimpleImprovedAttBiLSTM`` on the module-level splits.

    Returns:
        ``(results, roc_data)`` from ``calculate_metrics``, with
        'Training Time' and 'Testing Time' (seconds) added to ``results``.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("TRAINING SIMPLE IMPROVED ATTBILSTM")
    print(rule)

    # Training time covers model construction as well as fitting.
    train_started = time.time()
    network, fitted_scaler = simple_train_attbilstm(
        SimpleImprovedAttBiLSTM(num_classes),
        X_train_sub, y_train_sub, X_val, y_val,
        epochs=100, batch_size=64,
    )
    train_time = time.time() - train_started

    # The scaler fitted during training is reused for the test features.
    y_pred, y_prob, test_time = simple_evaluate_attbilstm(network, fitted_scaler, X_test, y_test)

    results, roc_data = calculate_metrics(y_test, y_pred, y_prob)
    results.update({'Training Time': train_time, 'Testing Time': test_time})

    print(f"Training time: {train_time:.4f}s")
    print(f"Testing time: {test_time:.4f}s")
    for label, key in (("Accuracy", 'ACC'), ("AUC", 'AUC'), ("F1-Score", 'F1')):
        print(f"{label}: {results[key]:.4f}")

    # Drop the model and release cached accelerator memory before the next run.
    del network
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif torch.backends.mps.is_available():
        torch.mps.empty_cache()
    gc.collect()

    return results, roc_data

In [None]:
print("\n" + "=" * 70)
print("STARTING 5-MODEL COMPARISON WITH SIMPLE IMPROVED ATTBILSTM")
print("=" * 70)

# Ordered (display name, training callable) pairs. Each callable takes no
# arguments and returns (results dict, ROC data list).
models = [
    ("RF", train_random_forest),
    ("SVM", train_svm),
    ("XGBoost", train_xgboost),
    ("CNN", train_cnn),
    ("Fixed_AttBiLSTM", train_fixed_attbilstm),
]

# Metrics and ROC curves collected per model name.
all_results = {}
all_roc_data = {}

for model_name, train_func in models:
    try:
        print(f"\nStarting {model_name}...")

        # Free Python garbage and cached accelerator memory before each run.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        elif torch.backends.mps.is_available():
            torch.mps.empty_cache()

        results, roc_data = train_func()
        all_results[model_name] = results
        all_roc_data[model_name] = roc_data

        print(f"{model_name} completed successfully!")

    except Exception as e:
        # Best effort: one failing model must not abort the whole comparison.
        # It is recorded with zeroed metrics and the error message instead.
        print(f"Error training {model_name}: {e}")
        print("Continuing with next model...")
        failed_entry = dict.fromkeys(
            ['ACC', 'AUC', 'PRE', 'SP', 'SN', 'F1', 'MCC', 'Training Time', 'Testing Time'],
            0.0,
        )
        failed_entry['Error'] = str(e)
        all_results[model_name] = failed_entry
        all_roc_data[model_name] = []

# Create results summary
print("\n" + "=" * 70)
print("FINAL RESULTS SUMMARY")
print("=" * 70)

# One row per model, one column per metric, in a fixed display order.
column_order = ['ACC', 'AUC', 'PRE', 'SP', 'SN', 'F1', 'MCC', 'Training Time', 'Testing Time']
results_df = pd.DataFrame(all_results).T
results_df = results_df.reindex(columns=column_order)
# When any model recorded an 'Error' string, the transposed frame has object
# dtype and DataFrame.round() silently skips every column. The selected
# metric columns are all numeric, so cast them back to float.
results_df = results_df.astype(float)

# Save results
results_df.to_csv(os.path.join(BASE_DIR, 'simple_improved_model_comparison_results.csv'))
print("\nResults Table:")
print(results_df.round(4))

# Plot ROC curves
plt.figure(figsize=(10, 8))
colors = ['blue', 'green', 'red', 'purple', 'orange']

for i, (model_name, roc_data) in enumerate(all_roc_data.items()):
    # Models that failed have an empty ROC list and are skipped.
    if roc_data and len(roc_data) > 0:
        # Use first class ROC for display (or the only one for binary)
        fpr, tpr, auc_value = roc_data[0]
        label_name = model_name.replace('_', ' ')
        # Cycle through the palette so that listing more models than colours
        # does not raise an IndexError.
        plt.plot(fpr, tpr, color=colors[i % len(colors)],
                 label=f'{label_name} (AUC = {auc_value:.3f})', linewidth=3)

# Chance-level diagonal for reference.
plt.plot([0, 1], [0, 1], 'k--', linewidth=2)

# Set plot styling
ax = plt.gca()
for spine in ax.spines.values():
    spine.set_linewidth(2)
    spine.set_color('black')

plt.xlabel('False Positive Rate', fontsize=24)
plt.ylabel('True Positive Rate', fontsize=24)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.title('ROC Curves - Simple Improved AttBiLSTM vs Other Models', fontsize=28, pad=20)
plt.legend(loc='lower right', fontsize=18)
plt.tight_layout()

# Save plot (must happen before plt.show(), which may clear the figure)
plt.savefig(os.path.join(BASE_DIR, 'simple_improved_models_roc.png'), dpi=1000)
plt.savefig(os.path.join(BASE_DIR, 'simple_improved_models_roc.pdf'), dpi=1000)
plt.show()

# Model rankings
# Loop variables get their own names on purpose: calling one of them
# `f1_score` would rebind the sklearn function imported at the top of the
# notebook and break every later call to calculate_metrics in this kernel.
print("\n" + "=" * 50)
print("MODEL RANKING BY ACCURACY")
print("=" * 50)

acc_ranking = results_df['ACC'].sort_values(ascending=False)
for i, (model, acc) in enumerate(acc_ranking.items(), 1):
    if not pd.isna(acc):
        print(f"{i}. {model}: {acc:.4f}")

print("\n" + "=" * 50)
print("MODEL RANKING BY AUC")
print("=" * 50)

auc_ranking = results_df['AUC'].sort_values(ascending=False)
for i, (model, auc_value) in enumerate(auc_ranking.items(), 1):
    if not pd.isna(auc_value):
        print(f"{i}. {model}: {auc_value:.4f}")

print("\n" + "=" * 50)
print("MODEL RANKING BY F1-SCORE")
print("=" * 50)

f1_ranking = results_df['F1'].sort_values(ascending=False)
for i, (model, f1_value) in enumerate(f1_ranking.items(), 1):
    if not pd.isna(f1_value):
        print(f"{i}. {model}: {f1_value:.4f}")

print(f"\nAll results saved to: {BASE_DIR}/")
print("Generated files:")
print("- simple_improved_model_comparison_results.csv")
print("- simple_improved_models_roc.png")
print("- simple_improved_models_roc.pdf")