In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import RandomOverSampler

# ✅ Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [3]:
# Load your CSV file
df = pd.read_csv("IL_T600_cleaned.csv")  # Replace with the correct filename if needed

# Check the initial NaN count per column
print("NaN count before fixing:\n", df.isnull().sum())

# Fill NaN in 'spectral_contrast' with the column mean (example approach)
#mean_val = df['spectral_contrast'].mean()
#df['spectral_contrast'].fillna(mean_val, inplace=True)
df.dropna(inplace=True)


# Double-check NaNs are resolved
print("\nNaN count after filling:\n", df.isnull().sum())


NaN count before fixing:
 label                 0
Mean                  0
RMS                   0
Variance              0
Standard Deviation    0
Peak-to-Peak          0
Crest Factor          0
Skewness              0
Kurtosis              0
Energy                0
Entropy               0
Max                   0
Min                   0
peak_magnitude        0
spectral_centroid     0
spectral_bandwidth    0
spectral_flatness     0
spectral_rolloff      0
spectral_entropy      0
spectral_contrast     1
dtype: int64

NaN count after filling:
 label                 0
Mean                  0
RMS                   0
Variance              0
Standard Deviation    0
Peak-to-Peak          0
Crest Factor          0
Skewness              0
Kurtosis              0
Energy                0
Entropy               0
Max                   0
Min                   0
peak_magnitude        0
spectral_centroid     0
spectral_bandwidth    0
spectral_flatness     0
spectral_rolloff      0
spectral_entropy      

In [4]:
# Separate features (X) and label (y)
X = df.drop(columns=['label']).values  # all columns except 'label'
y = df['label'].values                 # label column

print(f"Features shape: {X.shape}, Label shape: {y.shape}")
print("Unique labels:", np.unique(y))

# Scale features using MinMaxScaler
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Define sequence length for LSTM
timesteps = 20

# Create sequences
def create_sequences(data, labels, seq_len=5):
    seqs, labs = [], []
    for i in range(len(data) - seq_len + 1):
        seqs.append(data[i : i + seq_len])
        labs.append(labels[i + seq_len - 1])
    return np.array(seqs), np.array(labs)

X_seq, y_seq = create_sequences(X_scaled, y, seq_len=timesteps)

print("Sequence X_seq shape:", X_seq.shape)
print("Sequence y_seq shape:", y_seq.shape)


Features shape: (19999, 19), Label shape: (19999,)
Unique labels: [0 1]
Sequence X_seq shape: (19980, 20, 19)
Sequence y_seq shape: (19980,)


In [5]:
# Split into train/test
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_seq,
    test_size=0.2,
    random_state=42,
    stratify=y_seq
)

print("X_train shape:", X_train.shape, "X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape, "y_test shape:", y_test.shape)

# Convert to PyTorch tensors and move to device
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_test_tensor  = torch.tensor(X_test,  dtype=torch.float32).to(device)
y_test_tensor  = torch.tensor(y_test,  dtype=torch.long).to(device)

# Check for any NaNs in final tensors
print("Any NaNs in X_train_tensor?", torch.isnan(X_train_tensor).any().item())
print("Any NaNs in y_train_tensor?", torch.isnan(y_train_tensor.float()).any().item())


X_train shape: (15984, 20, 19) X_test shape: (3996, 20, 19)
y_train shape: (15984,) y_test shape: (3996,)
Any NaNs in X_train_tensor? False
Any NaNs in y_train_tensor? False


In [6]:
batch_size = 64

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset  = TensorDataset(X_test_tensor,  y_test_tensor)
train_loader  = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader   = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False)

print("Train batches:", len(train_loader))
print("Test batches:", len(test_loader))


Train batches: 250
Test batches: 63


In [7]:
class DeeperLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim=128, output_dim=2, num_layers=2, dropout=0.3):
        super(DeeperLSTM, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout  # dropout applied between LSTM layers (only if num_layers > 1)
        )
        # Optional: A simple MLP head for extra capacity
        self.fc1 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim // 2, output_dim)
        
    def forward(self, x):
        # x shape: (batch, timesteps, features)
        lstm_out, _ = self.lstm(x)      # shape: (batch, timesteps, hidden_dim)
        last_step = lstm_out[:, -1, :]  # use the final timestep: shape (batch, hidden_dim)
        x = self.fc1(last_step)
        x = self.relu(x)
        out = self.fc2(x)
        return out  # raw logits

num_features = X.shape[1]  # should be 19
model = DeeperLSTM(input_dim=num_features, hidden_dim=128, output_dim=2, num_layers=2, dropout=0.3).to(device)
print(model)


DeeperLSTM(
  (lstm): LSTM(19, 128, num_layers=2, batch_first=True, dropout=0.3)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=2, bias=True)
)


In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)  # a relatively small LR

print("Criterion:", criterion)
print("Optimizer:", optimizer)


Criterion: CrossEntropyLoss()
Optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 1e-05
    maximize: False
    weight_decay: 0
)


In [9]:
epochs = 100
print("\nTraining started...")

for epoch in range(1, epochs + 1):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    
    for Xb, yb in train_loader:
        optimizer.zero_grad()
        outputs = model(Xb)          # shape: (batch, 2)
        loss = criterion(outputs, yb)
        loss.backward()
        
        # Gradient clipping
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += yb.size(0)
        correct += (predicted == yb).sum().item()
    
    epoch_loss = running_loss / len(train_loader)
    epoch_acc  = 100.0 * correct / total
    print(f"Epoch {epoch}/{epochs} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.2f}%")



Training started...
Epoch 1/100 | Loss: 0.6969 | Acc: 49.95%
Epoch 2/100 | Loss: 0.6943 | Acc: 49.95%
Epoch 3/100 | Loss: 0.6930 | Acc: 49.95%
Epoch 4/100 | Loss: 0.6926 | Acc: 51.32%
Epoch 5/100 | Loss: 0.6925 | Acc: 53.65%
Epoch 6/100 | Loss: 0.6924 | Acc: 51.95%
Epoch 7/100 | Loss: 0.6922 | Acc: 53.15%
Epoch 8/100 | Loss: 0.6919 | Acc: 51.65%
Epoch 9/100 | Loss: 0.6915 | Acc: 51.68%
Epoch 10/100 | Loss: 0.6909 | Acc: 57.58%
Epoch 11/100 | Loss: 0.6897 | Acc: 51.56%
Epoch 12/100 | Loss: 0.6862 | Acc: 54.64%
Epoch 13/100 | Loss: 0.6687 | Acc: 58.35%
Epoch 14/100 | Loss: 0.6236 | Acc: 66.70%
Epoch 15/100 | Loss: 0.6093 | Acc: 67.57%
Epoch 16/100 | Loss: 0.6031 | Acc: 67.63%
Epoch 17/100 | Loss: 0.5995 | Acc: 68.36%
Epoch 18/100 | Loss: 0.5916 | Acc: 68.86%
Epoch 19/100 | Loss: 0.5884 | Acc: 68.99%
Epoch 20/100 | Loss: 0.5868 | Acc: 69.58%
Epoch 21/100 | Loss: 0.5812 | Acc: 69.54%
Epoch 22/100 | Loss: 0.5780 | Acc: 69.87%
Epoch 23/100 | Loss: 0.5768 | Acc: 69.96%
Epoch 24/100 | Loss: 0

In [10]:
# ---------------------------------------------------------------
# 9) Evaluate on Test Set  +  Confusion Matrix  +  ROC-AUC
# ---------------------------------------------------------------
import torch.nn.functional as F
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score, confusion_matrix)

model.eval()
all_preds, all_true, all_scores = [], [], []          # ← new list for AUC scores

with torch.no_grad():
    for Xb, yb in test_loader:
        logits = model(Xb)                            # shape: [batch, 2]

        # ── probability that each sample is class-1 (Trojan) ──
        prob_pos = F.softmax(logits, dim=1)[:, 1]     # if model uses softmax
        # prob_pos = torch.sigmoid(logits).squeeze()  # use this instead if model ends with a single-logit sigmoid layer

        _, predicted = torch.max(logits, 1)

        all_scores.append(prob_pos.cpu().numpy())     # ← collect scores
        all_preds.append(predicted.cpu().numpy())
        all_true.append(yb.cpu().numpy())

all_scores = np.concatenate(all_scores)               # ← for roc_auc_score
all_preds  = np.concatenate(all_preds)
all_true   = np.concatenate(all_true)

# Metrics
acc  = accuracy_score(all_true, all_preds)  * 100
prec = precision_score(all_true, all_preds, zero_division=0) * 100
rec  = recall_score(all_true, all_preds, zero_division=0) * 100
f1   = f1_score(all_true, all_preds, zero_division=0) * 100
auc  = roc_auc_score(all_true, all_scores) * 100       # ← NEW

# Display
print("\n✅ Final Test Results:")
print(f"AUC:        {auc:.2f}%")
print(f"Accuracy:   {acc:.2f}%")
print(f"Precision:  {prec:.2f}%")
print(f"Recall:     {rec:.2f}%")
print(f"F1-score:   {f1:.2f}%")

cm = confusion_matrix(all_true, all_preds)
print("\nConfusion Matrix:\n", cm)



✅ Final Test Results:
AUC:        84.10%
Accuracy:   75.93%
Precision:  78.83%
Recall:     70.95%
F1-score:   74.68%

Confusion Matrix:
 [[1615  381]
 [ 581 1419]]
