In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118
INFO: pip is looking at multiple versions of torch to determine which version is compatible with other requirements. This could take a while.
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.7.0%2Bcu118-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (28 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading https://download.pytorch.org/whl/sympy-1.13.3-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.8.89 (from torch)
  Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_nvrtc_cu11-11.8.89-py3-none-manylinux1_x86_64.whl (23.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.2/23.2 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu11==11.8.89 (from torch)
  Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_runtime_cu11-11.8.89-py3-none-manylinux1_x86_64.whl (875 kB)
[2K     [90m━━━━━━━━━━━━━━━━

In [None]:
!pip install pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 

In [None]:
import torch
from pytorch_tabnet.tab_model import TabNetClassifier

# Environment sanity check: report the installed PyTorch build string and
# whether PyTorch can see a CUDA device in this runtime.
print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())


Torch version: 2.6.0+cu124
CUDA available: True


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the dataset into a DataFrame; later cells use its 'class' column as the
# target and every other column as a feature.
# NOTE(review): the path is absolute and hard-coded (looks like a Colab
# upload location -- confirm); adjust it when running elsewhere.
netphish = pd.read_csv('/content/NetPhishV4.csv')

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE

# Step 1: Keep X as a DataFrame (no .values) so column names survive for feature selection
X = netphish.drop('class', axis=1)
y = netphish['class']

# Step 2: Encode target labels as integers
le = LabelEncoder()
y = le.fit_transform(y)

# Step 3: Train-test split on the RAW features.
# The split has to happen before the scaler is fitted; fitting on the full
# dataset would let test-set means/variances leak into the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Standardize features -- fit on the training rows only, then apply the
# very same transform to the test rows.
scaler = StandardScaler()
X_train_full = pd.DataFrame(
    scaler.fit_transform(X_train_raw), columns=X.columns, index=X_train_raw.index
)
X_test_full = pd.DataFrame(
    scaler.transform(X_test_raw), columns=X.columns, index=X_test_raw.index
)

# Whole-dataset scaled views, kept so any cell that still refers to these names
# keeps working. They come from the train-fitted scaler, so they carry no
# test-set statistics.
X_scaled = scaler.transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)

# Step 5: Recursive feature elimination with a RandomForest, fitted on training data only
model = RandomForestClassifier(random_state=42)
rfe = RFE(model, n_features_to_select=20)
rfe.fit(X_train_full, y_train)

# Step 6: Keep the columns RFE marked as selected
selected_features = X.columns[rfe.support_]
X_train = X_train_full[selected_features]
X_test = X_test_full[selected_features]

# Step 7: Convert to PyTorch tensors (float32 features, int64 labels for CrossEntropyLoss)
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Mini-batch loaders: training batches are reshuffled every epoch, test order is fixed
train_ds = TensorDataset(X_train_tensor, y_train_tensor)
test_ds = TensorDataset(X_test_tensor, y_test_tensor)
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=64)

In [None]:
class MLP(nn.Module):
    """Two-hidden-layer perceptron that emits 2 class logits per sample.

    Layer widths are input_size -> 64 -> 32 -> 2, with ReLU after each hidden
    layer. The output is raw logits (no softmax).
    """

    def __init__(self, input_size):
        """Create the three linear layers.

        Args:
            input_size: number of features in each input row.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.out = nn.Linear(32, 2)

    def forward(self, x):
        """Map a batch of feature rows to a batch of 2-way logits."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        logits = self.out(hidden)
        return logits

# Seed PyTorch's global RNG so weight initialisation and the shuffled batch
# order are the same on every run of this cell (the sklearn steps in this
# notebook already pin random_state=42; this keeps the torch side consistent).
torch.manual_seed(42)

model = MLP(X_train.shape[1])
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop: 20 full passes over the shuffled training batches
for epoch in range(20):
    model.train()
    for xb, yb in train_dl:
        optimizer.zero_grad()          # clear gradients left from the previous step
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

# Evaluation: one forward pass over the whole test tensor, no gradient tracking
model.eval()
with torch.no_grad():
    preds = model(X_test_tensor)
    y_pred = preds.argmax(dim=1).numpy()   # index of the larger logit = predicted class

print("MLP Performance:")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred):.4f}")
print(f"Recall: {recall_score(y_test, y_pred):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred):.4f}")


MLP Performance:
Accuracy: 0.9371
Precision: 0.9448
Recall: 0.9279
F1 Score: 0.9363


In [None]:
class CNN1D(nn.Module):
    """1-D convolutional classifier over a feature vector, producing 2 logits.

    Each sample is treated as a single-channel sequence. Two unpadded
    kernel-size-3 convolutions shorten it by 4 positions in total, which is
    why the first dense layer expects (input_dim - 4) * 32 inputs.
    """

    def __init__(self, input_dim):
        """Build the conv stack and dense head.

        Args:
            input_dim: number of features (sequence length) per sample.
        """
        super().__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3)
        flattened_width = (input_dim - 4) * 32
        self.fc1 = nn.Linear(flattened_width, 64)
        self.out = nn.Linear(64, 2)

    def forward(self, x):
        """Return logits for a batch of feature rows."""
        # Insert a channel axis at position 1 so Conv1d sees one input channel
        feats = x.unsqueeze(1)
        feats = F.relu(self.conv1(feats))
        feats = F.relu(self.conv2(feats))
        # Collapse channels and positions into one vector per sample
        batch = feats.size(0)
        flat = feats.view(batch, -1)
        hidden = F.relu(self.fc1(flat))
        return self.out(hidden)

# Seed PyTorch's global RNG so weight initialisation and the shuffled batch
# order are the same on every run of this cell (matches the random_state=42
# used by the sklearn steps in this notebook).
torch.manual_seed(42)

cnn_model = CNN1D(X_train.shape[1])
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.001)

# Training CNN: 20 full passes over the shuffled training batches
for epoch in range(20):
    cnn_model.train()
    for xb, yb in train_dl:
        optimizer.zero_grad()          # clear gradients left from the previous step
        preds = cnn_model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

# Evaluation: one forward pass over the whole test tensor, no gradient tracking
cnn_model.eval()
with torch.no_grad():
    preds = cnn_model(X_test_tensor)
    y_pred = preds.argmax(dim=1).numpy()   # index of the larger logit = predicted class

print("1D CNN Performance:")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred):.4f}")
print(f"Recall: {recall_score(y_test, y_pred):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred):.4f}")

1D CNN Performance:
Accuracy: 0.9463
Precision: 0.9523
Recall: 0.9392
F1 Score: 0.9457


In [None]:
# Hold out a slice of the TRAINING data for TabNet's early stopping.
# Passing the test set as eval_set would let the stopping epoch (and therefore
# the weights that get evaluated) be chosen on the very data the final metrics
# are computed on, which biases those metrics upward.
X_tab_train, X_tab_val, y_tab_train, y_tab_val = train_test_split(
    X_train.values,                   # convert DataFrame to NumPy array
    y_train,
    test_size=0.1,
    random_state=42,
    stratify=y_train,                 # keep the class ratio the same in both parts
)

tabnet_clf = TabNetClassifier()

tabnet_clf.fit(
    X_train=X_tab_train,
    y_train=y_tab_train,
    eval_set=[(X_tab_val, y_tab_val)],  # validation split only -- the test set stays unseen
    eval_metric=['accuracy'],
    max_epochs=100,
    patience=10,                        # stop after 10 epochs without validation improvement
    batch_size=1024,
    virtual_batch_size=128,
    num_workers=0
)

# The test set is touched exactly once, for the final report
y_pred = tabnet_clf.predict(X_test.values)

print("TabNet Performance:")
print(f"Accuracy:  {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred):.4f}")
print(f"Recall:    {recall_score(y_test, y_pred):.4f}")
print(f"F1 Score:  {f1_score(y_test, y_pred):.4f}")


epoch 0  | loss: 0.46767 | val_0_accuracy: 0.86769 |  0:00:03s
epoch 1  | loss: 0.32677 | val_0_accuracy: 0.87848 |  0:00:05s
epoch 2  | loss: 0.29461 | val_0_accuracy: 0.89095 |  0:00:07s
epoch 3  | loss: 0.28127 | val_0_accuracy: 0.8953  |  0:00:09s
epoch 4  | loss: 0.26281 | val_0_accuracy: 0.90361 |  0:00:10s
epoch 5  | loss: 0.24828 | val_0_accuracy: 0.90807 |  0:00:12s
epoch 6  | loss: 0.23748 | val_0_accuracy: 0.90747 |  0:00:14s
epoch 7  | loss: 0.23431 | val_0_accuracy: 0.91222 |  0:00:16s
epoch 8  | loss: 0.22705 | val_0_accuracy: 0.91687 |  0:00:18s
epoch 9  | loss: 0.22277 | val_0_accuracy: 0.91549 |  0:00:20s
epoch 10 | loss: 0.21611 | val_0_accuracy: 0.91094 |  0:00:22s
epoch 11 | loss: 0.20832 | val_0_accuracy: 0.91677 |  0:00:23s
epoch 12 | loss: 0.20609 | val_0_accuracy: 0.92261 |  0:00:25s
epoch 13 | loss: 0.2031  | val_0_accuracy: 0.92053 |  0:00:27s
epoch 14 | loss: 0.20131 | val_0_accuracy: 0.92301 |  0:00:29s
epoch 15 | loss: 0.20141 | val_0_accuracy: 0.92281 |  0



TabNet Performance:
Accuracy:  0.9382
Precision: 0.9471
Recall:    0.9279
F1 Score:  0.9374


In [None]:
# Cell C: Layer 1 - Enhanced Random Forest and XGBoost Base Models
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_predict
import numpy as np

# Layer-1 hyperparameters, gathered up front so both base learners are configured side by side
rf_params = dict(
    n_estimators=300,
    max_depth=20,
    min_samples_split=3,
    min_samples_leaf=1,
    max_features='sqrt',
    bootstrap=True,
    random_state=42,
    n_jobs=-1,
)
xgb_params = dict(
    n_estimators=300,
    max_depth=8,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=0.1,
    reg_alpha=0.1,
    reg_lambda=1,
    random_state=42,
    eval_metric='logloss',
)
rf_model = RandomForestClassifier(**rf_params)
xgb_model = XGBClassifier(**xgb_params)

# Out-of-fold class probabilities on the training set (10-fold CV): these become
# the meta-learner's training features
oof_settings = dict(cv=10, method='predict_proba')
rf_train_preds = cross_val_predict(rf_model, X_train, y_train, **oof_settings)
xgb_train_preds = cross_val_predict(xgb_model, X_train, y_train, **oof_settings)

# Refit each base model on the full training set; these fitted models score the test set
rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)

# Test-set class probabilities from the refitted base models
rf_test_preds = rf_model.predict_proba(X_test)
xgb_test_preds = xgb_model.predict_proba(X_test)

# Stacked feature matrix, column order: RF p(class 0), RF p(class 1), XGB p(class 0), XGB p(class 1)
both_classes = [0, 1]
X_train_stacked = np.column_stack(
    (rf_train_preds[:, both_classes], xgb_train_preds[:, both_classes])
)
X_test_stacked = np.column_stack(
    (rf_test_preds[:, both_classes], xgb_test_preds[:, both_classes])
)

print(f"Stacked training features shape: {X_train_stacked.shape}")
print(f"Stacked test features shape: {X_test_stacked.shape}")

Stacked training features shape: (40417, 4)
Stacked test features shape: (10105, 4)


In [None]:
# Cell D: Layer 2 - Enhanced CNN 1D Meta-learner (using existing tensors structure)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

class Enhanced_CNN1D_MetaLearner(nn.Module):
    """1-D CNN meta-learner that turns a row of stacked features into 2 logits.

    Pipeline: three Conv1d + BatchNorm + ReLU stages (dropout after the first
    two), global average and global max pooling concatenated along the channel
    axis, then three dense layers (dropout after the first two) and a final
    2-way linear output.
    """

    def __init__(self, input_dim):
        """Build all layers.

        Args:
            input_dim: number of stacked features per sample. It is not read
                anywhere in this class: both pooling layers reduce the sequence
                to length 1, so the dense head always receives 512 values.
        """
        super().__init__()

        # Convolution stack: 1 -> 64 -> 128 -> 256 channels
        self.conv1 = nn.Conv1d(1, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=2, padding=1)

        # One batch-norm per convolution
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(256)

        # Global pooling down to a single position per channel
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
        self.global_max_pool = nn.AdaptiveMaxPool1d(1)

        # Dense head; 512 = 256 avg-pooled + 256 max-pooled channels
        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)

        # Dropout layers (dropout3 is applied after both fc1 and fc2)
        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.4)
        self.dropout3 = nn.Dropout(0.2)

        self.out = nn.Linear(64, 2)

    def forward(self, x):
        """Return logits for a batch of stacked-feature rows."""
        # Insert a channel axis at position 1 so Conv1d sees one input channel
        feats = x.unsqueeze(1)

        # Conv -> BN -> ReLU, with dropout after the first two stages
        feats = self.dropout1(F.relu(self.bn1(self.conv1(feats))))
        feats = self.dropout2(F.relu(self.bn2(self.conv2(feats))))
        feats = F.relu(self.bn3(self.conv3(feats)))

        # Summarise every channel by its mean and its max, then join the two summaries
        pooled = torch.cat(
            (self.global_avg_pool(feats), self.global_max_pool(feats)), dim=1
        )
        hidden = pooled.view(pooled.size(0), -1)

        # Dense head, dropout after the first two layers
        hidden = self.dropout3(F.relu(self.fc1(hidden)))
        hidden = self.dropout3(F.relu(self.fc2(hidden)))
        hidden = F.relu(self.fc3(hidden))

        return self.out(hidden)

# Seed PyTorch's global RNG so weight initialisation, dropout masks and the
# shuffled batch order are the same on every run of this cell.
torch.manual_seed(42)

# Convert stacked features to PyTorch tensors (following your existing pattern)
X_train_stacked_tensor = torch.tensor(X_train_stacked, dtype=torch.float32)
X_test_stacked_tensor = torch.tensor(X_test_stacked, dtype=torch.float32)

# Use existing y_train_tensor and y_test_tensor from your preprocessing
train_stacked_ds = TensorDataset(X_train_stacked_tensor, y_train_tensor)
test_stacked_ds = TensorDataset(X_test_stacked_tensor, y_test_tensor)

# Hold out 10% of the stacked TRAINING rows for validation. The LR scheduler
# and early stopping are driven by this slice, never by the test set, so the
# test metrics reported afterwards are not biased by model selection.
n_val = max(1, len(train_stacked_ds) // 10)
n_fit = len(train_stacked_ds) - n_val
fit_stacked_ds, val_stacked_ds = torch.utils.data.random_split(
    train_stacked_ds, [n_fit, n_val], generator=torch.Generator().manual_seed(42)
)

train_stacked_dl = DataLoader(fit_stacked_ds, batch_size=64, shuffle=True)
val_stacked_dl = DataLoader(val_stacked_ds, batch_size=64)
test_stacked_dl = DataLoader(test_stacked_ds, batch_size=64)  # kept for final evaluation only

# Initialize enhanced meta-learner
meta_cnn = Enhanced_CNN1D_MetaLearner(X_train_stacked.shape[1])

# Enhanced training setup
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(meta_cnn.parameters(), lr=0.001, weight_decay=1e-4)
# Halve the learning rate after 15 epochs without validation-loss improvement
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=15, factor=0.5)

# Training with early stopping
best_loss = float('inf')
patience_counter = 0
patience = 25
# Snapshot the initial weights so the final load_state_dict always has something
# to restore, even if the validation loss never improves (e.g. it is NaN).
# Tensors are cloned: state_dict() hands back references to the live
# parameters, so an un-cloned "snapshot" would silently follow later updates.
best_model_state = {
    name: tensor.detach().clone() for name, tensor in meta_cnn.state_dict().items()
}

for epoch in range(100):
    meta_cnn.train()
    train_loss = 0

    for xb, yb in train_stacked_dl:
        optimizer.zero_grad()
        preds = meta_cnn(xb)
        loss = criterion(preds, yb)
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(meta_cnn.parameters(), max_norm=1.0)

        optimizer.step()
        train_loss += loss.item()

    # Validation on the held-out slice of the training data
    meta_cnn.eval()
    val_loss = 0
    with torch.no_grad():
        for xb, yb in val_stacked_dl:
            preds = meta_cnn(xb)
            loss = criterion(preds, yb)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_stacked_dl)
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        patience_counter = 0
        # Independent copy of the weights at the best epoch (see note above)
        best_model_state = {
            name: tensor.detach().clone() for name, tensor in meta_cnn.state_dict().items()
        }
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print(f"Early stopping at epoch {epoch+1}")
        break

    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch+1}: Train Loss: {train_loss/len(train_stacked_dl):.4f}, Val Loss: {avg_val_loss:.4f}")

# Load best model
meta_cnn.load_state_dict(best_model_state)
print("Enhanced CNN 1D meta-learner training completed")

Epoch 20: Train Loss: 0.1168, Val Loss: 0.1156
Epoch 40: Train Loss: 0.1148, Val Loss: 0.1119
Epoch 60: Train Loss: 0.1151, Val Loss: 0.1112
Epoch 80: Train Loss: 0.1142, Val Loss: 0.1124
Early stopping at epoch 88
Enhanced CNN 1D meta-learner training completed


In [None]:
# Cell E: Evaluation and Metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Score the test set with the meta-learner in inference mode (no gradient tracking)
meta_cnn.eval()
with torch.no_grad():
    final_preds = meta_cnn(X_test_stacked_tensor)
# Predicted class = index of the larger of the two logits
y_pred_final = torch.argmax(final_preds, dim=1).numpy()

# Compute the four headline metrics against the existing y_test labels
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred_final)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print("=== ENHANCED STACKING MODEL PERFORMANCE ===")
print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-Score:  {f1:.4f}")
print("\n=== Detailed Classification Report ===")
report = classification_report(y_test, y_pred_final)
print(report)

=== ENHANCED STACKING MODEL PERFORMANCE ===
Accuracy:  0.9567
Precision: 0.9590
Recall:    0.9537
F1-Score:  0.9564

=== Detailed Classification Report ===
              precision    recall  f1-score   support

           0       0.95      0.96      0.96      5071
           1       0.96      0.95      0.96      5034

    accuracy                           0.96     10105
   macro avg       0.96      0.96      0.96     10105
weighted avg       0.96      0.96      0.96     10105

