In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                           f1_score, roc_auc_score, average_precision_score,
                           classification_report, confusion_matrix)
from sklearn.ensemble import IsolationForest
from torch.utils.data import DataLoader, TensorDataset, Dataset

import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

In [2]:
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

In [3]:
print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)

Torch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1


In [4]:
import pandas as pd

df = pd.read_parquet("cicdarknet2020.parquet", engine="fastparquet")
df.info()
df['Label'].value_counts()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103121 entries, 0 to 103120
Data columns (total 79 columns):
 #   Column                      Non-Null Count   Dtype   
---  ------                      --------------   -----   
 0   Protocol                    103121 non-null  int8    
 1   Flow Duration               103121 non-null  int32   
 2   Total Fwd Packet            103121 non-null  int32   
 3   Total Bwd packets           103121 non-null  int32   
 4   Total Length of Fwd Packet  103121 non-null  int32   
 5   Total Length of Bwd Packet  103121 non-null  int32   
 6   Fwd Packet Length Max       103121 non-null  int32   
 7   Fwd Packet Length Min       103121 non-null  int16   
 8   Fwd Packet Length Mean      103121 non-null  float32 
 9   Fwd Packet Length Std       103121 non-null  float32 
 10  Bwd Packet Length Max       103121 non-null  int32   
 11  Bwd Packet Length Min       103121 non-null  int16   
 12  Bwd Packet Length Mean      103121 non-null  float32 
 13 

Label
Non-Tor    64804
NonVPN     20216
VPN        16922
Tor         1179
Name: count, dtype: int64

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split


print("=== INITIAL DATA INSPECTION ===")
print(f"DataFrame shape: {df.shape}")
print(f"Memory usage: {df.memory_usage().sum() / 1024 / 1024:.2f} MB")

# Let's check the actual dtypes more carefully
print("\n=== DATA TYPES DETAILED ===")
print("Label column info:")
print(f"  dtype: {df['Label'].dtype}")
print(f"  type of dtype: {type(df['Label'].dtype)}")
print(f"  is categorical? {pd.api.types.is_categorical_dtype(df['Label'])}")
print(f"  is string? {pd.api.types.is_string_dtype(df['Label'])}")

print("\nLabel.1 column info:")
print(f"  dtype: {df['Label.1'].dtype}")
print(f"  type of dtype: {type(df['Label.1'].dtype)}")
print(f"  is categorical? {pd.api.types.is_categorical_dtype(df['Label.1'])}")
print(f"  is string? {pd.api.types.is_string_dtype(df['Label.1'])}")

# DECISION TIME: Which label to use?
print("\n=== DECIDING WHICH LABEL TO USE ===")
print("Based on your output:")
print("1. Label column: Has values 0, 1, 2, 3 (4 classes)")
print("2. Label.1 column: Has actual names like 'Browsing', 'P2P', etc.")

# Let me check if Label might be encoded already
print("\nChecking if Label is already encoded...")
# Get a mapping by sampling
sample_size = min(100, len(df))
sample = df[['Label', 'Label.1']].sample(sample_size)
for _, row in sample.iterrows():
    print(f"Label: {row['Label']} -> Label.1: {row['Label.1']}")



# Let's create a proper mapping
print("\n=== CREATING PROPER LABEL MAPPING ===")
# Group by Label and see what Label.1 values correspond
label_mapping_df = df.groupby('Label')['Label.1'].agg(['first', 'nunique', lambda x: list(x.unique())[:5]])
label_mapping_df.columns = ['most_common', 'num_unique', 'sample_values']
print(label_mapping_df)

# If Label is already encoded and Label.1 has the names, use Label.1
print("\nBased on analysis, I recommend using Label.1 as it has the actual class names")

# Clean up the data
print("\n=== DATA CLEANING ===")
print("Checking for duplicate columns...")
duplicate_columns = df.columns[df.columns.duplicated()]
print(f"Duplicate columns: {list(duplicate_columns)}")

# Check for constant columns
constant_columns = [col for col in df.columns if df[col].nunique() == 1]
print(f"Constant columns: {constant_columns}")

if constant_columns:
    df = df.drop(columns=constant_columns)
    print(f"Dropped constant columns: {constant_columns}")

# Handle missing/infinite values
print("\n=== HANDLING MISSING/INFINITE VALUES ===")
numeric_cols = df.select_dtypes(include=[np.number]).columns

for col in numeric_cols:
    df[col] = df[col].replace([np.inf, -np.inf], np.nan)

nan_counts = df.isnull().sum()
if nan_counts.any():
    nan_cols = nan_counts[nan_counts > 0].index.tolist()
    print(f"Columns with NaN: {nan_cols}")
    
    for col in numeric_cols:
        if df[col].isnull().any():
            df[col] = df[col].fillna(df[col].median())

# Now process the labels
print("\n=== LABEL PROCESSING ===")
# Use Label.1 since it has the actual names
df['Label_original'] = df['Label.1'].astype(str)

# Clean the labels
df['Label_cleaned'] = df['Label_original'].str.lower().str.strip()

# Check cleaned labels
print("\nCleaned label distribution:")
cleaned_counts = df['Label_cleaned'].value_counts()
for label, count in cleaned_counts.items():
    proportion = count / len(df) * 100
    print(f"  '{label}': {count} samples ({proportion:.2f}%)")

# Encode labels
label_encoder = LabelEncoder()
df['Label_encoded'] = label_encoder.fit_transform(df['Label_cleaned'])

print("\n=== FINAL LABEL ENCODING ===")
print("Class mapping:")
for i, label in enumerate(label_encoder.classes_):
    count = (df['Label_cleaned'] == label).sum()
    proportion = count / len(df) * 100
    print(f"  {i}: '{label}' - {count} samples ({proportion:.2f}%)")

label_mapping = dict(zip(range(len(label_encoder.classes_)), label_encoder.classes_))

# Prepare features
print("\n=== FEATURE PREPARATION ===")
# Exclude all label-related columns
label_cols = ['Label', 'Label.1', 'Label_original', 'Label_cleaned', 'Label_encoded']
exclude_cols = [col for col in label_cols if col in df.columns]

feature_cols = [col for col in df.columns if col not in exclude_cols]

X = df[feature_cols]
y = df['Label_encoded']

print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")

# Scale features
print("\n=== FEATURE SCALING ===")
scaler = StandardScaler()
X_scaled = X.copy()
numeric_features = X.select_dtypes(include=[np.number]).columns

if len(numeric_features) > 0:
    X_scaled[numeric_features] = scaler.fit_transform(X[numeric_features])
    print(f"Scaled {len(numeric_features)} numeric features")
else:
    print("No numeric features to scale")

# Split data
print("\n=== DATA SPLITTING ===")
class_counts = y.value_counts()
print("Class distribution:")
for class_id, count in class_counts.items():
    class_name = label_encoder.inverse_transform([class_id])[0]
    proportion = count / len(y) * 100
    print(f"  {class_id}: '{class_name}' - {count} samples ({proportion:.2f}%)")

# Use stratification if possible
if class_counts.min() >= 2:
    print("\nUsing stratified split")
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42, stratify=y
    )
else:
    print("\nUsing random split (some classes have < 2 samples)")
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42
    )

print(f"\nTraining set: {X_train.shape[0]} samples")
print(f"Testing set: {X_test.shape[0]} samples")

# Save processed data
print("\n=== SAVING DATA ===")
import pickle
from datetime import datetime

preprocessed_data = {
    'X_train': X_train,
    'X_test': X_test,
    'y_train': y_train,
    'y_test': y_test,
    'scaler': scaler,
    'label_encoder': label_encoder,
    'feature_names': X_train.columns.tolist(),
    'label_mapping': label_mapping,
    'num_classes': len(label_encoder.classes_)
}




#Quick Summary 
print("\nData preprocessing completed successfully.")   

=== INITIAL DATA INSPECTION ===
DataFrame shape: (103121, 79)
Memory usage: 23.60 MB

=== DATA TYPES DETAILED ===
Label column info:
  dtype: category
  type of dtype: <class 'pandas.core.dtypes.dtypes.CategoricalDtype'>
  is categorical? True
  is string? True

Label.1 column info:
  dtype: category
  type of dtype: <class 'pandas.core.dtypes.dtypes.CategoricalDtype'>
  is categorical? True
  is string? True

=== DECIDING WHICH LABEL TO USE ===
Based on your output:
1. Label column: Has values 0, 1, 2, 3 (4 classes)
2. Label.1 column: Has actual names like 'Browsing', 'P2P', etc.

Checking if Label is already encoded...
Label: Non-Tor -> Label.1: P2P
Label: Non-Tor -> Label.1: Browsing
Label: VPN -> Label.1: Audio-Streaming
Label: Non-Tor -> Label.1: Browsing
Label: VPN -> Label.1: Chat
Label: NonVPN -> Label.1: Chat
Label: NonVPN -> Label.1: Audio-Streaming
Label: Non-Tor -> Label.1: Browsing
Label: Non-Tor -> Label.1: P2P
Label: Tor -> Label.1: VOIP
Label: Non-Tor -> Label.1: Browsi

In [39]:
df['is_encrypted'] = df['Label'].apply(lambda x: 1 if x in ['Tor','VPN'] else 0)
df['is_encrypted'] = df['is_encrypted'].astype(int)
df['is_encrypted'].value_counts()
X = df.drop(columns=['Label', 'is_encrypted'])
y_multiclass = df['Label']         # for multiclass classification
y_binary = df['is_encrypted']      # for encrypted vs non-encrypted

In [40]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import numpy as np

# Assuming df['Label_encoded'] exists
X = X_scaled  # scaled numeric features
y = df['Label_encoded'].values  # integer-encoded labels

# Split train/test (stratified)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y_binary
)

print("Class distribution before SMOTE:")
unique, counts = np.unique(y_train, return_counts=True)
for u, c in zip(unique, counts):
    print(f"  Class {u}: {c} samples")

# Apply SMOTE to training set
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

print("\nClass distribution after SMOTE:")
unique_res, counts_res = np.unique(y_train_res, return_counts=True)
for u, c in zip(unique_res, counts_res):
    print(f"  Class {u}: {c} samples")


Class distribution before SMOTE:
  Class 0: 8987 samples
  Class 1: 23732 samples
  Class 2: 8321 samples
  Class 3: 4405 samples
  Class 4: 8603 samples
  Class 5: 18782 samples
  Class 6: 7206 samples
  Class 7: 2460 samples

Class distribution after SMOTE:
  Class 0: 23732 samples
  Class 1: 23732 samples
  Class 2: 23732 samples
  Class 3: 23732 samples
  Class 4: 23732 samples
  Class 5: 23732 samples
  Class 6: 23732 samples
  Class 7: 23732 samples


In [41]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import numpy as np

# Binary target
y_binary = df['is_encrypted'].values  # 0 = non-encrypted, 1 = Tor/VPN

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_binary, 
    test_size=0.2, 
    random_state=42,
    stratify=y_binary
)


print("Class distribution before SMOTE (binary):")
unique, counts = np.unique(y_binary, return_counts=True)
for u, c in zip(unique, counts):
    label_name = "Encrypted" if u == 1 else "Non-encrypted"
    print(f"  {label_name}: {c} samples")

# Apply SMOTE to training set
smote_bin = SMOTE(random_state=42)
X_train_bin_res, y_train_bin_res = smote_bin.fit_resample(X_train, y_train)

print("\nClass distribution after SMOTE (binary):")
unique_res, counts_res = np.unique(y_train_bin_res, return_counts=True)
for u, c in zip(unique_res, counts_res):
    label_name = "Encrypted" if u == 1 else "Non-encrypted"
    print(f"  {label_name}: {c} samples")


Class distribution before SMOTE (binary):
  Non-encrypted: 85020 samples
  Encrypted: 18101 samples

Class distribution after SMOTE (binary):
  Non-encrypted: 68015 samples
  Encrypted: 68015 samples


In [42]:
# Train IF on all training data (or only normal traffic if you have labels)
if_model = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
if_model.fit(X_train)

# Get anomaly scores (more negative = more anomalous)
if_scores_train = if_model.decision_function(X_train)
if_scores_test = if_model.decision_function(X_test)

# Add IF score as an additional feature
X_train_if = np.hstack([X_train, if_scores_train.reshape(-1,1)])
X_test_if = np.hstack([X_test, if_scores_test.reshape(-1,1)])


In [43]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_binary, 
    test_size=0.2, 
    random_state=42,
    stratify=y_binary
)

print("Train encrypted count:", sum(y_train))
print("Test encrypted count:", sum(y_test))

print("Unique values in y_binary:", np.unique(y_binary))



Train encrypted count: 14481
Test encrypted count: 3620
Unique values in y_binary: [0 1]


In [44]:
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.ensemble import IsolationForest
import numpy as np

# Train IF only on normal samples (y_train == 0)
if_model = IsolationForest(contamination=0.05, random_state=42)
if_model.fit(X_train[y_train == 0])

# anomaly scores (more negative = more anomalous)
if_scores_train = -if_model.decision_function(X_train)
if_scores_test = -if_model.decision_function(X_test)

# pick threshold = 95th percentile of normal scores
threshold_if = np.percentile(if_scores_train[y_train == 0], 95)
if_pred = (if_scores_test > threshold_if).astype(int)

print("\n=== ISOLATION FOREST ===")
print("Accuracy:", accuracy_score(y_test, if_pred))
print("ROC-AUC:", roc_auc_score(y_test, if_scores_test))
print("Precision:", precision_score(y_test, if_pred))
print("Recall:", recall_score(y_test, if_pred))
print("F1-score:", f1_score(y_test, if_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, if_pred))



=== ISOLATION FOREST ===
Accuracy: 0.8000484848484849
ROC-AUC: 0.6621479220443776
Precision: 0.28716216216216217
Recall: 0.09392265193370165
F1-score: 0.14154870940882597
Confusion Matrix:
 [[16161   844]
 [ 3280   340]]


In [52]:
X_train_if = np.hstack([X_train, if_scores_train.reshape(-1,1)])
X_test_if = np.hstack([X_test, if_scores_test.reshape(-1,1)])
from tensorflow.keras import layers, models, Model

input_dim = X_train_if.shape[1]
reconstruction_error = np.mean(np.square(X_test_if - X_test_pred), axis=1)


input_layer = layers.Input(shape=(input_dim,))
x = layers.Dense(64, activation='relu')(input_layer)
x = layers.Dense(32, activation='relu')(x)
latent = layers.Dense(16, activation='relu')(x)

x = layers.Dense(32, activation='relu')(latent)
x = layers.Dense(64, activation='relu')(x)
output = layers.Dense(input_dim, activation='linear')(x)

autoencoder = Model(input_layer, output)
autoencoder.compile(optimizer='adam', loss='mse')

autoencoder.fit(
    X_train_if[y_train == 0], X_train_if[y_train == 0],
    epochs=40,
    batch_size=256,
    validation_split=0.1,
    verbose=1
)
# 4️⃣ Predict reconstruction for test set
X_test_pred = autoencoder.predict(X_test_if)

# 5️⃣ Compute reconstruction error
reconstruction_error = np.mean(np.square(X_test_if - X_test_pred), axis=1)

Epoch 1/40
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.4918 - val_loss: 0.3173
Epoch 2/40
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.2482 - val_loss: 0.2205
Epoch 3/40
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.1868 - val_loss: 0.1879
Epoch 4/40
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.1603 - val_loss: 0.1906
Epoch 5/40
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.1496 - val_loss: 0.1402
Epoch 6/40
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.1317 - val_loss: 0.1262
Epoch 7/40
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.1138 - val_loss: 0.1291
Epoch 8/40
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.1071 - val_loss: 0.1154
Epoch 9/40
[1m240/240[0m [32m━━━━━━━━

In [34]:
X_test_pred = autoencoder.predict(X_test_if)
test_rec_err = np.mean((X_test_if - X_test_pred)**2, axis=1)

X_train_pred = autoencoder.predict(X_train_if[y_train == 0])
train_rec_err = np.mean((X_train_if[y_train == 0] - X_train_pred)**2, axis=1)

threshold_ae = np.percentile(train_rec_err, 95)
ae_pred = (test_rec_err > threshold_ae).astype(int)
print("\n=== AUTOENCODER ===")
print("Accuracy:", accuracy_score(y_test, ae_pred))
print("ROC-AUC:", roc_auc_score(y_test, test_rec_err))
print("Precision:", precision_score(y_test, ae_pred))
print("Recall:", recall_score(y_test, ae_pred))
print("F1-score:", f1_score(y_test, ae_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, ae_pred))


[1m645/645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583us/step
[1m2126/2126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 639us/step

=== AUTOENCODER ===
Accuracy: 0.8111030303030303
ROC-AUC: 0.5614007579831086
Precision: 0.3968609865470852
Recall: 0.14668508287292817
F1-score: 0.21419927390076643
Confusion Matrix:
 [[16198   807]
 [ 3089   531]]


In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#Deep SVDD implementation in PyTorch
# --- Encoder network (small MLP) ---
class Encoder(nn.Module):
    def __init__(self, input_dim, hidden=[128,64], out_dim=32):
        super().__init__()
        layers = []
        prev = input_dim
        for h in hidden:
            layers.append(nn.Linear(prev, h))
            layers.append(nn.ReLU())
            prev = h
        layers.append(nn.Linear(prev, out_dim))
        self.net = nn.Sequential(*layers)
    def forward(self, x):
        return self.net(x)

# --- Deep SVDD trainer ---
class DeepSVDD:
    def __init__(self, input_dim, c=None, nu=0.1, lr=1e-3):
        self.encoder = Encoder(input_dim).to(device)
        self.c = c  # center (torch tensor) or None -> init from data
        self.nu = nu
        self.optimizer = optim.Adam(self.encoder.parameters(), lr=lr)
        self.criterion = lambda z, c: ((z - c)**2).sum(dim=1)  # per-sample squared dist

    def init_center_c(self, loader):
        # initialize center as mean of encoder outputs on normal data
        self.encoder.eval()
        n = 0
        c_sum = None
        with torch.no_grad():
            for x in loader:
                x = x[0].to(device).float()
                z = self.encoder(x)
                if c_sum is None:
                    c_sum = z.sum(dim=0)
                else:
                    c_sum += z.sum(dim=0)
                n += z.size(0)
        c = c_sum / n
        # avoid components too close to zero
        c[(abs(c) < 1e-6)] = 1e-6
        self.c = c

    def train(self, loader, epochs=50):
        if self.c is None:
            self.init_center_c(loader)
        self.encoder.train()
        for ep in range(epochs):
            epoch_loss = 0.0
            for x, in loader:
                x = x.to(device).float()
                z = self.encoder(x)
                dist = self.criterion(z, self.c)
                loss = dist.mean()  # minimize avg distance of normal samples
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item() * x.size(0)
            # print progress
            if ep % 5 == 0:
                print(f"DeepSVDD epoch {ep}/{epochs} loss: {epoch_loss/len(loader.dataset):.6f}")

    def score(self, X):  # X: numpy array
        self.encoder.eval()
        ds = TensorDataset(torch.from_numpy(X).float())
        loader = DataLoader(ds, batch_size=1024, shuffle=False)
        scores = []
        with torch.no_grad():
            for batch in loader:
                x = batch[0].to(device)
                z = self.encoder(x)
                dist = ((z - self.c) ** 2).sum(dim=1)  # per-sample
                scores.append(dist.cpu().numpy())
        return np.concatenate(scores)


In [49]:
# Train DeepSVDD on *normal* training samples only (y_train==0)
# Convert DataFrame → NumPy → Tensor
X_train_normal_np = X_train[y_train == 0].to_numpy().astype("float32")
X_test_np = X_test.to_numpy().astype("float32")


ds = TensorDataset(torch.from_numpy(X_train_normal_np).float())
loader = DataLoader(ds, batch_size=1024, shuffle=True)

svdd = DeepSVDD(input_dim=X_train.shape[1])
svdd.init_center_c(DataLoader(TensorDataset(torch.from_numpy(X_train_normal_np).float()), batch_size=1024))
svdd.train(loader, epochs=50)

# get scores on test set (higher = more anomalous)
# Convert X_test and X_train to NumPy arrays
X_test_np = X_test.to_numpy().astype("float32")
X_train_np = X_train.to_numpy().astype("float32")

# Then score
svdd_scores_test = svdd.score(X_test_np)
svdd_scores_train = svdd.score(X_train_np)  # optional for thresholding
 # optional for thresholding


DeepSVDD epoch 0/50 loss: 0.023820
DeepSVDD epoch 5/50 loss: 0.000158
DeepSVDD epoch 10/50 loss: 0.000060
DeepSVDD epoch 15/50 loss: 0.000040
DeepSVDD epoch 20/50 loss: 0.000021
DeepSVDD epoch 25/50 loss: 0.000015
DeepSVDD epoch 30/50 loss: 0.000012
DeepSVDD epoch 35/50 loss: 0.000010
DeepSVDD epoch 40/50 loss: 0.000006
DeepSVDD epoch 45/50 loss: 0.000006


In [50]:
# Threshold for binary prediction
# Usually you can set it based on 95th percentile of training normal scores
threshold = np.percentile(svdd_scores_train[y_train == 0], 95)
svdd_pred = (svdd_scores_test > threshold).astype(int)

# Evaluation metrics
y_test_binary = (y_test != 0).astype(int)  # treat all non-normal as anomaly
print("=== DeepSVDD ===")
print(f"Accuracy: {accuracy_score(y_test_binary, svdd_pred):.4f}")
print(f"ROC-AUC: {roc_auc_score(y_test_binary, svdd_scores_test):.4f}")
print(f"Precision: {precision_score(y_test_binary, svdd_pred):.4f}")
print(f"Recall: {recall_score(y_test_binary, svdd_pred):.4f}")
print(f"F1-score: {f1_score(y_test_binary, svdd_pred):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test_binary, svdd_pred))

=== DeepSVDD ===
Accuracy: 0.8127
ROC-AUC: 0.6597
Precision: 0.4209
Recall: 0.1793
F1-score: 0.2515
Confusion Matrix:
[[16112   893]
 [ 2971   649]]


In [51]:
import numpy as np
from scipy.stats import mode

# Suppose you have binary predictions from each model:
pred_if = if_pred
pred_ae = ae_pred
pred_svdd = svdd_pred

# Stack predictions
pred_stack = np.vstack([pred_if, pred_ae, pred_svdd]).T

# Majority vote
ensemble_pred = mode(pred_stack, axis=1).mode.flatten()

# Evaluate
print("=== Ensemble (Majority Vote) ===")
print(f"Accuracy: {accuracy_score(y_test_binary, ensemble_pred):.4f}")
print(f"Precision: {precision_score(y_test_binary, ensemble_pred):.4f}")
print(f"Recall: {recall_score(y_test_binary, ensemble_pred):.4f}")
print(f"F1-score: {f1_score(y_test_binary, ensemble_pred):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test_binary, ensemble_pred))


=== Ensemble (Majority Vote) ===
Accuracy: 0.8151
Precision: 0.4130
Recall: 0.1265
F1-score: 0.1937
Confusion Matrix:
[[16354   651]
 [ 3162   458]]


In [53]:
from sklearn.ensemble import RandomForestClassifier

# Use anomaly scores from base models as features
X_meta_train = np.vstack([if_scores_test, reconstruction_error, svdd_scores_test]).T
y_meta_train = y_test_binary  # binary labels

meta_model = RandomForestClassifier(n_estimators=100, random_state=42)
meta_model.fit(X_meta_train, y_meta_train)

meta_pred = meta_model.predict(X_meta_train)

print("=== Stacking Ensemble (Random Forest) ===")
print(f"Accuracy: {accuracy_score(y_meta_train, meta_pred):.4f}")
print(f"Precision: {precision_score(y_meta_train, meta_pred):.4f}")
print(f"Recall: {recall_score(y_meta_train, meta_pred):.4f}")
print(f"F1-score: {f1_score(y_meta_train, meta_pred):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_meta_train, meta_pred))


=== Stacking Ensemble (Random Forest) ===
Accuracy: 0.9991
Precision: 0.9992
Recall: 0.9956
F1-score: 0.9974
Confusion Matrix:
[[17002     3]
 [   16  3604]]


In [60]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# --- Step 1: Ensure DeepSVDD uses correct input dimension ---
input_dim = X_train_if.shape[1]  # include IF score if added
svdd = DeepSVDD(input_dim=input_dim)
X_train_normal_np = X_train_if[y_train == 0].astype("float32")
loader = DataLoader(TensorDataset(torch.from_numpy(X_train_normal_np).float()), batch_size=1024, shuffle=True)
svdd.init_center_c(loader)
svdd.train(loader, epochs=50)

# --- Step 2: Compute base model scores on test set ---
# IF scores
if_scores_test = -if_model.decision_function(X_test_if)

# Autoencoder reconstruction error
X_test_pred = autoencoder.predict(X_test_if)
reconstruction_error = np.mean(np.square(X_test_if - X_test_pred), axis=1)

# DeepSVDD scores
X_test_np = X_test_if.astype("float32")
svdd_scores_test = svdd.score(X_test_np)

# --- Step 3: Stack as meta-features ---
X_meta_test = np.vstack([if_scores_test, reconstruction_error, svdd_scores_test]).T
y_meta_test = y_test  # binary labels

# --- Step 4: Train meta-classifier ---
meta_model = RandomForestClassifier(n_estimators=100, random_state=42)
# For proper training, compute meta-features on the training set as well:
if_scores_train = -if_model.decision_function(X_train_if)
X_train_pred = autoencoder.predict(X_train_if)
reconstruction_error_train = np.mean(np.square(X_train_if - X_train_pred), axis=1)
X_train_np = X_train_if.astype("float32")
svdd_scores_train = svdd.score(X_train_np)

X_meta_train = np.vstack([if_scores_train, reconstruction_error_train, svdd_scores_train]).T
y_meta_train = y_train

meta_model.fit(X_meta_train, y_meta_train)

# --- Step 5: Evaluate stacking ensemble ---
meta_pred = meta_model.predict(X_meta_test)

print("=== Stacking Ensemble (Random Forest) ===")
print(f"Accuracy: {accuracy_score(y_meta_test, meta_pred):.4f}")
print(f"Precision: {precision_score(y_meta_test, meta_pred):.4f}")
print(f"Recall: {recall_score(y_meta_test, meta_pred):.4f}")
print(f"F1-score: {f1_score(y_meta_test, meta_pred):.4f}")
print("Confusion Matrix:\n", confusion_matrix(y_meta_test, meta_pred))


DeepSVDD epoch 0/50 loss: 0.018432
DeepSVDD epoch 5/50 loss: 0.000139
DeepSVDD epoch 10/50 loss: 0.000055
DeepSVDD epoch 15/50 loss: 0.000032
DeepSVDD epoch 20/50 loss: 0.000021
DeepSVDD epoch 25/50 loss: 0.000015
DeepSVDD epoch 30/50 loss: 0.000011
DeepSVDD epoch 35/50 loss: 0.000009
DeepSVDD epoch 40/50 loss: 0.000006
DeepSVDD epoch 45/50 loss: 0.000004
[1m645/645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 595us/step
[1m2578/2578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 538us/step
=== Stacking Ensemble (Random Forest) ===
Accuracy: 0.9452
Precision: 0.8687
Recall: 0.8099
F1-score: 0.8383
Confusion Matrix:
 [[16562   443]
 [  688  2932]]


In [None]:
X_meta_train = np.vstack([if_scores_train, reconstruction_error, svdd_scores_train]).T
X_meta_test = np.vstack([if_scores_test, reconstruction_error_test, svdd_scores_test]).T

import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

meta_model = xgb.XGBClassifier(
    n_estimators=200,
    max_depth=3,
    learning_rate=0.1,
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss'
)

meta_model.fit(X_meta_train, y_train_binary)
meta_pred = meta_model.predict(X_meta_test)

print("=== Stacking Ensemble (XGBoost) ===")
print(f"Accuracy: {accuracy_score(y_test_binary, meta_pred):.4f}")
print(f"Precision: {precision_score(y_test_binary, meta_pred):.4f}")
print(f"Recall: {recall_score(y_test_binary, meta_pred):.4f}")
print(f"F1-score: {f1_score(y_test_binary, meta_pred):.4f}")
print("Confusion Matrix:\n", confusion_matrix(y_test_binary, meta_pred))
