In [None]:
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys

import pyarrow as pa
import pyarrow.parquet as pq
import pandas as pd
from pandas import DataFrame
from pandas.core.dtypes import common as com
from core.loader import Loader

from models.model_wrapper import ModelWrapper

from tensorflow.python.client import device_lib

# Print the description of every local device TensorFlow reports.
# The loop variable has its own name so it is not confused with the torch
# `device` handle assigned right after the loop.
for tf_device in device_lib.list_local_devices():
    print(tf_device.physical_device_desc)

# torch device handle: first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


2025-05-01 19:17:36.944146: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-05-01 19:17:36.944178: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-05-01 19:17:36.945531: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-01 19:17:36.951874: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.



device: 0, name: NVIDIA GeForce RTX 3050 Ti Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


2025-05-01 19:17:39.873296: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-05-01 19:17:39.912988: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-05-01 19:17:39.915739: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [None]:
import numpy as np
import joblib
from sklearn.metrics import classification_report, f1_score
from core.validator import ModelValidator, load_saved_split, load_train_split
from models.model_wrapper import ModelWrapper
from core.fpd_nn import FPDNeuralNetwork
from core.meta_nn import MetaNeuralClassifier  # <-- import your meta-classifier

# === Configuration ===
ARCHITECTURES = ["XgBoost", "Lgbm", "feedforward"]  # base models combined below
VERSION = "v1.1"
MALICIOUS_LABEL = "phishing"
STAGE = 3
VERIFICATION = False
FPD_MODEL_PATH = "../src/models/fpd_saved_model"
META_MODEL_PATH = "../src/models/meta_nn_model"  # <-- path to saved meta-classifier
TRAIN_SAMPLE_FRACTION = 0.01  # share of the training split that is kept

# === Load full train/test split ===
x_train, x_test, y_train, y_test = load_train_split(STAGE, MALICIOUS_LABEL, folder="../src/data/")

# === Subsample training set ===
# One shared count is used for features and labels so the two stay aligned.
# NOTE(review): this keeps the leading rows without shuffling — confirm that
# load_train_split already returns the rows in a randomised order.
n_train_sample = int(len(x_train) * TRAIN_SAMPLE_FRACTION)
x_train = x_train[:n_train_sample]
y_train = y_train[:n_train_sample]

# === Load and run model predictions ===
model_wrapper = ModelWrapper(model_dir="models")
train_preds = []  # one prediction array per architecture, on the train subsample
test_preds = []   # one prediction array per architecture, on the test split

def predict(model, x, architecture, label, scaler=None, threshold=0.5):
    """Run `model` on `x` and return its predictions as a flat 1-D array.

    For the "feedforward" architecture the features are scaled before the
    model is called, and the model output is turned into class indices
    (argmax over columns for multi-column output, thresholding otherwise).
    For every other architecture the output of ``model.predict`` is only
    flattened.

    Args:
        model: object exposing ``predict(x)``.
        x: feature matrix handed to the scaler and/or the model.
        architecture: architecture name; only "feedforward" is special-cased.
        label: label name, used to build the path of the saved scaler.
        scaler: optional pre-loaded object with a ``transform`` method. When
            omitted, the scaler is loaded from
            ``scalers/{label}_{architecture}_{STAGE}_scaler.joblib``.
        threshold: cut-off applied to single-column feedforward output.

    Returns:
        A new 1-D ``numpy.ndarray`` with one prediction per row.
    """
    is_feedforward = architecture == "feedforward"

    if is_feedforward:
        if scaler is None:
            # NOTE(review): joblib files are pickles — only load trusted files.
            scaler = joblib.load(f"scalers/{label}_{architecture}_{STAGE}_scaler.joblib")
        x = scaler.transform(x)

    # Coerce up front so list-like model outputs work for every architecture
    # (calling .flatten() on a plain list would raise AttributeError).
    y_pred = np.asarray(model.predict(x))

    if is_feedforward:
        if y_pred.ndim == 2 and y_pred.shape[1] > 1:
            # One column per class: pick the most likely class.
            y_pred = np.argmax(y_pred, axis=1)
        else:
            # Single score per row: binarise at the threshold.
            y_pred = (y_pred >= threshold).astype(int)

    # flatten() always copies, so the caller never aliases the model's buffer.
    return y_pred.flatten()

# Load every configured base model once, then collect its predictions for the
# train subsample and for the test split (in that order).
for arch in ARCHITECTURES:
    model = model_wrapper.load(
        arch_name=arch,
        label=MALICIOUS_LABEL,
        prefix=f"stage_{STAGE}",
        version=VERSION,
    )
    for split_features, split_store in ((x_train, train_preds), (x_test, test_preds)):
        split_store.append(predict(model, split_features, arch, MALICIOUS_LABEL))

# === Ensemble strategies ===
def weighted_ensemble(preds, weights):
    """Blend several prediction vectors into one integer vector.

    `preds` holds one prediction sequence per model and `weights` one weight
    per model. The weighted mean is taken across models (axis 0), rounded with
    ``np.round`` and cast to ``int``.
    """
    stacked = np.array(preds)
    blended = np.average(stacked, weights=weights, axis=0)
    votes = np.round(blended)
    return votes.astype(int)

# F1 of each base model on the training subsample; used both as ensemble
# weights and to decide which single model is reported as "Best model".
# NOTE(review): these scores are measured on training data, so they may be
# optimistic — confirm whether a held-out validation split is available.
model_weights = [f1_score(y_train, arch_train_preds) for arch_train_preds in train_preds]

# Index of the highest-scoring base model (instead of assuming it is the
# first entry of ARCHITECTURES).
best_model_idx = int(np.argmax(model_weights))

# === Load Meta-Classifying Neural Network and Predict ===
META_N_RAW_FEATURES = 10  # leading raw feature columns appended to the meta input
meta_nn = MetaNeuralClassifier()
meta_nn.load(META_MODEL_PATH)
meta_input_test = np.hstack([
    np.vstack(test_preds).T,            # shape: (n_samples, n_models)
    x_test[:, :META_N_RAW_FEATURES]     # shape: (n_samples, META_N_RAW_FEATURES)
])
meta_preds = meta_nn.predict(meta_input_test)

# === Combine strategies ===
methods = {
    "Best model": test_preds[best_model_idx],
    "Average": np.round(np.mean(test_preds, axis=0)).astype(int),
    "Weighted average": weighted_ensemble(test_preds, model_weights),
    "Meta-model": meta_preds
}

# === Load FPD neural net and apply correction ===
fpd_nn = FPDNeuralNetwork()
fpd_nn.load(FPD_MODEL_PATH)

def apply_fpd(preds, x_data):
    """Hand `preds` and `x_data` to the module-level `fpd_nn` and return
    whatever its ``correct_predictions`` method returns."""
    corrected = fpd_nn.correct_predictions(preds, x_data)
    return corrected

# === Evaluate all ensemble methods on TEST ===
# Each FPD-corrected prediction is kept so the final step can reuse it
# instead of running the FPD correction a second time.
fpd_corrected_by_method = {}
for name, preds in methods.items():
    print(f"\n=== {name} ===")
    print(classification_report(y_test, preds, digits=4))

    fpd_corrected = apply_fpd(preds, x_test)
    fpd_corrected_by_method[name] = fpd_corrected
    print(f"\n=== {name} + FPD ===")
    print(classification_report(y_test, fpd_corrected, digits=4))

# === Final output using ModelValidator ===
final_preds = fpd_corrected_by_method["Meta-model"]
final_model_wrapper = ModelWrapper(model_dir="models")
# The wrapper's predict is replaced by a stub that returns the precomputed
# predictions and ignores its argument.
# NOTE(review): presumably ModelValidator calls predict on x_test — confirm,
# since any other input would still receive the x_test predictions.
final_model_wrapper.predict = lambda x: final_preds

validator = ModelValidator(
    final_model_wrapper,
    x_test,
    y_test,
    arch_name="MetaNN+FPD",
    label=MALICIOUS_LABEL,
    prefix=f"stage_{STAGE}",
    version=VERSION,
    verification=VERIFICATION
)
validator.evaluate_performance()


3 phishing ./data/
📦 Loading model from models/XgBoost_stage_3_phishing_v1.1.xgb
📦 Loading model from models/Lgbm_stage_3_phishing_v1.1.pkl
📦 Loading model from models/feedforward_stage_3_phishing_v1.1.keras


2025-05-01 19:17:56.685205: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-05-01 19:17:56.688473: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-05-01 19:17:56.691398: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-






=== Best model ===
              precision    recall  f1-score   support

           0     0.9958    0.9994    0.9976     81671
           1     0.9968    0.9792    0.9879     16404

    accuracy                         0.9960     98075
   macro avg     0.9963    0.9893    0.9928     98075
weighted avg     0.9960    0.9960    0.9960     98075


=== Best model + FPD ===
              precision    recall  f1-score   support

           0     0.9958    0.9994    0.9976     81671
           1     0.9968    0.9792    0.9879     16404

    accuracy                         0.9960     98075
   macro avg     0.9963    0.9893    0.9928     98075
weighted avg     0.9960    0.9960    0.9960     98075


=== Average ===
              precision    recall  f1-score   support

           0     0.9951    0.9988    0.9970     81671
           1     0.9939    0.9757    0.9847     16404

    accuracy                         0.9949     98075
   macro avg     0.9945    0.9873    0.9909     98075
weighted av