# Load packages and import modules

In [None]:
# -*- coding: utf-8 -*-
import sys
import os

# Put the parent directory on the import path; the `core` and `models`
# packages imported further down are presumably located there.
parent_dir = os.path.abspath('../')
sys.path.append(parent_dir)

# Train Meta-model using pretrained architectures

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Import from custom modules
from core.validator import ModelValidator, load_saved_split, load_train_split
from models.model_wrapper import ModelWrapper
from core.fpd_nn import FPDNeuralNetwork
from core.meta_nn import MetaNeuralClassifier  
from core.utils import safe_predict
from core.loader import Loader


# === Configuration ===
# Base-model architectures whose predictions feed the meta model.
ARCHITECTURES = [
    "cnn",
    "XgBoost",
    "Lgbm",
    "feedforward",
    "svm",
]

# Identifiers used to select the pretrained base models and the data split.
MALICIOUS_LABEL = "phishing"
STAGE = 3
VERSION = "v1.1"

# Forwarded to load_saved_split() and ModelValidator as `verification`.
VERIFICATION = False

# Where the trained FPD network and meta network are saved.
# NOTE(review): these paths are relative to "./models", while ModelWrapper is
# pointed at "../models" -- confirm the two locations are meant to differ.
FPD_MODEL_PATH = "./models/fpd_saved_model"
META_MODEL_PATH = "./models/meta_nn_model"

# === Load train/test split ===
x_train, x_test, y_train, y_test, columns = load_train_split(
    STAGE, MALICIOUS_LABEL, folder="../data/"
)

# Load the extra samples from the saved split; they augment the training set.
# NOTE(review): despite its name, `y_test_additional` is used as the label
# vector for `x_train_additional`. The names are kept unchanged in case other
# cells refer to them.
x_train_additional, y_test_additional = load_saved_split(
    STAGE, MALICIOUS_LABEL, folder="../data/", verification=VERIFICATION
)

# Reduce training size for speed: keep only the first 10% of the samples.
# A single shared cutoff guarantees features and labels stay aligned.
n_keep = int(len(x_train) * 0.10)
x_train = x_train[:n_keep]
y_train = y_train[:n_keep]

# Append the additional samples to the training set.
x_train = np.vstack((x_train, x_train_additional))
y_train = np.hstack((y_train, y_test_additional))

# The additional samples are intentionally NOT appended to the test set.
# Previously the very same rows were added to both x_train and x_test, so the
# meta model and the FPD network were evaluated on data they had been trained
# on (train/test leakage), which inflates every reported metric.
# NOTE(review): if the saved split is meant to be evaluation data instead,
# remove the two "append to the training set" lines above and append it to
# x_test / y_test here -- but never to both.

# === Load and run base models ===
# One entry per architecture is appended to each list by the loop below;
# every entry is that model's flat prediction vector for the split.
train_preds, test_preds = [], []

# Wrapper used to load the pretrained base models from the models directory.
model_wrapper = ModelWrapper(model_dir="../models")


def predict(model, x, architecture, label):
    """Return one base model's predictions for ``x`` as a flat 1-D array.

    Delegates to ``safe_predict`` (called with the module-level ``STAGE``) and
    flattens its output so that the predictions of several architectures can
    be stacked into one matrix afterwards.

    Args:
        model: Loaded base model, as returned by ``model_wrapper.load``.
        x: Feature matrix to score.
        architecture: Architecture name of ``model``.
        label: Label the model was trained for.

    Returns:
        A 1-D ``numpy.ndarray`` containing a copy of the predictions.
    """
    y_pred = safe_predict(model, x, architecture, label, STAGE)

    # np.asarray also accepts list-like results (the previous
    # ``y_pred.flatten()`` required an object with that method), and
    # ``flatten`` already returns a copy, so wrapping it in ``np.array``
    # only produced a redundant second copy.
    return np.asarray(y_pred).flatten()

# Load every pretrained base model for the configured stage/version and
# record its predictions on the training and test features.
for arch in ARCHITECTURES:
    load_kwargs = {
        "arch_name": arch,
        "label": MALICIOUS_LABEL,
        "prefix": f"stage_{STAGE}",
        "version": VERSION,
    }
    model = model_wrapper.load(**load_kwargs)

    # Order matters: position i in both lists belongs to ARCHITECTURES[i].
    train_preds.append(predict(model, x_train, arch, MALICIOUS_LABEL))
    test_preds.append(predict(model, x_test, arch, MALICIOUS_LABEL))
    
# === Prepare Meta Model Input ===
# Each meta-model row consists of one prediction per base model followed by
# the first 10 feature columns of the same sample.

# shape: (n_samples, n_models) -- one column per base model
base_preds_train = np.stack(train_preds, axis=1)
base_preds_test = np.stack(test_preds, axis=1)

# shape: (n_samples, n_models + 10)
meta_input_train = np.concatenate([base_preds_train, x_train[:, :10]], axis=1)
meta_input_test = np.concatenate([base_preds_test, x_test[:, :10]], axis=1)


# === Train Meta Neural Network ===
# Fit the meta classifier on the stacked base-model predictions + features.
meta_nn = MetaNeuralClassifier()
meta_nn.fit(meta_input_train, y_train)

# NOTE(review): the meta model is saved under the hard-coded version "v0.1",
# not VERSION; the later meta_nn.load() call uses the same literal.
meta_nn.save(META_MODEL_PATH, "v0.1")

# === Train False Positive Detector ===
# Ensemble vote: average the base-model predictions per sample, then round.
mean_base_pred = np.mean(train_preds, axis=0)
ensemble_train_preds = np.round(mean_base_pred).astype(int)

# FPD target is 1 exactly where the ensemble says 1 but the true label is 0.
# (presumably 1 = malicious, 0 = benign -- confirm against the label encoding)
ensemble_says_one = ensemble_train_preds == 1
truth_is_zero = y_train == 0
fpd_labels_train = (ensemble_says_one & truth_is_zero).astype(int)

# The detector is fit on the raw training features, then saved.
fpd_nn = FPDNeuralNetwork()
fpd_nn.fit(x_train, fpd_labels_train)
fpd_nn.save(FPD_MODEL_PATH, MALICIOUS_LABEL, STAGE)

# Reload both models from disk before predicting, so the saved artefacts are
# the ones that get evaluated.
meta_nn.load(META_MODEL_PATH, "v0.1")
fpd_nn.load(FPD_MODEL_PATH, MALICIOUS_LABEL, STAGE)

# Meta-model predictions, before and after the false-positive correction.
raw_preds = meta_nn.predict(meta_input_test)
corrected_preds = fpd_nn.correct_predictions(raw_preds, x_test)

# === Evaluation ===
# Print one classification report per prediction variant.
for heading, preds in (
    ("\n=== Meta-NN without FPD ===", raw_preds),
    ("\n=== Meta-NN with FPD correction ===", corrected_preds),
):
    print(heading)
    print(classification_report(y_test, preds, digits=4))

# === ModelValidator integration ===
def _return_corrected_preds(x):
    """Ignore ``x`` and return the precomputed ``corrected_preds``."""
    return corrected_preds


# A ModelWrapper whose predict() is replaced, so the validator scores the
# already-computed Meta-NN + FPD predictions instead of running a model.
# NOTE(review): the replacement ignores its input, so the result is only
# meaningful if the validator calls predict() on exactly x_test -- confirm.
final_model_wrapper = ModelWrapper(model_dir="../models")
final_model_wrapper.predict = _return_corrected_preds

validator_options = dict(
    arch_name="MetaNN+FPD_NN",
    label=MALICIOUS_LABEL,
    prefix=f"stage_{STAGE}",
    version=VERSION,
    verification=VERIFICATION,
)
validator = ModelValidator(final_model_wrapper, x_test, y_test, **validator_options)
validator.evaluate_performance()