# Load packages and import modules

In [None]:
# -*- coding: utf-8 -*-
import sys
import os

# Manually add the parent directory (presumably the src folder) to the import
# path; the `models` and `core` imports used later presumably rely on this.
# NOTE: '../' is resolved against the current working directory, so the result
# depends on where the notebook/script is launched from.
sys.path.append(os.path.abspath('../'))

# Train FPD model and compare with baseline pipeline

In [None]:

import numpy as np
from models.model_wrapper import ModelWrapper
from sklearn.metrics import classification_report
from core.validator import ModelValidator, load_train_split
import joblib
from tqdm import tqdm


from core.fpd_nn import FPDNeuralNetwork


# === Configuration ===
# Architecture names of the base models; each one is loaded through
# ModelWrapper.load and its predictions feed the ensemble vote.
ARCHITECTURES = ["XgBoost", "Lgbm", "feedforward"]
# Version string passed to ModelWrapper.load and to ModelValidator.
VERSION = "v1.1"
# Label name passed to load_train_split, ModelWrapper.load, the scaler path
# and the FPD save/load calls.
MALICIOUS_LABEL = "malware"
# Stage number; used in the "stage_{STAGE}" model prefix, the scaler file name
# and the FPD save/load calls.
STAGE = 2
# Forwarded unchanged to ModelValidator as its `verification` argument.
VERIFICATION = False
# Location handed to FPDNeuralNetwork.save / .load.
FPD_MODEL_PATH = "./fpd_saved_model"

# === Load train/test split ===
# NOTE(review): `columns` is not used in the visible part of this script;
# presumably the feature column names — confirm against load_train_split.
x_train, x_test, y_train, y_test, columns = load_train_split(STAGE, MALICIOUS_LABEL)


# === Load and run all models ===
# Wrapper used to load the persisted base models from the "models" directory.
model_wrapper = ModelWrapper(model_dir="models")
# One prediction array per architecture is appended to each list, in
# ARCHITECTURES order.
train_preds = []
test_preds = []

def predict(model, x, architecture, label):
    """Return flat predictions from ``model`` for the samples in ``x``.

    For the ``"feedforward"`` architecture the inputs are first transformed
    with the scaler stored at
    ``scalers/{label}_{architecture}_{STAGE}_scaler.joblib`` and the raw model
    output is converted to class indices: ``argmax`` over the columns when the
    output has more than one column, otherwise a ``>= 0.5`` threshold.
    The output of every other architecture is passed through unchanged.

    Args:
        model: Object exposing ``predict(x)``.
        x: Feature matrix accepted by ``model.predict`` (and by the scaler in
            the feedforward case).
        architecture: Architecture name; only ``"feedforward"`` gets special
            handling.
        label: Label name used to locate the feedforward scaler file.

    Returns:
        A one-dimensional ``numpy.ndarray`` holding the predictions.
    """
    is_feedforward = architecture == "feedforward"

    if is_feedforward:
        scaler = joblib.load(f"scalers/{label}_{architecture}_{STAGE}_scaler.joblib")
        x = scaler.transform(x)

    # Normalise to an ndarray up front: model.predict may return a list or a
    # pandas object, neither of which has ``.flatten()``.
    y_pred = np.asarray(model.predict(x))

    if is_feedforward:
        if y_pred.ndim == 2 and y_pred.shape[1] > 1:
            # One column per class: pick the highest-scoring class.
            y_pred = np.argmax(y_pred, axis=1)
        else:
            # Single output: threshold at 0.5 to obtain 0/1 labels.
            y_pred = (y_pred >= 0.5).astype(int)

    # flatten() always returns a fresh 1-D copy.
    return y_pred.flatten()

# Collect every base model's predictions, train split first and then test.
for arch in ARCHITECTURES:
    model = model_wrapper.load(
        arch_name=arch,
        label=MALICIOUS_LABEL,
        prefix=f"stage_{STAGE}",
        version=VERSION,
    )
    for collected, features in ((train_preds, x_train), (test_preds, x_test)):
        collected.append(predict(model, features, arch, MALICIOUS_LABEL))

# === Ensemble vote on train ===
# Every model contributes equally: the per-sample mean of the stacked
# predictions is rounded to the nearest integer.
train_vote_mean = np.mean(train_preds, axis=0)
ensemble_train_preds = np.round(train_vote_mean).astype(int)

# FPD target is 1 exactly where the ensemble predicted 1 but y_train is 0.
predicted_positive = ensemble_train_preds == 1
labelled_negative = y_train == 0
fpd_labels_train = (predicted_positive & labelled_negative).astype(int)

# === Train and save FPD neural network ===
fpd_nn = FPDNeuralNetwork()
fpd_nn.fit(x_train, fpd_labels_train)
fpd_nn.save(FPD_MODEL_PATH, MALICIOUS_LABEL, STAGE)

# === Predict and correct using FPD ===
def apply_fpd(preds, x_data):
    """Reload the saved FPD network and return its correction of ``preds``.

    The module-level ``fpd_nn`` is re-loaded from ``FPD_MODEL_PATH`` (for
    ``MALICIOUS_LABEL`` / ``STAGE``) on every call, then asked to correct
    ``preds`` given the matching feature rows ``x_data``.

    Args:
        preds: Predictions to be corrected.
        x_data: Features the predictions were made on.

    Returns:
        Whatever ``fpd_nn.correct_predictions`` returns for these inputs.
    """
    fpd_nn.load(FPD_MODEL_PATH, MALICIOUS_LABEL, STAGE)
    corrected = fpd_nn.correct_predictions(preds, x_data)
    return corrected

# === Ensemble prediction on test ===
# Same equal-weight vote as on the train split, followed by FPD correction.
test_vote_mean = np.mean(test_preds, axis=0)
ensemble_test_preds = np.round(test_vote_mean).astype(int)
corrected_preds = apply_fpd(ensemble_test_preds, x_test)

# === Evaluation ===
# Report the raw ensemble first, then the FPD-corrected predictions.
for heading, predictions in (
    ("\n=== Ensemble without FPD ===", ensemble_test_preds),
    ("\n=== Ensemble with FPD correction ===", corrected_preds),
):
    print(heading)
    print(classification_report(y_test, predictions, digits=4))

# === ModelValidator integration ===
# The wrapper's predict is replaced so that it ignores its input and returns
# the already-computed corrected predictions.
final_model_wrapper = ModelWrapper(model_dir="models")
final_model_wrapper.predict = lambda x: corrected_preds

validator_options = {
    "arch_name": "Ensemble+FPD_NN",
    "label": MALICIOUS_LABEL,
    "prefix": f"stage_{STAGE}",
    "version": VERSION,
    "verification": VERIFICATION,
}
validator = ModelValidator(final_model_wrapper, x_test, y_test, **validator_options)
validator.evaluate_performance()