# Classifier Validation and Metrics

This notebook evaluates trained models using a labeled dataset and reports precision, recall, confusion matrix, and F1-score. It optionally supports multiple stages and SHAP value analysis.


In [None]:
from core.validator import ModelValidator, load_saved_split, load_train_split
from models.model_wrapper import ModelWrapper

# Wrapper used to load persisted models from the "models" directory.
model_wrapper = ModelWrapper(model_dir="models")

# --- Configuration ---------------------------------------------------------
# Select the single model / dataset combination evaluated by this cell.
ARCH_NAME = "Lgbm"  # one of: cnn / XgBoost / Lgbm / feedforward / svm
VERSION = "v1.1"  # one of: v1.0 / v1.1
malicious_label = "phishing"  # one of: malware / phishing
stage = 3  # one of: 1 / 2 / 3
verification = True  # True / False; forwarded to the split loader and validator

# Stage-specific prefix shared by the model loader and the validator.
prefix = f"stage_{stage}"

# Load the trained model and the saved split it is evaluated against.
model = model_wrapper.load(
    arch_name=ARCH_NAME,
    label=malicious_label,
    prefix=prefix,
    version=VERSION,
)
x_test, y_test = load_saved_split(
    stage,
    label=malicious_label,
    folder="./data/",
    verification=verification,
)

# Bundle the model, data and run metadata into a validator instance.
validator = ModelValidator(
    model,
    x_test,
    y_test,
    arch_name=ARCH_NAME,
    label=malicious_label,
    prefix=prefix,
    version=VERSION,
    verification=verification,
    stage=stage,
)

# Run the evaluation, explicitly asking for the results to be saved.
validator.evaluate_performance(save_results=True)


## Run Verification / Validation for all models 
This script can be used to bulk-evaluate all models within this pipeline. Adjust the list of architectures, the list of stages, and the target dataset (verification/validation). Overall, the script will:
- Generate confusion matrices
- Generate classification reports
- Generate a .TEX table and all the measurement data

In [None]:
# Wrapper used to load persisted models from the "models" directory.
model_wrapper = ModelWrapper(model_dir="models")

# Architectures to bulk test. Every entry must be separated by a comma:
# adjacent string literals are silently concatenated by Python, which would
# merge two architecture names into a single non-existent one.
architectures = ["XgBoost", "feedforward", "Lgbm", "cnn"]

malicious_labels = ["phishing"]
stages = [1, 2, 3]
VERSION = "v1.1"
verification = False  # forwarded to load_saved_split and ModelValidator


# Evaluate every (architecture, malicious label, stage) combination.
for architecture in architectures:
    for malicious_label in malicious_labels:
        for stage in stages:
            # Stage-specific prefix shared by the model loader and validator.
            prefix = f"stage_{stage}"

            model = model_wrapper.load(
                arch_name=architecture,
                label=malicious_label,
                prefix=prefix,
                version=VERSION,
            )
            x_test, y_test = load_saved_split(
                stage,
                malicious_label,
                folder="./data/",
                verification=verification,
            )

            # Initialize validator class and pass model for evaluation
            validator = ModelValidator(
                model,
                x_test,
                y_test,
                arch_name=architecture,
                label=malicious_label,
                prefix=prefix,
                version=VERSION,
                verification=verification,
                stage=stage,
            )

            # Evaluate this combination (confusion matrix, classification
            # report, .tex table and performance metrics).
            # NOTE(review): save_results is left at its default here, whereas
            # the single-model cell passes save_results=True -- confirm the
            # default actually persists the generated artifacts.
            validator.evaluate_performance()
            