In [1]:
import algos
import os
import importlib.util
from pathlib import Path

In [2]:
# Directory holding the algorithm scripts listed in ALGO_FILES.
# The location can be overridden without editing the notebook by setting the
# SV_ALGOS_DIR environment variable; otherwise the original path is used.
folder_path = Path(
    os.environ.get(
        "SV_ALGOS_DIR",
        '/home/mahalakshmi/Journal_May2025/SV_dec/SV Dataset/algos',
    )
)

In [3]:
# File names (relative to folder_path) of the algorithm scripts to run.
# Currently disabled entries: "ae.py", "vae.py".
ALGO_FILES = ["optics.py"]

In [None]:
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import (
    confusion_matrix, classification_report,
    precision_score, recall_score, f1_score,
    roc_auc_score, average_precision_score,
    balanced_accuracy_score, matthews_corrcoef
)
import preprocessed 


# Parent directory of the project-local ``utility`` package.
_UTILITY_PARENT_DIR = '/home/mahalakshmi/Journal_May2025/SV_dec/SV Dataset/NEW_results/'

# Guard the append so that re-running this cell does not keep growing sys.path.
if _UTILITY_PARENT_DIR not in sys.path:
    sys.path.append(_UTILITY_PARENT_DIR)

try:
    import utility.resource_usage as ru
    import utility.unsupervised_helper as uh
    import utility.plot_helper as ph
except ImportError as exc:
    print("CRITICAL ERROR: Could not import 'utility' modules.")
    # Show which import actually failed instead of discarding the detail.
    print(f"Underlying error: {exc}")
    print("Please check sys.path.append line in the script.")
    sys.exit(1)

# --- CONFIGURATION ---
# Value written under the 'dataset' key of every result record.
DATASET_NAME = "SV_Dataset"
# Value written under the 'goid' key of every result record.
GOID = "NA"
# Attack names; each one is passed to preprocessed.load_preprocessed_for_attack.
ATTACK_LIST = [
    "replay",
    "injection",
]
# Number of train/test repetitions executed per attack.
NUM_RUNS = 1

def predict(model, X):
    """Return hard predictions and a score vector for ``X``.

    Parameters
    ----------
    model : object
        Must expose ``predict(X)``. If it also exposes ``predict_proba(X)``,
        column 1 of that output is used as the score.
    X : array-like
        Samples forwarded unchanged to the model.

    Returns
    -------
    tuple
        ``(y_pred, scores)``. ``scores`` is ``predict_proba(X)[:, 1]`` when
        available and working; otherwise it is the same object as ``y_pred``.
    """
    y_pred = model.predict(X)

    if not hasattr(model, "predict_proba"):
        return y_pred, y_pred

    try:
        scores = model.predict_proba(X)[:, 1]
    except Exception:
        # Best-effort fallback to the hard labels. Only ordinary errors are
        # swallowed: a bare ``except`` would also trap KeyboardInterrupt and
        # SystemExit, making a long run impossible to interrupt here.
        scores = y_pred
    return y_pred, scores

def _load_algo_module(module_name, file_path):
    """Execute the Python file at ``file_path`` and return it as a module object."""
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


def _normalise_outputs(y_pred, scores):
    """Map -1/1 predictions to 0/1 and rescale scores that contain negatives.

    Predictions are remapped (-1 -> 0, anything else -> 1) whenever they
    contain -1 and nothing outside {-1, 1}. This also covers the case where
    every prediction is -1, which would otherwise reach the binary metrics
    as an unknown label.
    Scores whose minimum is negative are min-max scaled; the 1e-9 term keeps
    the division defined when all scores are equal.
    """
    y_pred = np.asarray(y_pred)
    observed = set(np.unique(y_pred).tolist())
    if -1 in observed and observed <= {-1, 1}:
        y_pred = np.where(y_pred == -1, 0, 1)

    if scores is not None and np.min(scores) < 0:
        scores = (scores - np.min(scores)) / (np.max(scores) - np.min(scores) + 1e-9)

    return y_pred, scores


def main():
    """Train and evaluate every algorithm in ALGO_FILES on every attack in ATTACK_LIST.

    For each algorithm file the aggregated CSV is deleted if present, then for
    each attack the data is loaded through
    ``preprocessed.load_preprocessed_for_attack`` and NUM_RUNS runs are
    executed. In each run the algorithm file is (re-)executed as a module, its
    ``model(X_train, y_train, X_val, y_val)`` function is called inside a
    ``ru.ResourceProfiler`` block, and the test-set prediction is profiled the
    same way. Classification and resource metrics are collected per run and
    handed to the ``uh`` helpers once per attack, after all runs finished.

    Failures while loading data, running a model, or saving are printed and
    the workflow continues with the next attack or run.
    """
    import traceback  # only needed for error reporting below

    # Setup Output Directory
    root_output_dir = uh.ROOT_OUTPUT_DIR
    os.makedirs(root_output_dir, exist_ok=True)

    # Collected so the final summary works even when ALGO_FILES is empty.
    agg_csv_paths = []

    # ================= ITERATE OVER ALGORITHM FILES =================
    for file_name in ALGO_FILES:
        file_path = folder_path / file_name
        module_name = file_path.stem
        print(f"--- Processing: {module_name} ---")

        ALGO_NAME = module_name
        agg_csv_path = f'{root_output_dir}/aggregated_{ALGO_NAME}_results.csv'
        long_csv_path = os.path.join(uh.PLOT_DATA_DIR, f"{ALGO_NAME}_metrics_long.csv")
        # Start from a clean aggregated file for this algorithm.
        if os.path.exists(agg_csv_path):
            os.remove(agg_csv_path)
        agg_csv_paths.append(agg_csv_path)

        print(f"Starting {ALGO_NAME} Workflow...")
        print(f"Results will be saved to: {agg_csv_path}")

        # ================= ITERATE OVER ATTACKS =================
        for attack_name in ATTACK_LIST:
            print(f"\n{'='*40}")
            print(f"Processing Attack: {attack_name}")
            print(f"{'='*40}")

            try:
                X_train, y_train, X_val, y_val, X_test, y_test, feats, y_orig = \
                    preprocessed.load_preprocessed_for_attack(attack_name)
            except Exception as e:
                print(f"Error loading data for {attack_name}: {e}")
                continue

            # Get Counts
            n_test_attack = np.sum(y_test == 1)
            n_test_normal = np.sum(y_test == 0)
            n_train_attack = np.sum(y_train == 1)
            n_train_normal = np.sum(y_train == 0)

            # Hierarchy info for the CSV
            ds_info = {
                'dataset': DATASET_NAME,
                'goid': GOID,
                'attack_type': attack_name
            }

            all_runs_results = {}

            for i in range(1, NUM_RUNS + 1):
                run_key = f"Run_{i}"
                print(f"  > Executing {run_key}...")
                try:
                    # Re-executed on every run, as in the original workflow.
                    module = _load_algo_module(module_name, file_path)
                    print(f"Running model function in {module_name}...")

                    # --- A. TRAINING (Profiled) ---
                    with ru.ResourceProfiler() as profiler_train:
                        model_instance = module.model(X_train, y_train, X_val, y_val)

                    avg_train_wall_ns = profiler_train.wall_nanoseconds
                    avg_time_pkt_tr = avg_train_wall_ns / len(y_train) if len(y_train) else 0

                    # --- B. TESTING (Profiled) ---
                    with ru.ResourceProfiler() as profiler_test:
                        y_test_pred, scores = predict(model_instance, X_test)
                    # Post-processing is kept outside the profiled region.
                    y_test_pred, scores = _normalise_outputs(y_test_pred, scores)

                    avg_test_wall_ns = profiler_test.wall_nanoseconds
                    avg_time_pkt_te = avg_test_wall_ns / len(y_test) if len(y_test) else 0

                    # --- C. METRICS ---
                    cm = confusion_matrix(y_test, y_test_pred, labels=[0, 1])
                    tn, fp, fn, tp = cm.ravel()

                    rpt = classification_report(y_test, y_test_pred, output_dict=True, zero_division=0)

                    # Calculate advanced metrics
                    precision_anom = precision_score(y_test, y_test_pred, pos_label=1, zero_division=0)
                    recall_anom    = recall_score(y_test, y_test_pred, pos_label=1, zero_division=0)
                    f1_anom        = f1_score(y_test, y_test_pred, pos_label=1, zero_division=0)
                    balanced_acc   = balanced_accuracy_score(y_test, y_test_pred)
                    mcc            = matthews_corrcoef(y_test, y_test_pred)
                    roc_auc        = roc_auc_score(y_test, scores)
                    pr_auc         = average_precision_score(y_test, scores)

                    # --- D. BUILD JSON (Exact format as DT_reference.py) ---
                    test_json = {
                        "Normal count"          : int(n_test_normal),
                        "Attack count"          : int(n_test_attack),
                        "Total"                 : int(n_test_normal + n_test_attack),
                        "tp"                    : int(tp),
                        "tn"                    : int(tn),
                        "fp"                    : int(fp),
                        "fn"                    : int(fn),
                        "Accuracy %"            : uh.r2(rpt['accuracy']*100),
                        "Precision_anom %"      : uh.r2(precision_anom*100),
                        "Precision %"           : uh.r2(rpt["macro avg"]["precision"]*100),
                        "Recall_anom %"         : uh.r2(recall_anom*100),
                        "Recall %"              : uh.r2(rpt["macro avg"]["recall"]*100),
                        "F1-Score_anom %"       : uh.r2(f1_anom*100),
                        "F1-Score %"            : uh.r2(rpt["macro avg"]["f1-score"]*100),
                        "BalancedAcc %"         : uh.r2(balanced_acc*100),
                        "MCC"                   : uh.r3(mcc),
                        "PR-AUC"                : uh.r3(pr_auc*100),
                        "ROC-AUC"               : uh.r3(roc_auc*100),

                        # Resource Metrics
                        "TotalTime (ms)"        : uh.r3(avg_test_wall_ns / 1_000_000),
                        "AvgTimePerPacket(ns)"  : uh.r3(avg_time_pkt_te),
                        "Ram_usage"             : uh.r3(profiler_test.peak_ram_mb),
                        "CPU_avg%"              : uh.r3(profiler_test.cpu_avg_machine_pct),
                        "CPU_peak%"             : uh.r3(profiler_test.cpu_peak_machine_pct),

                        # Training Metrics
                        "training_time_ms"      : uh.r3(avg_train_wall_ns / 1_000_000),
                        "training_avg_time_per_packet_ns": uh.r3(avg_time_pkt_tr),
                        "training_peak_ram_mb"  : uh.r3(profiler_train.peak_ram_mb),
                        "training_cpu_avg_pct"  : uh.r3(profiler_train.cpu_avg_machine_pct),
                        "training_cpu_peak_pct" : uh.r3(profiler_train.cpu_peak_machine_pct),
                        "n_train_attack"        : int(n_train_attack),
                        "n_train_normal"        : int(n_train_normal)
                    }

                    all_runs_results[run_key] = {
                        "Test": test_json
                    }

                except Exception as e:
                    print(f"Error in Run {i}: {e}")
                    traceback.print_exc()

            # ================= SAVE RESULTS =================
            # Saved once per attack, after all runs: all_runs_results is
            # cumulative, so saving inside the run loop would hand the
            # earlier runs to the append helpers again on every later run.
            if not all_runs_results:
                print(f"  [Skipped] No successful runs for {attack_name}; nothing saved.")
                continue

            try:
                uh.append_results_to_csv(agg_csv_path, all_runs_results, ds_info)

                rows = (
                    uh.extract_plot_rows(all_runs_results, ALGO_NAME, ds_info)
                    + uh.extract_average_rows_over_runs(all_runs_results, ALGO_NAME, ds_info)
                )
                uh.append_rows_to_long_csv(long_csv_path, rows)

                print(f"  [Saved] Results for {attack_name} saved.")
            except Exception as e:
                print(f"Error saving results for {attack_name}: {e}")
                traceback.print_exc()

    print("\nWorkflow Completed.")
    # One line per processed algorithm; prints nothing when ALGO_FILES is empty
    # instead of failing on an unbound variable.
    for path in agg_csv_paths:
        print(f"Aggregated results: {path}")

# Run the full workflow when this code is executed directly (as a script or
# as a notebook cell, where __name__ is "__main__"); skipped on import.
if __name__ == "__main__":
    main()

--- Processing: optics ---
Starting optics Workflow...
Results will be saved to: /home/mahalakshmi/Journal_May2025/SV_dec/SV Dataset/NEW_results/results_aggregate/aggregated_optics_results.csv

Processing Attack: replay

=== Dataset for attack: replay ===
Shapes (train, val, test):
(70000, 54) (15000, 54) (15000, 54)
Attack counts (train/val/test):
attack
0    56000
1    14000
Name: count, dtype: int64
attack
0    12000
1     3000
Name: count, dtype: int64
attack
0    12000
1     3000
Name: count, dtype: int64
Numeric cols are:
Index(['sv.length', 'sv.noASDU', 'sv.seqASDU', 'sv.smpCnt', 'sv.confRev',
       'sv.smpSynch', 'sv.current_measurement_phase_1',
       'sv.current_quality_phase_1', 'sv.current_measurement_phase_2',
       'sv.current_quality_phase_2', 'sv.current_measurement_phase_3',
       'sv.current_quality_phase_3', 'sv.current_measurement_4_derived',
       'sv.current_quality_4_derived', 'sv.voltage_measurement_phase_1',
       'sv.voltage_quality_phase_1', 'sv.voltage