In [5]:
# Terbaru 

In [None]:
# ==========================================================
# === RANDOM FOREST LEAVE-ONE-TOPOLOGY-OUT CV (GT.csv)    ==
# === All evaluation metrics + TP/FP/FN/TN per label      ==
# === Output formatted like the rule-based evaluation     ==
# ==========================================================

import os
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    multilabel_confusion_matrix, classification_report, hamming_loss
)
from sklearn.preprocessing import LabelEncoder
from datetime import datetime
from collections import OrderedDict

# ================== PATHS ==================
# ROOT_DIR is taken to be the parent of the current working directory.
CUR_DIR = os.getcwd()
ROOT_DIR = os.path.dirname(CUR_DIR)

# Input: GT.csv, the combined ground-truth file (topologies 1-100).
gt_dir = os.path.join(ROOT_DIR, "03_Output", "Ground_Truth")
gt_path = os.path.join(gt_dir, "GT.csv")

# Output: the report is written into 04_Evaluasi, created if it is missing.
eval_dir = os.path.join(ROOT_DIR, "04_Evaluasi")
os.makedirs(eval_dir, exist_ok=True)

# Change the report file name here when the scenario changes.
output_name = "Hasil_Evaluasi_RF_100_Topologi.txt"
output_txt = os.path.join(eval_dir, output_name)


# ================== LABEL ORDER ==================
# Canonical order of the target labels. The same list is reused as the
# target columns for training and as the row order of the per-label report.
LABELS_ORDER = [
    "HelloMismatch",
    "DeadMismatch",
    "NetworkTypeMismatch",
    "AreaMismatch",
    "AuthMismatch",
    "AuthKeyMismatch",
    "MTUMismatch",
    "PassiveMismatch",
    "RedistributeMismatch",
    "RouterIDMismatch",
]

# ================== LOAD GT.csv AND SELECT THE TOPOLOGY RANGE ==================
# GT.csv is produced by 5_Pembuatan_GT_CSV.py and holds the features, the
# labels and the topology number of every row.
df_all = pd.read_csv(gt_path)

# The topology column must be integer-typed.
topo_numbers = df_all["topologi"].astype(int)
df_all["topologi"] = topo_numbers

# Keep only the topologies of the current scenario (both bounds inclusive).
# To evaluate a different number of topologies, change the bounds here.
in_scenario = (topo_numbers >= 1) & (topo_numbers <= 100)
df_all = df_all[in_scenario].reset_index(drop=True)

# ================== TARGETS & FEATURES ==================
# Targets use the same order as the evaluation report.
target_cols = list(LABELS_ORDER)

# Columns listed here, plus the targets themselves, are never used as features.
exclude_cols = [
    "router_a", "router_b", "interface_a", "interface_b",
    "ip_a", "ip_b", "subnet_a", "subnet_b",
    "neighbor_a", "neighbor_b", "topologi",
]
exclude_cols.extend(target_cols)

excluded = set(exclude_cols)
feature_cols = [name for name in df_all.columns if name not in excluded]

# Replace every object-typed (categorical) feature with integer codes.
# Each column gets its own encoder, fitted on the column's string form.
df_encoded = df_all.copy()
object_cols = [name for name in feature_cols if df_encoded[name].dtype == "object"]
for name in object_cols:
    as_text = df_encoded[name].astype(str)
    df_encoded[name] = LabelEncoder().fit_transform(as_text)

# ================== LEAVE-ONE-TOPOLOGY-OUT CROSS-VALIDATION ==================
def _prf_scores(y_true, y_pred, average):
    """Return (precision, recall, f1) for one averaging mode.

    Undefined scores are reported as 0 (zero_division=0).
    """
    options = {"average": average, "zero_division": 0}
    return (
        precision_score(y_true, y_pred, **options),
        recall_score(y_true, y_pred, **options),
        f1_score(y_true, y_pred, **options),
    )


results = []      # one metrics row per evaluated topology (fold)
all_y_true = []   # true label frames, one per fold
all_y_pred = []   # predicted label frames, one per fold

fold_ids = sorted(df_encoded["topologi"].unique())
for topo in fold_ids:
    # The held-out topology is the test set; all other topologies train.
    is_held_out = df_encoded["topologi"] == topo
    df_test = df_encoded[is_held_out]
    df_train = df_encoded[~is_held_out]

    # A fold needs both a training part and a test part.
    if df_train.empty or df_test.empty:
        continue

    X_train = df_train[feature_cols]
    y_train = df_train[target_cols].astype(int)
    X_test = df_test[feature_cols]
    y_test = df_test[target_cols].astype(int)

    # One multi-output random forest per fold, with a fixed seed.
    model = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)

    # Wrap the predictions so they share columns and index with y_test.
    y_pred = pd.DataFrame(
        model.predict(X_test),
        columns=target_cols,
        index=y_test.index,
    )

    # ----- metrics of this fold -----
    hamming_l = hamming_loss(y_test, y_pred)
    micro = _prf_scores(y_test, y_pred, "micro")
    macro = _prf_scores(y_test, y_pred, "macro")
    weighted = _prf_scores(y_test, y_pred, "weighted")

    fold_row = {
        "topologi": topo,
        "subset_accuracy": accuracy_score(y_test, y_pred),
        "hamming_accuracy": 1 - hamming_l,
        "hamming_loss": hamming_l,
        "micro_precision": micro[0],
        "micro_recall": micro[1],
        "micro_f1": micro[2],
        "macro_precision": macro[0],
        "macro_recall": macro[1],
        "macro_f1": macro[2],
        "weighted_precision": weighted[0],
        "weighted_recall": weighted[1],
        "weighted_f1": weighted[2],
    }
    results.append(fold_row)

    all_y_true.append(y_test)
    all_y_pred.append(y_pred)

# ================== GLOBAL METRICS (ALL ROWS POOLED) ==================
# Every fold is skipped when the filtered data holds fewer than two
# topologies (a fold needs a non-empty training and test part). Fail with a
# clear message instead of pandas' generic "No objects to concatenate".
if not results:
    raise ValueError(
        "Tidak ada fold cross-validation yang dievaluasi; "
        "data GT.csv hasil filter harus berisi minimal 2 topologi."
    )

df_results = pd.DataFrame(results)
summary_cv_mean = df_results.mean(numeric_only=True)  # mean over topologies (folds)

# Pool the rows of all folds; the original row index is not needed anymore.
y_true_all = pd.concat(all_y_true, ignore_index=True)
y_pred_all = pd.concat(all_y_pred, ignore_index=True)

# One 2x2 confusion matrix per label, in the column order of the label frames.
mcm = multilabel_confusion_matrix(y_true_all, y_pred_all)

def safe_div(a, b):
    """Return ``a / b``, or ``0.0`` when the denominator ``b`` is falsy."""
    if not b:
        return 0.0
    return a / b

# ----- per-label counts and scores, pooled over all folds -----
per_label = OrderedDict()
sum_tp = sum_fp = sum_fn = sum_tn = 0
# Unrounded (precision, recall, f1, accuracy) per label. The macro averages
# are taken from these so that the rounding applied to the stored per-label
# values does not leak into the averages.
raw_scores = []

for i, label in enumerate(target_cols):
    # Each 2x2 matrix is laid out as [[TN, FP], [FN, TP]]. Plain ints keep the
    # stored counts consistent with the int support_* fields.
    TN, FP, FN, TP = (int(count) for count in mcm[i].ravel())

    support_pos = TP + FN
    support_neg = TN + FP
    support_all = support_pos + support_neg

    prec = safe_div(TP, TP + FP)
    rec = safe_div(TP, TP + FN)
    f1 = safe_div(2 * prec * rec, prec + rec)
    acc = safe_div(TP + TN, support_all)
    raw_scores.append((prec, rec, f1, acc))

    # Stored scores are rounded to 4 decimals for the report.
    per_label[label] = {
        "tp": TP,
        "fp": FP,
        "fn": FN,
        "tn": TN,
        "precision": round(prec, 4),
        "recall": round(rec, 4),
        "f1": round(f1, 4),
        "accuracy": round(acc, 4),
        "support_pos": support_pos,
        "support_neg": support_neg,
        "support_all": support_all,
    }

    sum_tp += TP
    sum_fp += FP
    sum_fn += FN
    sum_tn += TN

# Macro scores: unweighted mean over labels of the unrounded per-label scores.
if raw_scores:
    n_labels = len(raw_scores)
    macro_p, macro_r, macro_f1, macro_acc = (
        sum(column) / n_labels for column in zip(*raw_scores)
    )
else:
    macro_p = macro_r = macro_f1 = macro_acc = 0.0

# ----- micro scores from the TP/FP/FN/TN counts summed over all labels -----
predicted_pos = sum_tp + sum_fp
actual_pos = sum_tp + sum_fn
all_cells = predicted_pos + sum_fn + sum_tn

micro_p = safe_div(sum_tp, predicted_pos)
micro_r = safe_div(sum_tp, actual_pos)
micro_f1 = safe_div(2 * micro_p * micro_r, micro_p + micro_r)
micro_jaccard = safe_div(sum_tp, predicted_pos + sum_fn)
micro_acc_std = safe_div(sum_tp + sum_tn, all_cells)

# Exact-match and Hamming accuracy are the means of the per-topology values,
# not values recomputed on the pooled rows.
subset_acc_mean = float(summary_cv_mean["subset_accuracy"])
hamming_acc_mean = float(summary_cv_mean["hamming_accuracy"])

# Everything the report needs, rounded to 4 decimals where applicable.
summary_global = {}
summary_global["macro"] = {
    "precision": round(macro_p, 4),
    "recall": round(macro_r, 4),
    "f1": round(macro_f1, 4),
    "accuracy": round(macro_acc, 4),
}
summary_global["micro"] = {
    "precision": round(micro_p, 4),
    "recall": round(micro_r, 4),
    "f1": round(micro_f1, 4),
    "accuracy_jaccard": round(micro_jaccard, 4),
    "accuracy_standard": round(micro_acc_std, 4),
}
summary_global["global_counts"] = {
    "tp_total": int(sum_tp),
    "fp_total": int(sum_fp),
    "fn_total": int(sum_fn),
    "tn_total": int(sum_tn),
}
summary_global["subset_accuracy"] = {
    "mean_exact_match": round(subset_acc_mean, 4),
    "mean_hamming_accuracy": round(hamming_acc_mean, 4),
    "num_samples": int(len(y_true_all)),
    "num_topologies": int(len(df_results)),
}

# scikit-learn's text report on the pooled rows (optional, kept on purpose).
report_options = {"target_names": LABELS_ORDER, "zero_division": 0}
report = classification_report(y_true_all, y_pred_all, **report_options)

# ================== SAVE OUTPUT ==================
# The headings use the number of topologies that were actually evaluated
# rather than a hard-coded count, so they cannot drift from the data.
macro_summary = summary_global["macro"]
micro_summary = summary_global["micro"]
count_summary = summary_global["global_counts"]
subset_summary = summary_global["subset_accuracy"]
n_topo = subset_summary["num_topologies"]

with open(output_txt, "w", encoding="utf-8") as f:

    f.write(f"=== HASIL RANDOM FOREST CROSS-VALIDATION ({n_topo} Topologi, GT.csv) ===\n\n")

    # ---- Per-topology summary (one row per CV fold) ----
    f.write("== Ringkasan Per Topologi (Fold CV) ==\n")
    f.write(df_results.to_string(index=False))
    f.write("\n")

    # ---- Per-label table (same layout as the rule-based report) ----
    f.write(f"\n== Per Label (Global, gabungan {n_topo} topologi) ==\n")
    f.write("Label                  | TP  FP  FN  TN  | Prec   Rec    F1     Acc    | Pos  Neg  All\n")
    f.write("-" * 96 + "\n")
    for lbl in LABELS_ORDER:
        v = per_label[lbl]
        f.write(
            f"{lbl:22} | "
            f"{v['tp']:3} {v['fp']:3} {v['fn']:3} {v['tn']:3} | "
            f"{v['precision']:.4f} {v['recall']:.4f} {v['f1']:.4f} {v['accuracy']:.4f} | "
            f"{v['support_pos']:4} {v['support_neg']:4} {v['support_all']:4}\n"
        )

    # ---- Macro averages ----
    f.write("\n== Rata-rata (Macro) ==\n")
    f.write(f"Macro Precision       : {macro_summary['precision']}\n")
    f.write(f"Macro Recall          : {macro_summary['recall']}\n")
    f.write(f"Macro F1-Score        : {macro_summary['f1']}\n")
    f.write(f"Macro Accuracy        : {macro_summary['accuracy']}\n")

    # ---- Micro metrics ----
    f.write("\n== Metrik Mikro (Global) ==\n")
    f.write(f"Micro Precision       : {micro_summary['precision']}\n")
    f.write(f"Micro Recall          : {micro_summary['recall']}\n")
    f.write(f"Micro F1-Score        : {micro_summary['f1']}\n")
    f.write(f"Micro Accuracy Jaccard: {micro_summary['accuracy_jaccard']}\n")
    f.write(f"Micro Accuracy Std    : {micro_summary['accuracy_standard']}\n")

    # ---- Total counts and subset accuracy (same layout as rule-based) ----
    f.write("\n== TN & Subset Accuracy ==\n")
    f.write(
        f"Total TP/FP/FN/TN     : "
        f"{count_summary['tp_total']}/"
        f"{count_summary['fp_total']}/"
        f"{count_summary['fn_total']}/"
        f"{count_summary['tn_total']}\n"
    )
    f.write(
        f"Subset Accuracy (Exact Match, mean per topologi) : "
        f"{subset_summary['mean_exact_match']}\n"
    )
    f.write(
        f"Hamming Accuracy (mean per topologi)            : "
        f"{subset_summary['mean_hamming_accuracy']}\n"
    )
    f.write(
        f"Total Sampel (baris dataset)                   : "
        f"{subset_summary['num_samples']}\n"
    )
    f.write(
        f"Total Topologi (fold CV)                       : "
        f"{n_topo}\n"
    )

    # ---- scikit-learn classification report (optional section) ----
    f.write("\n=== CLASSIFICATION REPORT (GLOBAL) ===\n")
    f.write(report)

    # Timestamp of when the report was written (local time, no trailing newline).
    f.write("\nWaktu Eksekusi: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

print(f"[✓] Semua metrik + TP/FP/FN/TN berhasil disimpan ke: {output_txt}")


[✓] Semua metrik + TP/FP/FN/TN berhasil disimpan ke: c:\Users\Deore Mufrad\Documents\Tugas Akhir\Automasi\PAKE RULE BASED\9. BACKUP 9 (KODE LAMA & BARU) + (TA)\BARU\Code\Alur Baru\04_Evaluasi\Hasil_Evaluasi_RF_100_Topologi.txt
