In [1]:
# ==========================================================
#  === RANDOM FOREST CROSS-VALIDATION: 50 TOPOLOGI ===
#  Hanya memakai maksimal 50 topologi pertama yang tersedia
#  Simpan hasil ringkasan ke Hasil_ML_CrossValidation_50.txt
# ==========================================================
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, classification_report
)
from sklearn.preprocessing import LabelEncoder
from datetime import datetime

# ====================== PATHS & SETUP ====================== #
# Every location is derived from the parent of the current working directory:
# labeled CSVs are read from 03_Output/Data_ML_Labeled and the results go to
# 03_Output/Hasil_ML_Cross_Validation, which is created when it is missing.
CUR_DIR = os.getcwd()
ROOT_DIR = os.path.dirname(CUR_DIR)
data_dir, hasil_ml_dir = (
    os.path.join(ROOT_DIR, "03_Output", subfolder)
    for subfolder in ("Data_ML_Labeled", "Hasil_ML_Cross_Validation")
)
os.makedirs(hasil_ml_dir, exist_ok=True)

# Text file that receives the cross-validation summary of this run.
output_txt = os.path.join(hasil_ml_dir, "Hasil_ML_CrossValidation_50.txt")

print(f"[INFO] Folder data     : {data_dir}")
print(f"[INFO] Folder hasil ML : {hasil_ml_dir}\n")

# ====================== LIST & SELECT 50 TOPOLOGIES ====================== #
def _topo_number(filename):
    """Return the topology number embedded in a ``topologi_<n>.csv`` name.

    The number is the text between the first underscore and the following
    dot.  ``None`` is returned when that text is not an integer, so a stray
    file such as ``topologi_backup.csv`` is ignored instead of crashing the
    numeric sort with a ``ValueError``.
    """
    try:
        return int(filename.split("_")[1].split(".")[0])
    except (IndexError, ValueError):
        return None


# Keep only topologi_*.csv files whose topology number can be parsed.
csv_files = [
    f for f in os.listdir(data_dir)
    if f.startswith("topologi_") and f.endswith(".csv") and _topo_number(f) is not None
]
# Sort numerically by topology number (so 10 follows 9 instead of 1).
csv_files = sorted(csv_files, key=_topo_number)

# Use at most the first 50 topologies.
selected_files = csv_files[:50]

# Fail early, before anything is derived from an empty selection.
if not selected_files:
    raise FileNotFoundError("[!] Tidak ditemukan file topologi_*.csv di folder data.")

selected_topos = [_topo_number(f) for f in selected_files]

print(f"[INFO] Topologi terpakai (maks 50): {len(selected_files)} file → {selected_topos}\n")

# ====================== MERGE THE SELECTED CSVs ====================== #
# Load every selected file and tag its rows with the topology number parsed
# from the file name (the text between the first "_" and the next ".").
df_list = [
    pd.read_csv(os.path.join(data_dir, csv_name)).assign(
        topologi=int(csv_name.split("_")[1].split(".")[0])
    )
    for csv_name in selected_files
]

# Stack all frames, drop rows that are identical in every column, and
# renumber the index from zero.
df_all = pd.concat(df_list, ignore_index=True)
df_all = df_all.drop_duplicates().reset_index(drop=True)
print(f"[✓] Dataset gabungan (50 skenario): {df_all.shape[0]} baris dari {len(selected_files)} file.\n")

# ====================== FEATURES & TARGETS ====================== #
# Label columns predicted jointly by the model (one column per mismatch type).
target_cols = [
    "RouterIDMismatch",
    "HelloMismatch",
    "DeadMismatch",
    "NetworkTypeMismatch",
    "AreaMismatch",
    "AuthMismatch",
    "AuthKeyMismatch",
    "MTUMismatch",
    "PassiveMismatch",
    "RedistributeMismatch",
]

# Columns that are never used as model inputs: the listed identifier columns,
# the topology number, and the labels themselves.
exclude_cols = [
    "router_a", "router_b",
    "interface_a", "interface_b",
    "ip_a", "ip_b",
    "subnet_a", "subnet_b",
    "neighbor_a", "neighbor_b",
    "topologi",
] + target_cols

# Every remaining column is a feature, kept in its original column order.
feature_cols = [name for name in df_all.columns if name not in exclude_cols]

# Label-encode the text (object dtype) features on the combined data so that
# training and test rows share the same integer codes.
df_encoded = df_all.copy()
text_features = [name for name in feature_cols if df_encoded[name].dtype == "object"]
for name in text_features:
    df_encoded[name] = LabelEncoder().fit_transform(df_encoded[name].astype(str))

print(f"[INFO] Jumlah fitur: {len(feature_cols)} | Jumlah label: {len(target_cols)}\n")

# ====================== CROSS-VALIDATION PER TOPOLOGY ====================== #
# Each topology is held out once as the test fold while a fresh random forest
# is trained on all remaining topologies.  Per-fold metrics are collected in
# `results`; the true and predicted labels of every fold are kept so a global
# report can be built afterwards.
results = []
all_y_true, all_y_pred = [], []

print("=== [INFO] Mulai cross-validation per topologi (subset 50) ===\n")

for topo in sorted(df_encoded["topologi"].unique()):
    held_out = df_encoded["topologi"] == topo
    df_test = df_encoded[held_out]
    df_train = df_encoded[~held_out]

    if df_train.empty or df_test.empty:
        print(f"[SKIP] Topologi {topo} dilewati (data kosong)")
        continue

    X_train, X_test = df_train[feature_cols], df_test[feature_cols]

    # Coerce the labels to integers (expected to be 0/1); values that cannot
    # be parsed as numbers, and missing values, are treated as 0.
    y_train, y_test = (
        fold[target_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
        for fold in (df_train, df_test)
    )

    # Skip the fold when the training labels sum to zero (no positive label
    # to learn from, given 0/1 labels).
    if y_train.empty or y_test.empty or y_train.sum().sum() == 0:
        print(f"[SKIP] Topologi {topo} dilewati karena label tidak valid (kosong atau semua nol).")
        continue

    # === Train the model === #
    # The fixed random_state keeps the forest reproducible between runs.
    rf = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)
    rf.fit(X_train, y_train)

    # Wrap the predictions in a frame with the same columns and index as y_test.
    y_pred = pd.DataFrame(rf.predict(X_test), columns=target_cols, index=y_test.index)

    # === Evaluation === #
    # Precision, recall and F1 are micro-averaged over all label columns.
    # zero_division=0 reports 0 when a score is undefined (for example when
    # the fold has no positive labels and none are predicted), which is how a
    # fold can show accuracy 1.0 together with F1 0.0.
    acc = accuracy_score(y_test, y_pred)
    prec, rec, f1 = (
        score(y_test, y_pred, average="micro", zero_division=0)
        for score in (precision_score, recall_score, f1_score)
    )

    results.append(
        dict(topologi=topo, accuracy=acc, precision=prec, recall=rec, f1_score=f1)
    )

    all_y_true.append(y_test)
    all_y_pred.append(y_pred)

    print(f"[✓] Topologi {topo:03d} → Acc: {acc:.4f} | F1: {f1:.4f}")

# ====================== SUMMARY ====================== #
# Average each per-fold metric over the evaluated topologies; NaN marks the
# case where no fold produced a result.
df_results = pd.DataFrame(results)
if not df_results.empty:
    mean_acc = df_results["accuracy"].mean()
    mean_prec = df_results["precision"].mean()
    mean_rec = df_results["recall"].mean()
    mean_f1 = df_results["f1_score"].mean()
else:
    mean_acc = mean_prec = mean_rec = mean_f1 = float("nan")

# Pool the labels and predictions of all folds for one global report.
if all_y_true and all_y_pred:
    y_true_all = pd.concat(all_y_true, ignore_index=True)
    y_pred_all = pd.concat(all_y_pred, ignore_index=True)
    # Name the report rows after the label columns; without target_names the
    # multilabel report only shows column positions (0, 1, 2, ...).
    report = classification_report(
        y_true_all, y_pred_all, target_names=target_cols, zero_division=0
    )
else:
    report = "[INFO] Tidak ada data valid untuk classification report."

# ====================== SAVE RESULTS TO TXT ====================== #
# Assemble the report text first, then write it to disk in a single pass.
sections = ["=== HASIL RANDOM FOREST CROSS VALIDATION (50 Topologi) ===\n\n"]
if df_results.empty:
    sections.append("[!] Tidak ada hasil cross-validation yang valid.\n\n")
else:
    # Per-topology table followed by the averaged metrics.
    sections += [
        df_results.sort_values("topologi").to_string(index=False),
        "\n\n=== RATA-RATA SELURUH TOPOLOGI (Subset 50) ===\n",
        f"Mean Accuracy : {mean_acc:.4f}\n",
        f"Mean Precision: {mean_prec:.4f}\n",
        f"Mean Recall   : {mean_rec:.4f}\n",
        f"Mean F1-score : {mean_f1:.4f}\n\n",
    ]
# The global classification report and a timestamp are always written.
sections += [
    "=== CLASSIFICATION REPORT (Keseluruhan Data Subset 50) ===\n",
    report,
    "\n\nWaktu eksekusi: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
]

with open(output_txt, "w", encoding="utf-8") as f:
    f.writelines(sections)

print(f"\n[✓] Semua hasil disimpan ke: {output_txt}")


[INFO] Folder data     : c:\Users\Deore Mufrad\Documents\Tugas Akhir\Automasi\PAKE RULE BASED\9. BACKUP 9 (KODE LAMA & BARU) + (TA)\BARU\Code\Code Rapi Fix\03_Output\Data_ML_Labeled
[INFO] Folder hasil ML : c:\Users\Deore Mufrad\Documents\Tugas Akhir\Automasi\PAKE RULE BASED\9. BACKUP 9 (KODE LAMA & BARU) + (TA)\BARU\Code\Code Rapi Fix\03_Output\Hasil_ML_Cross_Validation

[INFO] Topologi terpakai (maks 50): 50 file → [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]

[✓] Dataset gabungan (50 skenario): 800 baris dari 50 file.

[INFO] Jumlah fitur: 22 | Jumlah label: 10

=== [INFO] Mulai cross-validation per topologi (subset 50) ===

[✓] Topologi 001 → Acc: 1.0000 | F1: 0.0000
[✓] Topologi 002 → Acc: 1.0000 | F1: 1.0000
[✓] Topologi 003 → Acc: 1.0000 | F1: 1.0000
[✓] Topologi 004 → Acc: 1.0000 | F1: 1.0000
[✓] Topologi 005 → Acc: 1.0000 | F1: 1.00

In [2]:
# ==========================================================
#  === RANDOM FOREST CROSS-VALIDATION: 100 TOPOLOGI ===
#  Memakai semua topologi yang tersedia (hingga 100)
#  Simpan hasil ringkasan ke Hasil_ML_CrossValidation_100.txt
# ==========================================================
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, classification_report
)
from sklearn.preprocessing import LabelEncoder
from datetime import datetime

# ====================== PATHS & SETUP ====================== #
# Every location is derived from the parent of the current working directory:
# labeled CSVs are read from 03_Output/Data_ML_Labeled and the results go to
# 03_Output/Hasil_ML_Cross_Validation, which is created when it is missing.
CUR_DIR = os.getcwd()
ROOT_DIR = os.path.dirname(CUR_DIR)
data_dir, hasil_ml_dir = (
    os.path.join(ROOT_DIR, "03_Output", subfolder)
    for subfolder in ("Data_ML_Labeled", "Hasil_ML_Cross_Validation")
)
os.makedirs(hasil_ml_dir, exist_ok=True)

# Text file that receives the cross-validation summary of this run.
output_txt = os.path.join(hasil_ml_dir, "Hasil_ML_CrossValidation_100.txt")

print(f"[INFO] Folder data     : {data_dir}")
print(f"[INFO] Folder hasil ML : {hasil_ml_dir}\n")

# ====================== LIST ALL TOPOLOGIES ====================== #
def _topo_number(filename):
    """Return the topology number embedded in a ``topologi_<n>.csv`` name.

    The number is the text between the first underscore and the following
    dot.  ``None`` is returned when that text is not an integer, so a stray
    file such as ``topologi_backup.csv`` is ignored instead of crashing the
    numeric sort with a ``ValueError``.
    """
    try:
        return int(filename.split("_")[1].split(".")[0])
    except (IndexError, ValueError):
        return None


# Keep only topologi_*.csv files whose topology number can be parsed.
csv_files = [
    f for f in os.listdir(data_dir)
    if f.startswith("topologi_") and f.endswith(".csv") and _topo_number(f) is not None
]
# Sort numerically by topology number (so 10 follows 9 instead of 1).
csv_files = sorted(csv_files, key=_topo_number)

if not csv_files:
    raise FileNotFoundError("[!] Tidak ditemukan file topologi_*.csv di folder data.")

all_topos = [_topo_number(f) for f in csv_files]
print(f"[INFO] Topologi terpakai (ALL): {len(csv_files)} file → {all_topos}\n")

# ====================== MERGE ALL CSVs ====================== #
# Load every listed file and tag its rows with the topology number parsed
# from the file name (the text between the first "_" and the next ".").
df_list = [
    pd.read_csv(os.path.join(data_dir, csv_name)).assign(
        topologi=int(csv_name.split("_")[1].split(".")[0])
    )
    for csv_name in csv_files
]

# Stack all frames, drop rows that are identical in every column, and
# renumber the index from zero.
df_all = pd.concat(df_list, ignore_index=True)
df_all = df_all.drop_duplicates().reset_index(drop=True)
print(f"[✓] Dataset gabungan (ALL): {df_all.shape[0]} baris dari {len(csv_files)} file.\n")

# ====================== FEATURES & TARGETS ====================== #
# Label columns predicted jointly by the model (one column per mismatch type).
target_cols = [
    "RouterIDMismatch",
    "HelloMismatch",
    "DeadMismatch",
    "NetworkTypeMismatch",
    "AreaMismatch",
    "AuthMismatch",
    "AuthKeyMismatch",
    "MTUMismatch",
    "PassiveMismatch",
    "RedistributeMismatch",
]

# Columns that are never used as model inputs: the listed identifier columns,
# the topology number, and the labels themselves.
exclude_cols = [
    "router_a", "router_b",
    "interface_a", "interface_b",
    "ip_a", "ip_b",
    "subnet_a", "subnet_b",
    "neighbor_a", "neighbor_b",
    "topologi",
] + target_cols

# Every remaining column is a feature, kept in its original column order.
feature_cols = [name for name in df_all.columns if name not in exclude_cols]

# Label-encode the text (object dtype) features on the combined data so that
# training and test rows share the same integer codes.
df_encoded = df_all.copy()
text_features = [name for name in feature_cols if df_encoded[name].dtype == "object"]
for name in text_features:
    df_encoded[name] = LabelEncoder().fit_transform(df_encoded[name].astype(str))

print(f"[INFO] Jumlah fitur: {len(feature_cols)} | Jumlah label: {len(target_cols)}\n")

# ====================== CROSS-VALIDATION PER TOPOLOGY ====================== #
# Each topology is held out once as the test fold while a fresh random forest
# is trained on all remaining topologies.  Per-fold metrics are collected in
# `results`; the true and predicted labels of every fold are kept so a global
# report can be built afterwards.
results = []
all_y_true, all_y_pred = [], []

print("=== [INFO] Mulai cross-validation per topologi (ALL) ===\n")

for topo in sorted(df_encoded["topologi"].unique()):
    held_out = df_encoded["topologi"] == topo
    df_test = df_encoded[held_out]
    df_train = df_encoded[~held_out]

    if df_train.empty or df_test.empty:
        print(f"[SKIP] Topologi {topo} dilewati (data kosong)")
        continue

    X_train, X_test = df_train[feature_cols], df_test[feature_cols]

    # Coerce the labels to integers (expected to be 0/1); values that cannot
    # be parsed as numbers, and missing values, are treated as 0.
    y_train, y_test = (
        fold[target_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
        for fold in (df_train, df_test)
    )

    # Skip the fold when the training labels sum to zero (no positive label
    # to learn from, given 0/1 labels).
    if y_train.empty or y_test.empty or y_train.sum().sum() == 0:
        print(f"[SKIP] Topologi {topo} dilewati karena label tidak valid (kosong atau semua nol).")
        continue

    # === Train the model === #
    # The fixed random_state keeps the forest reproducible between runs.
    rf = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)
    rf.fit(X_train, y_train)

    # Wrap the predictions in a frame with the same columns and index as y_test.
    y_pred = pd.DataFrame(rf.predict(X_test), columns=target_cols, index=y_test.index)

    # === Evaluation === #
    # Precision, recall and F1 are micro-averaged over all label columns.
    # zero_division=0 reports 0 when a score is undefined (for example when
    # the fold has no positive labels and none are predicted), so a fold can
    # show accuracy 1.0 together with F1 0.0.
    acc = accuracy_score(y_test, y_pred)
    prec, rec, f1 = (
        score(y_test, y_pred, average="micro", zero_division=0)
        for score in (precision_score, recall_score, f1_score)
    )

    results.append(
        dict(topologi=topo, accuracy=acc, precision=prec, recall=rec, f1_score=f1)
    )

    all_y_true.append(y_test)
    all_y_pred.append(y_pred)

    print(f"[✓] Topologi {topo:03d} → Acc: {acc:.4f} | F1: {f1:.4f}")

# ====================== SUMMARY ====================== #
# Average each per-fold metric over the evaluated topologies; NaN marks the
# case where no fold produced a result.
df_results = pd.DataFrame(results)
if not df_results.empty:
    mean_acc = df_results["accuracy"].mean()
    mean_prec = df_results["precision"].mean()
    mean_rec = df_results["recall"].mean()
    mean_f1 = df_results["f1_score"].mean()
else:
    mean_acc = mean_prec = mean_rec = mean_f1 = float("nan")

# Pool the labels and predictions of all folds for one global report.
if all_y_true and all_y_pred:
    y_true_all = pd.concat(all_y_true, ignore_index=True)
    y_pred_all = pd.concat(all_y_pred, ignore_index=True)
    # Name the report rows after the label columns; without target_names the
    # multilabel report only shows column positions (0, 1, 2, ...).
    report = classification_report(
        y_true_all, y_pred_all, target_names=target_cols, zero_division=0
    )
else:
    report = "[INFO] Tidak ada data valid untuk classification report."

# ====================== SAVE RESULTS TO TXT ====================== #
# Assemble the report text first, then write it to disk in a single pass.
sections = ["=== HASIL RANDOM FOREST CROSS VALIDATION (100/All Topologi) ===\n\n"]
if df_results.empty:
    sections.append("[!] Tidak ada hasil cross-validation yang valid.\n\n")
else:
    # Per-topology table followed by the averaged metrics.
    sections += [
        df_results.sort_values("topologi").to_string(index=False),
        "\n\n=== RATA-RATA SELURUH TOPOLOGI (All) ===\n",
        f"Mean Accuracy : {mean_acc:.4f}\n",
        f"Mean Precision: {mean_prec:.4f}\n",
        f"Mean Recall   : {mean_rec:.4f}\n",
        f"Mean F1-score : {mean_f1:.4f}\n\n",
    ]
# The global classification report and a timestamp are always written.
sections += [
    "=== CLASSIFICATION REPORT (Keseluruhan Data All) ===\n",
    report,
    "\n\nWaktu eksekusi: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
]

with open(output_txt, "w", encoding="utf-8") as f:
    f.writelines(sections)

print(f"\n[✓] Semua hasil disimpan ke: {output_txt}")


[INFO] Folder data     : c:\Users\Deore Mufrad\Documents\Tugas Akhir\Automasi\PAKE RULE BASED\9. BACKUP 9 (KODE LAMA & BARU) + (TA)\BARU\Code\Code Rapi Fix\03_Output\Data_ML_Labeled
[INFO] Folder hasil ML : c:\Users\Deore Mufrad\Documents\Tugas Akhir\Automasi\PAKE RULE BASED\9. BACKUP 9 (KODE LAMA & BARU) + (TA)\BARU\Code\Code Rapi Fix\03_Output\Hasil_ML_Cross_Validation

[INFO] Topologi terpakai (ALL): 100 file → [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]

[✓] Dataset gabungan (ALL): 1600 baris dari 100 file.

[INFO] Jumlah fitur: 22 | Jumlah label: 10

=== [INFO] Mulai cross-validation per topologi (ALL) ===

[✓] Topologi 001 → Acc: 1.0000 