In [None]:
import pandas as pd
import numpy as np
import joblib
import warnings
import pickle
import bz2
from glob import glob

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import MACCSkeys, Descriptors, PandasTools, Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.DataStructs import ExplicitBitVect

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    confusion_matrix, accuracy_score, f1_score,
    roc_auc_score, cohen_kappa_score
)
from sklearn.model_selection import (
    train_test_split, RepeatedStratifiedKFold,
    ShuffleSplit, StratifiedShuffleSplit
)

from standardiser import break_bonds, neutralise, rules, unsalt
from standardiser.utils import StandardiseException, sanity_check

# Optional: enable autoreload when running in Jupyter
# %reload_ext autoreload
# %autoreload 2

# Suppress warnings
# NOTE: filterwarnings("ignore") hides every warning category, and rebinding
# warnings.warn to a no-op replaces the function for the whole kernel session,
# so deprecation and convergence warnings from the libraries above are not shown.
warnings.filterwarnings("ignore")
warnings.warn = lambda *args, **kwargs: None

In [None]:
import pandas as pd

# Convert a stringified bit vector (e.g. "[1, 0, 1]") into a list of ints
def string_to_list(bit_string):
    """Parse a bracketed, comma-separated string of integers into a list.

    Args:
        bit_string: Text such as ``"[1, 0, 1]"``; any other type is returned
            unchanged so already-parsed cells pass straight through.

    Returns:
        A list of ints for string input (empty list for ``"[]"`` or ``""``),
        otherwise the input object itself.

    Raises:
        ValueError: If a non-empty token is not a valid integer literal.
    """
    if not isinstance(bit_string, str):
        return bit_string
    inner = bit_string.strip().strip('[]')
    # Split on bare commas and let int() absorb surrounding whitespace, so both
    # "[1, 0]" and "[1,0]" parse; blank tokens (empty list) are skipped.
    return [int(token) for token in inner.split(',') if token.strip()]

# Read the test set from the Excel workbook
test_file = r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Test_set_Dermal_balanced_with_fingerprints_sorted_with_RDKit_and_CDK_features.xlsx"
test_df = pd.read_excel(test_file)

# Parse descriptor columns that are still stored as text (decided from the first row only)
for col in ('Morgan_Descriptors', 'MACCS_Descriptors', 'APF_Descriptors'):
    if col not in test_df.columns:
        continue
    if not isinstance(test_df[col].iloc[0], str):
        continue
    test_df[col] = test_df[col].apply(string_to_list)

# Show the result
print("Test DataFrame:")
print(test_df.head())


In [None]:
# Show the column names present in the DataFrame
print("Daftar kolom dalam test_df:", list(test_df.columns), sep="\n")

In [None]:
# Count missing cells before any row is removed
nan_before = test_df.isna().sum().sum()

# Remove every row that contains at least one missing value
test_df = test_df.dropna()

# Report the number of missing cells that were found
print(
    f"Total nilai NaN yang dihapus dari test_df: {nan_before}"
    if nan_before > 0
    else "Tidak ada nilai NaN yang ditemukan dalam test_df."
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Work on a copy of the Outcome column
S = test_df['Outcome'].copy()

# Histogram of the class distribution with unit-wide bins centred on 0..4
fig, ax = plt.subplots()
ax = S.hist(ax=ax, bins=np.arange(-0.5, 5), edgecolor='black')
ax.set(
    xticks=range(0, 5),
    xlabel="Outcome Class",
    ylabel="Count",
    title="Distribusi Outcome (Test Set)",
)
plt.show()

# Fit a label encoder on the distinct outcomes and encode the whole column
outcomes = np.unique(test_df['Outcome'])
le = LabelEncoder().fit(outcomes)
y = le.transform(test_df['Outcome'])

# Class distribution summary
print("Classes                          :", outcomes)
print("Number of cpds in each class     :", np.bincount(y))
print("Total number of cpds             :", len(y))

# Map each Outcome label to an integer code, numbered in order of first appearance
S = test_df['Outcome']
info = {cls: i for i, cls in enumerate(S.unique())}
# Apply the whole mapping in one lookup. Replacing label by label is unsafe when the
# labels are themselves integers: with labels [1, 0] the first pass turns 1 into 0 and
# the second pass then turns every 0 (old and new) into 1, collapsing the classes.
S = S.map(info)

# Optional: keep the mapping around for later use
print("Label mapping (kelas → angka):", info)

In [None]:
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Take the Outcome labels from test_df
S = test_df['Outcome'].copy()

# Encode labels as integers, numbered in order of first appearance.
# The mapping is applied in a single lookup: replacing one label at a time collides
# when a code equals a label that has not been processed yet (e.g. labels [1, 0]
# would all end up as 1).
info = {cls: i for i, cls in enumerate(S.unique())}
S = S.map(info)

# Convert the labels to an int32 numpy array
y_test = np.int32(S)

# Convert the MACCS, Morgan and APF descriptor columns to numpy arrays
def convert_to_array(desc_list):
    """Stack descriptor entries into one numpy array.

    Args:
        desc_list: Iterable whose items are either Python-literal strings
            (e.g. ``"[1, 0, 1]"``) or already-parsed sequences.

    Returns:
        ``np.array`` built from the parsed entries, one row per item.

    Raises:
        ValueError, SyntaxError: If a string item is not a valid Python literal.
    """
    import ast  # local import: this cell does not import ast at the top

    # literal_eval only accepts literals, so text read from the spreadsheet can
    # never execute code the way eval() would.
    return np.array([ast.literal_eval(desc) if isinstance(desc, str) else desc for desc in desc_list])

# Build one array per fingerprint column (MACCS, Morgan, APF, in that order)
x_test_macckeys, x_test_morgan, x_test_apf = (
    convert_to_array(test_df[column])
    for column in ('MACCS_Descriptors', 'Morgan_Descriptors', 'APF_Descriptors')
)

# Sanity-check the label encoding and the array shapes
print("Label classes (encoded)       :", info)
print("Jumlah senyawa per kelas      :", np.bincount(y_test))
print("Total jumlah senyawa (test)   :", len(y_test))
print("x_test_macckeys shape         :", x_test_macckeys.shape)
print("x_test_morgan shape           :", x_test_morgan.shape)
print("x_test_apf shape              :", x_test_apf.shape)


In [None]:
# Keep every column except the identifier, the label and the three fingerprint columns
x_rdkitcdk = test_df.drop(
    ['SMILES', 'Outcome', 'Morgan_Descriptors', 'MACCS_Descriptors', 'APF_Descriptors'],
    axis=1,
)
x_rdkitcdk

In [None]:
# Plain-text dump of the remaining descriptor columns
print(x_rdkitcdk)

In [None]:
# Collapse each row of the descriptor frame into a single array of its values.
# NOTE: this rebinds x_rdkitcdk from a DataFrame to a list, so re-running this cell
# on its own fails (a list has no .apply); re-run the cell that builds x_rdkitcdk first.
x_rdkitcdk  = x_rdkitcdk.apply(lambda row: row.values, axis=1).tolist()

# Add the new column 'rdkit_cdk' to test_df (one array per row, assigned by position)
test_df['rdkit_cdk'] = x_rdkitcdk 

# Display the updated DataFrame
print(test_df)

In [None]:
# Labels as int32, plus one stacked array per descriptor column of test_df
y_test = np.int32(S)
x_test_morgan = np.array(test_df['Morgan_Descriptors'].tolist())
x_test_macckeys = np.array(test_df['MACCS_Descriptors'].tolist())
x_test_rdkit_cdk = np.array(test_df['rdkit_cdk'].tolist())
x_test_apf = np.array(test_df['APF_Descriptors'].tolist())


In [None]:
# Rebuild the int32 label vector from S (same conversion as in the preceding cell)
y_test= np.int32((S))
# Display the stacked rdkit_cdk feature array
x_test_rdkit_cdk

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, f1_score, classification_report


In [None]:
# Reference labels read straight from the Outcome column (not from the remapped S)
y_true = test_df['Outcome'].astype(int)  # Ensure it's of integer type, suitable for metrics calculation


In [None]:
# Display the current state of the test DataFrame
test_df

In [None]:
def convert_list_str_to_float(lst):
    """Return the entries of ``lst`` as floats, leaving out empty strings and None.

    Skipped entries are dropped rather than replaced, so the result can be
    shorter than the input.
    """
    floats = []
    for item in lst:
        if item != '' and item is not None:
            floats.append(float(item))
    return floats

# Cast every per-row descriptor vector to floats, store it back, then stack into a 2-D float array
rdkit_cdk_as_float = test_df['rdkit_cdk'].apply(convert_list_str_to_float)
test_df['rdkit_cdk'] = rdkit_cdk_as_float

X_rdkitcdk = np.array(rdkit_cdk_as_float.tolist(), dtype=float)

# EVALUASI DESCRIPTORS

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
import joblib
import os

# File paths of the saved models, keyed first by algorithm and then by descriptor set.
# NOTE: the earlier wording of this comment said "Hepatotoxicity", but every path
# below points at a Dermal_* model file.
# ==========================
models_info = {
    'SVM': {
        'Morgan': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_SVM_Morgan.pkl",
        'MACCS': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_SVM_MACCS.pkl",
        'APF':   r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_SVM_APF.pkl",
        'RDKitCDK': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_svm_rdkitcdk.pkl"
    },
    'RF': {
        'Morgan': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_morgan.pkl",
        'MACCS':  r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_macckeys.pkl",
        'APF':    r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_apf.pkl",
        'RDKitCDK': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_rdkitcdk.pkl"
    },
    'XGB': {
        'Morgan': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_morgan.pkl",
        'MACCS':  r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_maccs.pkl",
        'APF':    r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_apf.pkl",
        'RDKitCDK': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_rdkitcdk.pkl"
    }
}

# Deserialize every file listed in models_info into a nested dict with the same
# algorithm -> descriptor-set layout. joblib.load unpickles, so only trusted files belong here.
loaded_models = {}
for algo, fps in models_info.items():
    models_for_algo = {}
    for fp_name, path in fps.items():
        models_for_algo[fp_name] = joblib.load(path)
    loaded_models[algo] = models_for_algo

print("Semua model Dermal Toxicity berhasil dimuat.\n")

In [None]:
# Path of the dermal test-set workbook. NOTE(review): `test_set` is not read by any
# other cell visible here; the evaluation cells define their own `test_files` path.
test_set = r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Test_set_Dermal_balanced_with_fingerprints_sorted_with_RDKit_and_CDK_features.xlsx"

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
import joblib
import itertools
import os
import ast  # untuk konversi string ke list

# ==========================
# Read the test set
# ==========================
test_files = r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Test_set_Dermal_balanced_with_fingerprints_sorted_with_RDKit_and_CDK_features.xlsx"
test_df = pd.read_excel(test_files)

# ==========================
# Pull out the integer labels, then drop the non-feature columns;
# the remaining frame is the input for the RDKitCDK models
# ==========================
y_true = test_df['Outcome'].astype(int).to_numpy()
drop_cols = ['SMILES', 'Morgan_Descriptors', 'MACCS_Descriptors', 'APF_Descriptors', 'Outcome']
x_rdkitcdk_test = test_df.drop(drop_cols, axis=1)

# File paths of the saved models, keyed first by algorithm and then by descriptor set.
# NOTE: the earlier wording of this comment said "Hepatotoxicity", but every path
# below points at a Dermal_* model file.
# ==========================
models_info = {
    'SVM': {
        'Morgan': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_SVM_Morgan.pkl",
        'MACCS': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_SVM_MACCS.pkl",
        'APF':   r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_SVM_APF.pkl",
        'RDKitCDK': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_svm_rdkitcdk.pkl"
    },
    'RF': {
        'Morgan': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_morgan.pkl",
        'MACCS':  r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_macckeys.pkl",
        'APF':    r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_apf.pkl",
        'RDKitCDK': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_rdkitcdk.pkl"
    },
    'XGB': {
        'Morgan': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_morgan.pkl",
        'MACCS':  r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_maccs.pkl",
        'APF':    r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_apf.pkl",
        'RDKitCDK': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_rdkitcdk.pkl"
    }
}


# Load every model file listed in models_info into a nested dict with the same
# algorithm -> descriptor-set layout.
# NOTE: joblib.load unpickles its input; only load model files from a trusted source.
loaded_models = {}
for algo, fps in models_info.items():
    loaded_models[algo] = {}
    for fp_name, path in fps.items():
        loaded_models[algo][fp_name] = joblib.load(path)

# The message used to say "Hepatotoxicity", although the paths are the Dermal_* models;
# it now matches what was actually loaded.
print("Semua model Dermal Toxicity berhasil dimuat.\n")

# ==========================
# Bootstrap metrics function
# ==========================
def bootstrap_metrics(probs, y_true, n_bootstrap=1000, ci=0.95, random_state=42):
    """Bootstrap binary-classification metrics from positive-class probabilities.

    Predictions are obtained by thresholding ``probs`` at 0.5. The samples are
    then resampled with replacement ``n_bootstrap`` times and accuracy,
    sensitivity, specificity, ROC AUC and the confusion-matrix counts are
    collected for each resample.

    Args:
        probs: 1-D array-like of positive-class probabilities.
        y_true: 1-D array-like of binary labels (0/1), same length as ``probs``.
        n_bootstrap: Number of bootstrap resamples.
        ci: Width of the percentile interval (0.95 -> 2.5th to 97.5th percentile).
        random_state: Seed for the resampling generator.

    Returns:
        dict with string entries ``'Accuracy'``, ``'Sensitivity'``,
        ``'Specificity'`` and ``'AUC'`` formatted as
        ``"<bootstrap mean> ± <half width of the percentile interval>"``, and
        integer entries ``'TN'``, ``'FP'``, ``'FN'``, ``'TP'`` holding the
        bootstrap mean of each count truncated to int.
    """
    rng = np.random.RandomState(random_state)
    # Plain arrays guarantee positional indexing below; a pandas Series with a
    # non-default index would otherwise be indexed by label.
    probs = np.asarray(probs)
    y_true = np.asarray(y_true)
    preds = (probs > 0.5).astype(int)

    acc_list, sen_list, spe_list, auc_list = [], [], [], []
    tn_list, fp_list, fn_list, tp_list = [], [], [], []
    n_samples = len(y_true)

    for _ in range(n_bootstrap):
        idx = rng.choice(np.arange(n_samples), size=n_samples, replace=True)
        y_sample = y_true[idx]
        p_sample = probs[idx]
        pred_sample = preds[idx]

        # labels=[0, 1] pins the matrix to 2x2. Without it a resample in which
        # both the labels and the predictions contain a single class yields a
        # 1x1 matrix and the four-way unpacking raises.
        tn, fp, fn, tp = confusion_matrix(y_sample, pred_sample, labels=[0, 1]).ravel()
        tn_list.append(tn)
        fp_list.append(fp)
        fn_list.append(fn)
        tp_list.append(tp)

        acc_list.append(accuracy_score(y_sample, pred_sample))
        sen_list.append(tp / (tp + fn) if (tp + fn) > 0 else 0)
        spe_list.append(tn / (tn + fp) if (tn + fp) > 0 else 0)
        try:
            auc_list.append(roc_auc_score(y_sample, p_sample))
        except ValueError:
            # AUC is undefined when the resample holds only one class; record
            # NaN so the nan-aware summaries below skip it.
            auc_list.append(np.nan)

    lower = (1 - ci) / 2
    upper = 1 - lower

    def format_metric(values):
        """Format bootstrap values as 'mean ± half-width of the percentile interval'."""
        mean_val = np.nanmean(values)
        low = np.nanpercentile(values, 100 * lower)
        high = np.nanpercentile(values, 100 * upper)
        half_width = (high - low) / 2
        return f"{mean_val:.2f} ± {half_width:.2f}"

    metrics = {
        'Accuracy': format_metric(acc_list),
        'Sensitivity': format_metric(sen_list),
        'Specificity': format_metric(spe_list),
        'AUC': format_metric(auc_list),
        'TN': int(np.mean(tn_list)),
        'FP': int(np.mean(fp_list)),
        'FN': int(np.mean(fn_list)),
        'TP': int(np.mean(tp_list))
    }
    return metrics

# ==========================
# Enumerate every way of assigning one algorithm to each descriptor set (3**4 = 81)
# ==========================
fingerprints = ['Morgan', 'MACCS', 'APF', 'RDKitCDK']
algorithms = ['SVM', 'RF', 'XGB']
all_combinations = list(itertools.product(*([algorithms] * len(fingerprints))))
print(f"Jumlah kombinasi model: {len(all_combinations)}")  # 81

# ==========================
# Convert a column of stringified lists into a numeric array
# ==========================
def convert_to_array(series):
    """Stack a column of descriptor vectors into one numpy array.

    Args:
        series: Iterable (typically a pandas Series) whose items are either
            Python-literal strings such as ``"[1, 0, 1]"`` or already-parsed
            sequences; the latter are used as-is instead of raising.

    Returns:
        ``np.array`` with one row per item.

    Raises:
        ValueError, SyntaxError: If a string item is not a valid Python literal.
    """
    return np.array([ast.literal_eval(item) if isinstance(item, str) else item for item in series])

# ==========================
# Consensus probabilities and metrics for every combination
# ==========================
# Each descriptor column is parsed once and each of the 12 models predicts once.
# Previously both steps were repeated inside the 81-combination loop although the
# inputs never change between iterations.
feature_matrices = {
    fp: (x_rdkitcdk_test.values if fp == 'RDKitCDK' else convert_to_array(test_df[f"{fp}_Descriptors"]))
    for fp in fingerprints
}
# Positive-class probability (column 1 of predict_proba) per (algorithm, descriptor set)
model_probs = {
    (algo, fp): loaded_models[algo][fp].predict_proba(feature_matrices[fp])[:, 1]
    for algo in algorithms
    for fp in fingerprints
}

results_list = []

for combo in all_combinations:
    # combo[i] is the algorithm paired with fingerprints[i]
    probs_list = [model_probs[(algo, fp)] for fp, algo in zip(fingerprints, combo)]

    # Consensus = unweighted mean of the four member probabilities
    consensus_probs = np.mean(probs_list, axis=0)
    metrics = bootstrap_metrics(consensus_probs, y_true)
    metrics['Combination'] = "_".join([f"{fp}-{algo}" for fp, algo in zip(fingerprints, combo)])
    metrics['Type'] = "Consensus"
    results_list.append(metrics)

# ==========================
# Performance of each individual model
# ==========================
for algo in algorithms:
    for fp in fingerprints:
        metrics = bootstrap_metrics(model_probs[(algo, fp)], y_true)
        metrics['Combination'] = f"{fp}-{algo}"
        metrics['Type'] = "Individual"
        results_list.append(metrics)

# ==========================
# Sort by AUC and write the table to Excel
# ==========================
# The AUC column is text ("mean ± half-width"), so the leading number is pulled out
# into a temporary column that is used for ordering only.
metrics_df = (
    pd.DataFrame(results_list)
    .assign(AUC_val=lambda frame: frame["AUC"].str.extract(r"([0-9.]+)", expand=False).astype(float))
    .sort_values(by="AUC_val", ascending=False)
    .drop(columns=["AUC_val"])
)

metrics_df = metrics_df[['Type','Combination','AUC','Accuracy','Sensitivity','Specificity','TN','FP','FN','TP']]

save_path = r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Evaluation\Evaluation_Full_Consensus_and_Individual_with_CM.xlsx"
os.makedirs(os.path.dirname(save_path), exist_ok=True)
metrics_df.to_excel(save_path, index=False)

print(f"Hasil {len(metrics_df)} model (Consensus + Individual) telah diurutkan dan disimpan ke: {save_path}")


# Without CI 95%

In [None]:
# ================================
# ÎùºÏù¥Î∏åÎü¨Î¶¨ ÏûÑÌè¨Ìä∏
# ================================
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
import joblib
import itertools
import os
import ast  # Î¨∏ÏûêÏó¥ÏùÑ Î¶¨Ïä§Ìä∏/Î∞∞Ïó¥Î°ú Î≥ÄÌôòÌï† Îïå ÏÇ¨Ïö©

# ================================
# Test-set path(s): either a single file or a list of files
# ================================
test_files = r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Test_set_Dermal_balanced_with_fingerprints_sorted_with_RDKit_and_CDK_features.xlsx"

# A lone path string is wrapped in a list so the processing loop can always iterate
test_files = [test_files] if isinstance(test_files, str) else test_files

# ================================
# Model file paths, keyed first by algorithm and then by descriptor set
# ================================
models_info = {
    'SVM': {
        'Morgan': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_SVM_Morgan.pkl",
        'MACCS': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_SVM_MACCS.pkl",
        'APF':   r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_SVM_APF.pkl",
        'RDKitCDK': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_svm_rdkitcdk.pkl"
    },
    'RF': {
        'Morgan': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_morgan.pkl",
        'MACCS':  r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_macckeys.pkl",
        'APF':    r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_apf.pkl",
        'RDKitCDK': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_rf_rdkitcdk.pkl"
    },
    'XGB': {
        'Morgan': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_morgan.pkl",
        'MACCS':  r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_maccs.pkl",
        'APF':    r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_apf.pkl",
        'RDKitCDK': r"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Model\Dermal_xgb_rdkitcdk.pkl"
    }
}

# Deserialize every file listed in models_info into a nested dict with the same
# algorithm -> descriptor-set layout. joblib.load unpickles, so only trusted files belong here.
loaded_models = {}
for algo, fps in models_info.items():
    models_for_algo = {}
    for fp_name, path in fps.items():
        models_for_algo[fp_name] = joblib.load(path)  # load one model
    loaded_models[algo] = models_for_algo

print("‚úÖ Î™®Îì† Î™®Îç∏Ïù¥ ÏÑ±Í≥µÏ†ÅÏúºÎ°ú Î°úÎìúÎêòÏóàÏäµÎãàÎã§.\n")

# ================================
# Metric computation helper
# ================================
def compute_metrics(probs, y_true):
    """Compute point-estimate classification metrics for binary labels.

    Args:
        probs: 1-D numpy array of positive-class probabilities.
        y_true: 1-D array-like of binary labels (0/1), same length as ``probs``.

    Returns:
        dict with ``'Accuracy'``, ``'Sensitivity'``, ``'Specificity'`` and
        ``'AUC'`` rounded to three decimals (``'AUC'`` is NaN when it cannot
        be computed), plus the confusion-matrix counts ``'TN'``, ``'FP'``,
        ``'FN'`` and ``'TP'``.
    """
    preds = (probs > 0.5).astype(int)  # probability -> class label at the 0.5 threshold
    # labels=[0, 1] pins the matrix to 2x2; otherwise data in which both labels and
    # predictions hold a single class gives a 1x1 matrix and the unpacking raises.
    tn, fp, fn, tp = confusion_matrix(y_true, preds, labels=[0, 1]).ravel()
    acc = accuracy_score(y_true, preds)
    sen = tp / (tp + fn) if (tp + fn) > 0 else 0
    spe = tn / (tn + fp) if (tn + fp) > 0 else 0
    try:
        auc = roc_auc_score(y_true, probs)
    except ValueError:
        # AUC is undefined when y_true contains a single class
        auc = np.nan
    return {
        'Accuracy': round(acc, 3),
        'Sensitivity': round(sen, 3),
        'Specificity': round(spe, 3),
        'AUC': round(auc, 3),
        'TN': tn, 'FP': fp, 'FN': fn, 'TP': tp
    }

# ================================
# Convert descriptors stored as strings into a numpy array
# ================================
def convert_to_array(series):
    """Stack a column of descriptor vectors into one numpy array.

    Args:
        series: Iterable (typically a pandas Series) whose items are either
            Python-literal strings such as ``"[1, 0, 1]"`` or already-parsed
            sequences; the latter are used as-is instead of raising.

    Returns:
        ``np.array`` with one row per item.

    Raises:
        ValueError, SyntaxError: If a string item is not a valid Python literal.
    """
    return np.array([ast.literal_eval(item) if isinstance(item, str) else item for item in series])

# ================================
# Enumerate every way of assigning one algorithm to each descriptor set (3**4 = 81)
# ================================
fingerprints = ['Morgan', 'MACCS', 'APF', 'RDKitCDK']
algorithms = ['SVM', 'RF', 'XGB']
all_combinations = list(itertools.product(*([algorithms] * len(fingerprints))))

# ================================
# Test-set processing loop
# ================================
for test_file in test_files:
    print(f"üîç ÌååÏùº Ï≤òÎ¶¨ Ï§ë: {test_file}")
    test_df = pd.read_excel(test_file)
    drop_cols = ['SMILES', 'Morgan_Descriptors', 'MACCS_Descriptors', 'APF_Descriptors', 'Outcome']
    x_rdkitcdk_test = test_df.drop(columns=drop_cols)  # remaining columns feed the RDKitCDK models
    y_true = test_df['Outcome'].astype(int).values
    results_list = []

    # Parse each descriptor column once per file and let each of the 12 models predict
    # once. Previously both steps were repeated for every one of the 81 combinations
    # although the inputs never change between iterations.
    feature_matrices = {
        fp: (x_rdkitcdk_test.values if fp == 'RDKitCDK' else convert_to_array(test_df[f"{fp}_Descriptors"]))
        for fp in fingerprints
    }
    # Positive-class probability (column 1 of predict_proba) per (algorithm, descriptor set)
    model_probs = {
        (algo, fp): loaded_models[algo][fp].predict_proba(feature_matrices[fp])[:, 1]
        for algo in algorithms
        for fp in fingerprints
    }

    # Consensus prediction for each of the 81 combinations
    for combo in all_combinations:
        # combo[i] is the algorithm paired with fingerprints[i]
        probs_list = [model_probs[(algo, fp)] for fp, algo in zip(fingerprints, combo)]

        # Consensus = unweighted mean of the four member probabilities
        consensus_probs = np.mean(probs_list, axis=0)
        metrics = compute_metrics(consensus_probs, y_true)
        metrics['Combination'] = "_".join([f"{fp}-{algo}" for fp, algo in zip(fingerprints, combo)])
        metrics['Type'] = "Consensus"
        results_list.append(metrics)

    # Individual model evaluation
    for algo in algorithms:
        for fp in fingerprints:
            metrics = compute_metrics(model_probs[(algo, fp)], y_true)
            metrics['Combination'] = f"{fp}-{algo}"
            metrics['Type'] = "Individual"
            results_list.append(metrics)

    # Sort by AUC (best first) and save one workbook per test file
    metrics_df = pd.DataFrame(results_list).sort_values(by="AUC", ascending=False)
    metrics_df = metrics_df[['Type', 'Combination', 'AUC', 'Accuracy', 'Sensitivity', 'Specificity', 'TN', 'FP', 'FN', 'TP']]
    set_name = os.path.splitext(os.path.basename(test_file))[0]
    save_path = fr"C:\Fauzan\Manuskrip QSAR 1\Major Revision\Acute Dermal Toxicity (manual split)\Evaluation\NoCI95_Evaluation_{set_name}_Consensus_and_Individual.xlsx"
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    metrics_df.to_excel(save_path, index=False)

    print(f"‚úÖ {set_name}Ïùò Î™®Îì† Î™®Îç∏({len(metrics_df)}) Í≤∞Í≥ºÍ∞Ä Ï†ÄÏû•ÎêòÏóàÏäµÎãàÎã§:\n   {save_path}\n")
