In [1]:
import pandas as pd
import numpy as np
import joblib

from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef,
)

In [2]:
# Load the evaluation dataset from the test-data CSV (path is relative to the
# notebook's working directory).
data = pd.read_csv('../data/test_data.csv')

In [3]:
# Split the frame into the target column and the remaining feature columns.
y = data["diagnosis"]
X = data.drop(columns=["diagnosis"])

# Re-use the persisted scaler (presumably the one fitted during training --
# TODO confirm) so the features are transformed the same way the models saw them.
scaler = joblib.load('../model/scaler.pkl')
X_scaled = scaler.transform(X)

In [4]:
def evaluation_metrics(y_true, y_pred, y_proba=None):
    """Compute the classification metrics reported for a single model.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Hard class predictions aligned with ``y_true``.
    y_proba : array-like or None, optional
        Positive-class scores passed to ``roc_auc_score``. Any array-like is
        accepted (NumPy array, list, tuple, pandas Series). When ``None``,
        AUC is not computed.

    Returns
    -------
    dict
        Mapping with the keys ``"Accuracy"``, ``"Precision"``, ``"Recall"``,
        ``"F1"``, ``"MCC"`` and ``"AUC"`` (in that order). ``"AUC"`` is
        ``None`` when ``y_proba`` is ``None``.
    """
    metrics = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred),
        "Recall": recall_score(y_true, y_pred),
        "F1": f1_score(y_true, y_pred),
        "MCC": matthews_corrcoef(y_true, y_pred)
    }

    # Check for "no scores supplied" rather than for one concrete container
    # type: an isinstance(..., np.ndarray) gate silently reported AUC=None for
    # perfectly valid scores given as a list or a pandas Series.
    if y_proba is not None:
        metrics["AUC"] = roc_auc_score(y_true, y_proba)
    else:
        metrics["AUC"] = None

    return metrics

In [5]:
# Registry of persisted estimators to evaluate, as (display name, pickle path)
# pairs; insertion order is the order in which they are tested and reported.
models = dict(
    [
        ("Logistic Regression", '../model/logistic_regression.pkl'),
        ("Decision Tree Classifier", '../model/decision_tree.pkl'),
        ("K-Nearest Neighbor Classifier", '../model/knn.pkl'),
        ("Naive Bayes Classifier", '../model/naive_bayes.pkl'),
        ("Random Forest", '../model/random_forest.pkl'),
        ("XGBoost", '../model/xgboost.pkl'),
    ]
)

In [6]:
# Inspect the (display name, file path) pairs that will be evaluated.
models.items()

dict_items([('Logistic Regression', '../model/logistic_regression.pkl'), ('Decision Tree Classifier', '../model/decision_tree.pkl'), ('K-Nearest Neighbor Classifier', '../model/knn.pkl'), ('Naive Bayes Classifier', '../model/naive_bayes.pkl'), ('Random Forest', '../model/random_forest.pkl'), ('XGBoost', '../model/xgboost.pkl')])

In [7]:
# Accumulator for the evaluation loop: one {model name: metrics dict} entry
# is appended per model.
results = []

In [8]:
# Start from an empty list inside this cell so that re-running it replaces the
# previous results instead of appending a second copy of every model.
results = []

for model_name, model_path in models.items():
    print("Testing", model_name)

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load model artifacts that come from a trusted source.
    model = joblib.load(model_path)

    if hasattr(model, "predict_proba"):
        # Column 1 is used as the positive-class probability and labels are
        # obtained by thresholding it at 0.5.
        # NOTE(review): this assumes the target is encoded as 0/1 with 1 as
        # the positive class -- confirm against the training notebook.
        y_proba = model.predict_proba(X_scaled)[:, 1]
        y_pred = (y_proba >= 0.5).astype(int)
    elif hasattr(model, "decision_function"):
        # Estimators without probabilities can still be ranked for AUC:
        # roc_auc_score accepts non-thresholded decision values.
        y_proba = model.decision_function(X_scaled)
        y_pred = model.predict(X_scaled)
    else:
        # No continuous score available; AUC will be reported as None.
        y_proba = None
        y_pred = model.predict(X_scaled)

    metrics = evaluation_metrics(y, y_pred, y_proba)

    # Kept as a single-key dict per model, which is the shape the downstream
    # flattening step iterates over.
    results.append({model_name: metrics})

Testing Logistic Regression
Testing Decision Tree Classifier
Testing K-Nearest Neighbor Classifier
Testing Naive Bayes Classifier
Testing Random Forest
Testing XGBoost


In [9]:
# Dump the raw list of per-model metric dicts for a quick sanity check.
print(results)

[{'Logistic Regression': {'Accuracy': 0.9824561403508771, 'Precision': 0.9761904761904762, 'Recall': 0.9761904761904762, 'F1': 0.9761904761904762, 'MCC': 0.9623015873015873, 'AUC': 0.9970238095238094}}, {'Decision Tree Classifier': {'Accuracy': 0.9385964912280702, 'Precision': 0.926829268292683, 'Recall': 0.9047619047619048, 'F1': 0.9156626506024096, 'MCC': 0.8675534786006366, 'AUC': 0.9315476190476191}}, {'K-Nearest Neighbor Classifier': {'Accuracy': 0.956140350877193, 'Precision': 0.9743589743589743, 'Recall': 0.9047619047619048, 'F1': 0.9382716049382716, 'MCC': 0.9058238738943076, 'AUC': 0.982308201058201}}, {'Naive Bayes Classifier': {'Accuracy': 0.9210526315789473, 'Precision': 0.9230769230769231, 'Recall': 0.8571428571428571, 'F1': 0.8888888888888888, 'MCC': 0.8291617197562593, 'AUC': 0.9890873015873015}}, {'Random Forest': {'Accuracy': 0.9649122807017544, 'Precision': 1.0, 'Recall': 0.9047619047619048, 'F1': 0.95, 'MCC': 0.9258200997725514, 'AUC': 0.9953703703703703}}, {'XGBoost

In [10]:
# Flatten the list of {model name: metrics} dicts into one flat record per
# model: the name goes under "ML Model Name", followed by the metric values.
rows = [
    {"ML Model Name": name, **scores}
    for entry in results
    for name, scores in entry.items()
]

In [11]:
# Build the comparison table: one row per record in `rows`, with columns taken
# from the record keys.
results_df = pd.DataFrame(rows)

In [12]:
# Arrange the columns in reporting order: model name first, then Accuracy and
# AUC, followed by the remaining metrics.
report_columns = [
    "ML Model Name",
    "Accuracy",
    "AUC",
    "Precision",
    "Recall",
    "F1",
    "MCC",
]
results_df = results_df.loc[:, report_columns]

In [13]:
# Display the final per-model comparison table.
results_df

Unnamed: 0,ML Model Name,Accuracy,AUC,Precision,Recall,F1,MCC
0,Logistic Regression,0.982456,0.997024,0.97619,0.97619,0.97619,0.962302
1,Decision Tree Classifier,0.938596,0.931548,0.926829,0.904762,0.915663,0.867553
2,K-Nearest Neighbor Classifier,0.95614,0.982308,0.974359,0.904762,0.938272,0.905824
3,Naive Bayes Classifier,0.921053,0.989087,0.923077,0.857143,0.888889,0.829162
4,Random Forest,0.964912,0.99537,1.0,0.904762,0.95,0.92582
5,XGBoost,0.973684,0.994048,1.0,0.928571,0.962963,0.944155
