    # ACC: Accuracy	-> (Number of Correct Predictions) / (Total Number of Predictions)
    # FPR: False Positive Rate	-> Number of FP / (FP +TN)
    # AUC: Area under the ROC curve	-> scikit-learn
    # Ya: Yield of actives 	-> TP/(TP+FP)
    # EF: Enrichment Factor	-> ((TP)/(TP+FP)) / ((TP+FN)/(TP+TN+FP+FN))
    # REF: Relative Enrichment Factor -> 100*tp/min(tp+fp,tp+fn)

In [18]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve, RocCurveDisplay, auc

import os 
from pathlib import Path

In [19]:
# Project directories, anchored at the current working directory.
base_dir = Path.cwd() / "implementation"
result_dir = base_dir / "data" / "results" / "ACHE"

In [20]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


def calc_metrics_from_result_df(df: pd.DataFrame, name: str) -> pd.DataFrame:
    """Summarise a binary prediction table as a single row of metrics.

    Args:
        df: Result table with a ``LABEL`` column (ground truth) and a ``PRED``
            column (predicted class); both are compared against 0 and 1.
        name: Identifier of the evaluated model, stored in the ``name`` column.

    Returns:
        A one-row DataFrame with the columns ``name``, ``ACC``, ``FPR``,
        ``AUC``, ``EF`` and ``REF``. A ratio whose denominator is zero (for
        example EF/REF when nothing was predicted active) is reported as NaN
        instead of raising ``ZeroDivisionError``.

    Exceptions raised by ``roc_auc_score`` propagate unchanged.
    """
    # Work on plain arrays so rows are matched by position. Indexing the
    # Series with an enumerate() counter is a label lookup and breaks (KeyError
    # or wrong row) as soon as df does not carry a default 0..n-1 index.
    label = df["LABEL"].to_numpy()
    pred = df["PRED"].to_numpy()
    total = len(label)

    predicted_active = pred == 1
    true_positive = int((predicted_active & (label == 1)).sum())
    false_positive = int((predicted_active & (label == 0)).sum())
    true_negative = int(((pred == 0) & (label == 0)).sum())
    # Every row not matched above is counted as a false negative; this keeps
    # the catch-all behaviour of the original if/elif/else chain.
    false_negative = total - true_positive - false_positive - true_negative

    selected = true_positive + false_positive  # rows predicted as active
    actives = true_positive + false_negative   # TP + FN

    return pd.DataFrame(
        {
            "name": [name],
            "ACC": [_safe_ratio(true_positive + true_negative, total)],
            "FPR": [_safe_ratio(false_positive, false_positive + true_negative)],
            "AUC": [roc_auc_score(label, pred)],
            # EF: hit rate among the selected rows relative to the base rate.
            "EF": [
                _safe_ratio(
                    _safe_ratio(true_positive, selected),
                    _safe_ratio(actives, total),
                )
            ],
            "REF": [_safe_ratio(100 * true_positive, min(selected, actives))],
        }
    )


def print_roc_curve(df: pd.DataFrame, name: str = "example estimator") -> None:
    """Plot and show the ROC curve for one prediction table.

    Args:
        df: Result table with a ``LABEL`` column (ground truth) and a ``PRED``
            column (prediction) that are passed to ``roc_curve``.
        name: Label handed to ``RocCurveDisplay`` as the estimator name. The
            default keeps the previously hard-coded text so existing calls
            render exactly as before.
    """
    label = df["LABEL"]
    pred = df["PRED"]

    # The thresholds returned by roc_curve are not needed for the plot.
    fpr, tpr, _ = roc_curve(label, pred)
    roc_auc = auc(fpr, tpr)
    display = RocCurveDisplay(
        fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name=name
    )
    display.plot()
    plt.show()

### load results


In [21]:
# Each experiment's predictions live in <result_dir>/<experiment>.csv; the
# dict is keyed by experiment name, in the order listed here.
res = {
    experiment: pd.read_csv(result_dir / f"{experiment}.csv")
    for experiment in (
        "baseline_rf",
        "baseline_nn",
        "baseline_knn",
        "fe_rf_mdi_nn",
        "fe_rf_per_nn",
        "fe_rf_mdi_knn",
        "fe_rf_per_knn",
        "fe_rf_mdi_rf",
        "fe_rf_per_rf",
        "fe_freq_rf",
        "fe_freq_nn",
        "fe_freq_knn",
        "fe_nonhydrop_rf",
        "fe_nonhydrop_nn",
        "fe_nonhydrop_knn",
        "fe_pca_rf",
        "fe_pca_nn",
        "fe_pca_knn",
        "fe_smote_rf",
        "fe_smote_nn",
        "fe_smote_knn",
    )
}


In [22]:

# Compute one metrics row per model and stack them with a single concat;
# concatenating inside the loop re-copies the accumulated frame every time.
r = pd.concat(
    [calc_metrics_from_result_df(v, name=k) for k, v in res.items()]
)

# Last expression of the cell: shows the models ordered by accuracy.
r.sort_values("ACC")

Unnamed: 0,name,ACC,FPR,AUC,EF,REF
0,fe_rf_mdi_knn,0.551495,0.430657,0.552964,1.098722,59.863946
0,fe_rf_mdi_rf,0.551495,0.350365,0.559574,1.130585,61.6
0,fe_smote_knn,0.684385,0.576642,0.662898,1.196626,90.243902
0,baseline_knn,0.684385,0.576642,0.662898,1.196626,90.243902
0,fe_freq_knn,0.710963,0.518248,0.692095,1.240339,90.243902
0,fe_pca_knn,0.724252,0.408759,0.713303,1.302824,83.536585
0,fe_pca_nn,0.727575,0.364964,0.719957,1.331144,80.487805
0,fe_nonhydrop_knn,0.730897,0.423358,0.718199,1.300435,85.97561
0,fe_rf_mdi_nn,0.747508,0.343066,0.740053,1.361398,82.317073
0,fe_rf_per_knn,0.757475,0.430657,0.741989,1.317248,91.463415
