    # ACC: Accuracy	-> (Number of Correct Predictions) / (Total Number of Predictions)
    # FPR: False Positive Rate	-> Number of FP / (FP +TN)
    # AUC: Area under the ROC curve	-> scikit-learn
    # Ya: Yield of actives 	-> TP/(TP+FP)
    # EF: Enrichment Factor	-> (TP/(TP+FP)) / ((TP+FN)/(TP+TN+FP+FN))
    # REF: Relative Enrichment Factor -> 100*tp/min(tp+fp,tp+fn)

In [118]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve, RocCurveDisplay, auc

In [119]:
# Filesystem locations: project root and the folder with the ACHE result CSVs.
base_dir = "/home/bac/activity_prediction/implementation/"
data_dir = f"{base_dir}data/results/ACHE/"

In [120]:
def _safe_ratio(numerator: float, denominator: float) -> float:
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    if denominator == 0:
        return float("nan")
    return numerator / denominator


def calc_metrics_from_result_df(df: pd.DataFrame, name: str) -> pd.DataFrame:
    """Summarise one prediction table as a single row of classification metrics.

    Args:
        df: Frame with a ``LABEL`` column (ground truth) and a ``PRED`` column
            (prediction). Rows are paired by position, so the frame's index
            may be arbitrary.
        name: Identifier stored in the ``name`` column of the result.

    Returns:
        A one-row DataFrame with the columns ``name``, ``ACC``, ``FPR``,
        ``AUC``, ``EF`` and ``REF``:

        * ACC = (TP + TN) / N
        * FPR = FP / (FP + TN)
        * AUC = ``roc_auc_score(LABEL, PRED)``
        * EF  = (TP / (TP + FP)) / ((TP + FN) / N)
        * REF = 100 * TP / min(TP + FP, TP + FN)

        A ratio whose denominator is zero is reported as NaN instead of
        raising ``ZeroDivisionError``.

    Raises:
        KeyError: If ``df`` lacks the ``LABEL`` or ``PRED`` column.
        ValueError: Propagated from ``roc_auc_score`` (e.g. when ``LABEL``
            contains a single class only).
    """
    label = df["LABEL"]
    pred = df["PRED"]

    # Work on the raw value arrays: indexing the Series with a positional
    # counter (label[i]) is a *label* lookup and fails or mis-pairs rows as
    # soon as the frame does not carry a default 0..n-1 index.
    label_values = label.to_numpy()
    pred_values = pred.to_numpy()
    total = len(label_values)

    predicted_active = pred_values == 1
    predicted_inactive = pred_values == 0
    true_positive = int((predicted_active & (label_values == 1)).sum())
    false_positive = int((predicted_active & (label_values == 0)).sum())
    true_negative = int((predicted_inactive & (label_values == 0)).sum())
    # Every remaining row is counted as a false negative. For strictly binary
    # data that is exactly PRED == 0 and LABEL == 1; any other value
    # combination also lands here, as it always has.
    false_negative = total - true_positive - false_positive - true_negative

    predicted_actives = true_positive + false_positive
    actual_actives = true_positive + false_negative

    return pd.DataFrame(
        {
            "name": [name],
            "ACC": [_safe_ratio(true_positive + true_negative, total)],
            "FPR": [_safe_ratio(false_positive, false_positive + true_negative)],
            "AUC": [roc_auc_score(label, pred)],
            # Hit rate among predicted actives relative to the base rate of
            # actives in the whole set.
            "EF": [
                _safe_ratio(
                    _safe_ratio(true_positive, predicted_actives),
                    _safe_ratio(actual_actives, total),
                )
            ],
            "REF": [
                _safe_ratio(
                    100 * true_positive,
                    min(predicted_actives, actual_actives),
                )
            ],
        }
    )


def print_roc_curve(df: pd.DataFrame, name: str = "example estimator") -> None:
    """Plot and show the ROC curve of one prediction table.

    Args:
        df: Frame with a ``LABEL`` column (ground truth) and a ``PRED`` column
            (prediction or score) that are passed to ``roc_curve``.
        name: Label handed to ``RocCurveDisplay`` as the estimator name.
            Defaults to the previously hard-coded ``"example estimator"`` so
            existing calls produce the same plot.

    Returns:
        None. The figure is rendered through ``plt.show()``.
    """
    label = df["LABEL"]
    pred = df["PRED"]

    # Only the curve coordinates are needed; the thresholds are discarded.
    fpr, tpr, _ = roc_curve(label, pred)
    roc_auc = auc(fpr, tpr)
    display = RocCurveDisplay(
        fpr=fpr,
        tpr=tpr,
        roc_auc=roc_auc,
        estimator_name=name,
    )
    display.plot()
    plt.show()

### load results


In [121]:
# Load every experiment's result table from data_dir, keyed by experiment
# name; each file is named "<experiment>.csv".
res = {
    result_name: pd.read_csv(f"{data_dir}{result_name}.csv")
    for result_name in (
        "baseline_rf",
        "baseline_nn",
        "baseline_knn",
        "fe_rf_mdi_nn",
        "fe_rf_per_nn",
        "fe_rf_mdi_knn",
        "fe_rf_per_knn",
        "fe_rf_mdi_rf",
        "fe_rf_per_rf",
    )
}


In [122]:

# Compute one metrics row per experiment, then stack them with a single
# concat: concatenating inside the loop re-copies all accumulated rows on
# every iteration. The per-row index (0) is deliberately kept as-is.
metric_frames = [
    calc_metrics_from_result_df(result_df, name=result_name)
    for result_name, result_df in res.items()
]
# pd.concat rejects an empty list, so fall back to an empty frame as before.
r = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

r

Unnamed: 0,name,ACC,FPR,AUC,EF,REF
0,baseline_rf,0.803987,0.335766,0.792483,1.406803,92.073171
0,baseline_nn,0.767442,0.29927,0.76195,1.407809,82.317073
0,baseline_knn,0.684385,0.576642,0.662898,1.196626,90.243902
0,fe_rf_mdi_nn,0.754153,0.313869,0.748553,1.386953,81.097561
0,fe_rf_per_nn,0.777409,0.284672,0.772298,1.426341,82.926829
0,fe_rf_mdi_knn,0.551495,0.430657,0.552964,1.098722,59.863946
0,fe_rf_per_knn,0.757475,0.430657,0.741989,1.317248,91.463415
0,fe_rf_mdi_rf,0.528239,0.350365,0.538232,1.088776,59.322034
0,fe_rf_per_rf,0.803987,0.291971,0.796088,1.43853,88.414634
