In [15]:
import pandas as pd
from pathlib import Path

In [16]:
# Directory holding the cross-validation result files, relative to this notebook.
CV_RES_ROOT = Path("..") / "data" / "multi_modal_ppi_results" / "cross_validation"
# One CSV per dataset inside that directory.
HPRD_PATH = CV_RES_ROOT.joinpath("hprd_run.csv")
BIOINFER_PATH = CV_RES_ROOT.joinpath("bioinfer_run.csv")

In [17]:
# Read both result CSVs, using their "Name" column as the row index.
hprd_df, bioinfer_df = (
    pd.read_csv(csv_path, index_col="Name")
    for csv_path in (HPRD_PATH, BIOINFER_PATH)
)

In [27]:
# Module names whose runs are summarized; each is matched against the
# index labels of the result tables.
MODULE_CHOICES = [
    "TextModule",
    "TextAndGraphModule",
    "TextAndNumModule",
    "TextAndGraphAndNumModule",
    "GraphModule",
    "GraphAndNumModule",
    "NumModule",
]

# (result column, human-readable label) pairs.
# The precision and recall entries previously carried the "Test F1 score"
# label by copy-paste; each column now has its own label.
METRICS_CHOICES = [
    ("val/f1_mean", "Valid F1 score"),
    ("test/prec_mean", "Test precision"),
    ("test/rec_mean", "Test recall"),
    ("test/f1_mean", "Test F1 score"),
    ("test/auroc_mean", "AUROC"),
]

# Column used to pick the best row per module: the first metric above.
PIVOT_METRIC = METRICS_CHOICES[0][0]

In [28]:
def summarize_cv_results(
    df: pd.DataFrame,
    MODULE_CHOICES=MODULE_CHOICES,
    PIVOT_METRIC=PIVOT_METRIC,
    METRICS_CHOICES=METRICS_CHOICES,
) -> None:
    """Print the metrics of the best row of ``df`` for each module.

    For every module name, the rows whose index label contains the module
    name and starts with the module name's first character are selected.
    The row with the highest ``PIVOT_METRIC`` value is then reported.

    Args:
        df: Results table whose index holds string labels and whose columns
            include ``PIVOT_METRIC`` and every column in ``METRICS_CHOICES``.
        MODULE_CHOICES: Module names to summarize, in print order.
        PIVOT_METRIC: Column used to pick the best row of each module.
        METRICS_CHOICES: ``(column, label)`` pairs; each column's value in
            the best row is printed multiplied by 100 with two decimals.
    """
    for module in MODULE_CHOICES:
        print(f"Module: {module}")
        # A substring match alone is ambiguous ("GraphModule" also occurs in
        # "TextAndGraphModule"), so the first character must match as well.
        # The match is literal (regex=False): module names are not patterns.
        selected = (
            df.index.str.contains(module, regex=False, na=False)
            & df.index.str.startswith(module[0], na=False)
        )
        module_df = df[selected]
        if module_df.empty:
            # idxmax() raises on an empty selection; report it and move on.
            print("    (no matching runs)")
            continue
        best_row = module_df.loc[module_df[PIVOT_METRIC].idxmax()]
        for column, _label in METRICS_CHOICES:
            # Index with the plain column name: set indexers such as
            # row[{column}] are rejected by recent pandas versions.
            print(f"    {column + ':':<20}{100 * best_row[column]:.2f}")


In [29]:
# Print, per module, the metrics of the row with the highest PIVOT_METRIC in the HPRD results.
summarize_cv_results(hprd_df)

Module: TextModule
    val/f1_mean:        53.59
    test/prec_mean:     78.61
    test/rec_mean:      72.57
    test/f1_mean:       73.34
    test/auroc_mean:    94.55
Module: TextAndGraphModule
    val/f1_mean:        52.08
    test/prec_mean:     87.01
    test/rec_mean:      74.39
    test/f1_mean:       78.21
    test/auroc_mean:    95.34
Module: TextAndNumModule
    val/f1_mean:        56.61
    test/prec_mean:     75.06
    test/rec_mean:      72.18
    test/f1_mean:       71.98
    test/auroc_mean:    92.61
Module: TextAndGraphAndNumModule
    val/f1_mean:        54.57
    test/prec_mean:     83.84
    test/rec_mean:      71.03
    test/f1_mean:       75.21
    test/auroc_mean:    94.86
Module: GraphModule
    val/f1_mean:        14.02
    test/prec_mean:     2.57
    test/rec_mean:      27.13
    test/f1_mean:       4.68
    test/auroc_mean:    51.38
Module: GraphAndNumModule
    val/f1_mean:        18.17
    test/prec_mean:     4.00
    test/rec_mean:      30.43
    test/f1_m

In [30]:
# Print, per module, the metrics of the row with the highest PIVOT_METRIC in the BioInfer results.
summarize_cv_results(bioinfer_df)

Module: TextModule
    val/f1_mean:        75.66
    test/prec_mean:     85.27
    test/rec_mean:      84.69
    test/f1_mean:       84.88
    test/auroc_mean:    96.79
Module: TextAndGraphModule
    val/f1_mean:        74.92
    test/prec_mean:     87.07
    test/rec_mean:      83.28
    test/f1_mean:       85.05
    test/auroc_mean:    95.63
Module: TextAndNumModule
    val/f1_mean:        76.51
    test/prec_mean:     87.12
    test/rec_mean:      83.10
    test/f1_mean:       85.05
    test/auroc_mean:    96.16
Module: TextAndGraphAndNumModule
    val/f1_mean:        76.48
    test/prec_mean:     87.07
    test/rec_mean:      84.66
    test/f1_mean:       85.80
    test/auroc_mean:    96.79
Module: GraphModule
    val/f1_mean:        4.47
    test/prec_mean:     5.71
    test/rec_mean:      1.26
    test/f1_mean:       2.07
    test/auroc_mean:    51.06
Module: GraphAndNumModule
    val/f1_mean:        22.37
    test/prec_mean:     16.43
    test/rec_mean:      25.61
    test/f1_me