In [1]:
import pandas as pd
from pathlib import Path

In [6]:
# Directories holding the single-run and cross-validation result CSVs.
SINGLE_RES_ROOT = Path("../data/multi_modal_ppi_results/single_run")
CV_RES_ROOT = Path("../data/multi_modal_ppi_results/cross_validation")

# Result files, grouped by dataset; both directories use the same file names.
SINGLE_HPRD_PATH = SINGLE_RES_ROOT.joinpath("hprd_run.csv")
CV_HPRD_PATH = CV_RES_ROOT.joinpath("hprd_run.csv")
SINGLE_BIOINFER_PATH = SINGLE_RES_ROOT.joinpath("bioinfer_run.csv")
CV_BIOINFER_PATH = CV_RES_ROOT.joinpath("bioinfer_run.csv")

In [7]:
# Load the four result tables in a fixed order, using the "Name" column as
# the row index of each frame.
single_hprd_df, single_bioinfer_df, cv_hprd_df, cv_bioinfer_df = (
    pd.read_csv(csv_path, index_col="Name")
    for csv_path in (
        SINGLE_HPRD_PATH,
        SINGLE_BIOINFER_PATH,
        CV_HPRD_PATH,
        CV_BIOINFER_PATH,
    )
)

In [8]:
# Module names to report on. summarize_results matches each name against the
# row index of a results table.
MODULE_CHOICES = [
    "TextModule",
    "TextAndGraphModule",
    "TextAndNumModule",
    "TextAndGraphAndNumModule",
    "GraphModule",
    "GraphAndNumModule",
    "NumModule",
]

# (column name in the results table, human-readable label) pairs.
# Each label must describe its own metric: precision and recall previously
# carried the copy-pasted label "Test F1 score".
METRICS_CHOICES = [
    ("val/f1_mean", "Valid F1 score"),
    ("test/prec_mean", "Test precision"),
    ("test/rec_mean", "Test recall"),
    ("test/f1_mean", "Test F1 score"),
    ("test/auroc_mean", "AUROC"),
]

# Column used to pick the best run per module (the first metric above).
PIVOT_METRIC = METRICS_CHOICES[0][0]

In [11]:
def summarize_results(
    df: pd.DataFrame,
    MODULE_CHOICES=MODULE_CHOICES,
    PIVOT_METRIC=PIVOT_METRIC,
    METRICS_CHOICES=METRICS_CHOICES,
) -> None:
    """Print the metrics of the best run for every module.

    For each module name, the rows of ``df`` whose index label contains that
    name and starts with the same first character are collected. The row with
    the highest ``PIVOT_METRIC`` value is selected and each metric listed in
    ``METRICS_CHOICES`` is printed for it, multiplied by 100 and rounded to
    two decimals.

    Args:
        df: Results table with string index labels and one column per metric.
        MODULE_CHOICES: Iterable of module names to report on. Names are
            interpreted as regular expressions by ``str.contains``.
        PIVOT_METRIC: Column used to select the best row of each module.
        METRICS_CHOICES: Iterable of sequences whose first element is the
            metric column to print.

    Returns:
        None. The report is written to stdout.
    """
    for module in MODULE_CHOICES:
        print(f"Module: {module}")

        # A substring match alone is ambiguous: e.g. "GraphModule" also occurs
        # inside "TextAndGraphModule". Requiring the same first character
        # separates the module names used in this notebook.
        is_candidate = (
            df.index.str.contains(module, na=False)
            & df.index.str.startswith(module[0], na=False)
        )
        # Rows without a pivot value cannot be ranked, so drop them up front.
        candidates = df.loc[is_candidate].dropna(subset=[PIVOT_METRIC])
        if candidates.empty:
            print("    (no matching runs)")
            continue

        # Positional lookup always yields a single row, even when several
        # runs share the same index label.
        best_row = candidates.iloc[candidates[PIVOT_METRIC].argmax()]
        for metric in METRICS_CHOICES:
            metric_key = metric[0]
            # Index with the plain key: a set indexer is rejected by pandas 2.x.
            print(f"    {metric_key + ':':<20}{100 * best_row[metric_key]:.2f}")


In [14]:
# Best run per module for each dataset, single-run results.
print("Single run: Result for hprd")
summarize_results(df=single_hprd_df)
# Separator line and the next heading, emitted as two lines by one call.
print("-" * 30, "Single run: Result for bioinfer", sep="\n")
summarize_results(df=single_bioinfer_df)

Single run: Result for hprd
Module: TextModule
    val/f1_mean:        52.49
    test/prec_mean:     93.33
    test/rec_mean:      70.00
    test/f1_mean:       80.00
    test/auroc_mean:    96.25
Module: TextAndGraphModule
    val/f1_mean:        60.19
    test/prec_mean:     87.50
    test/rec_mean:      70.00
    test/f1_mean:       77.78
    test/auroc_mean:    98.37
Module: TextAndNumModule
    val/f1_mean:        50.64
    test/prec_mean:     78.95
    test/rec_mean:      75.00
    test/f1_mean:       76.92
    test/auroc_mean:    97.27
Module: TextAndGraphAndNumModule
    val/f1_mean:        62.50
    test/prec_mean:     82.35
    test/rec_mean:      70.00
    test/f1_mean:       75.68
    test/auroc_mean:    97.10
Module: GraphModule
    val/f1_mean:        3.03
    test/prec_mean:     0.00
    test/rec_mean:      0.00
    test/f1_mean:       0.00
    test/auroc_mean:    47.70
Module: GraphAndNumModule
    val/f1_mean:        13.07
    test/prec_mean:     0.00
    test/rec_mean

In [15]:
# Best run per module for each dataset, cross-validation results.
print("CV run: Result for hprd")
summarize_results(df=cv_hprd_df)
# Separator line and the next heading, emitted as two lines by one call.
print("-" * 30, "CV run: Result for bioinfer", sep="\n")
summarize_results(df=cv_bioinfer_df)

CV run: Result for hprd
Module: TextModule
    val/f1_mean:        53.59
    test/prec_mean:     78.61
    test/rec_mean:      72.57
    test/f1_mean:       73.34
    test/auroc_mean:    94.55
Module: TextAndGraphModule
    val/f1_mean:        52.08
    test/prec_mean:     87.01
    test/rec_mean:      74.39
    test/f1_mean:       78.21
    test/auroc_mean:    95.34
Module: TextAndNumModule
    val/f1_mean:        56.61
    test/prec_mean:     75.06
    test/rec_mean:      72.18
    test/f1_mean:       71.98
    test/auroc_mean:    92.61
Module: TextAndGraphAndNumModule
    val/f1_mean:        54.57
    test/prec_mean:     83.84
    test/rec_mean:      71.03
    test/f1_mean:       75.21
    test/auroc_mean:    94.86
Module: GraphModule
    val/f1_mean:        14.02
    test/prec_mean:     2.57
    test/rec_mean:      27.13
    test/f1_mean:       4.68
    test/auroc_mean:    51.38
Module: GraphAndNumModule
    val/f1_mean:        18.17
    test/prec_mean:     4.00
    test/rec_mean: 