In [1]:
import pandas as pd

In [2]:
# Load the hyperparameter-tuning results from the working directory. The next
# cell groups this frame by num_layers, embedding_size, dataset and ppv and
# reads its `loss` column.
tuning_results_df = pd.read_csv("tuning_results.csv")

In [3]:
# Best hyperparameters per (dataset, ppv): the (num_layers, embedding_size)
# combination with the lowest mean loss, rendered as a LaTeX table.

# Order matters: it fixes the order of the index tuples returned by idxmin().
grid_keys = ["num_layers", "embedding_size", "dataset", "ppv"]

# Select `loss` before aggregating so that only the column that is actually
# needed gets averaged and searched (and unrelated columns cannot break it).
mean_loss = tuning_results_df.groupby(grid_keys)["loss"].mean()

# For every (dataset, ppv) pair, the full index tuple of the minimum mean loss.
best_configs = mean_loss.groupby(level=["dataset", "ppv"]).idxmin().to_list()

tuning_df = (
    pd.DataFrame(best_configs, columns=["layers", "embedding", "dataset", "ppv"])
    .pivot(index="dataset", columns="ppv", values=["layers", "embedding"])
    # Upper-case the dataset labels directly on the index.
    .rename(index=str.upper)
)
print(tuning_df.to_latex())


\begin{tabular}{lrrrr}
\toprule
{} & \multicolumn{2}{l}{layers} & \multicolumn{2}{l}{embedding} \\
ppv &      0 &  1 &         0 &     1 \\
dataset &        &    &           &       \\
\midrule
AIFB    &      4 &  1 &       256 &   512 \\
AM      &      5 &  5 &       768 &   768 \\
AMPLUS  &      5 &  5 &      1024 &  1024 \\
BGS     &      5 &  5 &       512 &   512 \\
DBLP    &      5 &  5 &       256 &   256 \\
DMG777K &      2 &  2 &      1024 &  1024 \\
DMGFULL &      2 &  1 &       256 &  1024 \\
MDGENRE &      5 &  5 &       768 &  1024 \\
MUTAG   &      2 &  2 &      1024 &  1024 \\
\bottomrule
\end{tabular}



  print(tuning_df.to_latex())


In [4]:
# Results for 10 runs of trained R-GCN using
# https://github.com/thiviyanT/torch-rgcn
# for small KGs (w/ number of epochs tuned using a held-out validation set) and
# https://github.com/pbloem/kgbench-data
# for kgbench (w/ early stopping on validation set)
#
# NOTE: the units are not uniform. The amplus, dmg777k, dmgfull, mdgenre and
# dblp lists hold fractions (values below 1), while the aifb, bgs, am and mutag
# lists hold percentages. Code that consumes this dict has to rescale the
# fractional entries before the numbers are comparable.
accuracies_rgcn = {
    # --- stored as fractions ---
    "amplus": [
        0.83425,
        0.83365,
        0.84645,
        0.8403,
        0.8415,
        0.83535,
        0.83525,
        0.8366,
        0.83775,
        0.84,
    ],
    "dmg777k": [
        0.628686,
        0.634683,
        0.62069,
        0.597701,
        0.61919,
        0.618191,
        0.625187,
        0.637181,
        0.632184,
        0.637681,
    ],
    "dmgfull": [
        0.5702,
        0.57305,
        0.573,
        0.57515,
        0.5837,
        0.5734,
        0.573,
        0.58875,
        0.5714,
        0.5706,
    ],
    "mdgenre": [
        0.675208,
        0.665557,
        0.675208,
        0.67787,
        0.66589,
        0.671547,
        0.664559,
        0.6802,
        0.677537,
        0.679867,
    ],
    # --- stored as percentages ---
    "aifb": [
        97.22222222222221,
        94.44444444444444,
        94.44444444444444,
        97.22222222222221,
        97.22222222222221,
        94.44444444444444,
        97.22222222222221,
        97.22222222222221,
        94.44444444444444,
        97.22222222222221,
    ],
    "bgs": [
        89.65517241379311,
        89.65517241379311,
        86.20689655172413,
        86.20689655172413,
        86.20689655172413,
        82.75862068965517,
        86.20689655172413,
        89.65517241379311,
        82.75862068965517,
        82.75862068965517,
    ],
    "am": [
        89.39393939393939,
        90.40404040404042,
        87.37373737373737,
        87.37373737373737,
        89.8989898989899,
        90.40404040404042,
        87.37373737373737,
        88.88888888888889,
        88.88888888888889,
        89.8989898989899,
    ],
    "mutag": [
        72.05882352941177,
        76.47058823529412,
        72.05882352941177,
        67.64705882352942,
        73.52941176470588,
        69.11764705882352,
        73.52941176470588,
        76.47058823529412,
        73.52941176470588,
        70.58823529411765,
    ],
    # --- stored as fractions ---
    # NOTE(review): 0.70375 and 0.68435 each occur twice in this list, as an
    # alternating pair; confirm these are genuine repeated results and not a
    # copy-paste duplicate.
    "dblp": [
        0.619,
        0.6936,
        0.712,
        0.70655,
        0.6384,
        0.70375,
        0.68435,
        0.70375,
        0.68435,
        0.7051,
    ],
}

# R-GCN accuracies reported in the `r-gcn-cut` row of the cutting table further
# down (presumably R-GCN trained with degree cutting; confirm against the
# experiment logs). Ten values per dataset, stored as percentages; only aifb,
# bgs and am are available.
accuracies_cut_rgcn = {
    "aifb": [
        94.44444444444444,
        94.44444444444444,
        97.22222222222221,
        94.44444444444444,
        94.44444444444444,
        97.22222222222221,
        97.22222222222221,
        94.44444444444444,
        94.44444444444444,
        97.22222222222221,
    ],
    "bgs": [
        86.20689655172413,
        82.75862068965517,
        86.20689655172413,
        89.65517241379311,
        89.65517241379311,
        82.75862068965517,
        86.20689655172413,
        82.75862068965517,
        89.65517241379311,
        86.20689655172413,
    ],
    "am": [
        89.8989898989899,
        89.8989898989899,
        87.87878787878788,
        87.37373737373737,
        86.86868686868688,
        87.37373737373737,
        86.36363636363636,
        89.39393939393939,
        88.88888888888889,
        87.37373737373737,
    ],
}


In [5]:
# Load the evaluation results from the working directory. Later cells filter
# this frame on its `dataset` and `ppv` columns and read `acc`.
evaluation_results_df = pd.read_csv("evaluation_results.csv")

In [6]:
import numpy as np
from scipy.stats import sem

small = ["aifb", "mutag", "bgs", "am"]
kgb = ["amplus", "dmg777k", "dmgfull", "mdgenre", "dblp"]


def format_mean_sem(values):
    r"""Format ``values`` as the LaTeX table cell ``mean $\pm$ sem``.

    Args:
        values: 1-D array-like of accuracies.

    Returns:
        str: the mean and the standard error of the mean (``scipy.stats.sem``),
        each with two decimals, joined by `` $\pm$ ``.
    """
    # Raw string: "\p" is not a valid escape sequence in a regular literal and
    # triggers a warning on current Python versions.
    return format(np.mean(values), ".2f") + r" $\pm$ " + format(sem(values), ".2f")


# One LaTeX row per dataset: R-GCN | RR-GCN (ppv False) | RR-GCN (ppv True).
for dataset in small + kgb:
    rgcn_accuracy = np.array(accuracies_rgcn[dataset])
    if dataset in kgb:
        # The kgbench baselines are stored as fractions; the small KGs are
        # already percentages.
        rgcn_accuracy *= 100

    dataset_rows = evaluation_results_df[evaluation_results_df.dataset == dataset]
    # `== True` / `== False` are kept deliberately: they give the same mask
    # whether `ppv` was parsed as bool or as 0/1 integers.
    rrgcn_res_ppv = dataset_rows[dataset_rows.ppv == True]  # noqa: E712
    rrgcn_res = dataset_rows[dataset_rows.ppv == False]  # noqa: E712
    rrgcn_acc = rrgcn_res["acc"].values * 100
    rrgcn_ppv_acc = rrgcn_res_ppv["acc"].values * 100

    rgcn_text = format_mean_sem(rgcn_accuracy)
    rrgcn_text = format_mean_sem(rrgcn_acc)
    rrgcn_ppv_text = format_mean_sem(rrgcn_ppv_acc)

    print(f"{dataset.upper()} & {rgcn_text} & {rrgcn_text} & {rrgcn_ppv_text} \\\\")


AIFB & 96.11 $\pm$ 0.45 & 83.33 $\pm$ 1.37 & 86.11 $\pm$ 0.93 \\
MUTAG & 72.50 $\pm$ 0.91 & 70.00 $\pm$ 0.83 & 79.41 $\pm$ 0.58 \\
BGS & 86.21 $\pm$ 0.89 & 80.00 $\pm$ 2.34 & 78.97 $\pm$ 2.44 \\
AM & 88.99 $\pm$ 0.39 & 81.67 $\pm$ 0.57 & 84.65 $\pm$ 0.62 \\
AMPLUS & 83.81 $\pm$ 0.13 & 76.85 $\pm$ 0.06 & 84.54 $\pm$ 0.08 \\
DMG777K & 62.51 $\pm$ 0.38 & 61.40 $\pm$ 0.32 & 63.97 $\pm$ 0.26 \\
DMGFULL & 57.52 $\pm$ 0.19 & 60.50 $\pm$ 0.26 & 63.38 $\pm$ 0.17 \\
MDGENRE & 67.33 $\pm$ 0.19 & 65.09 $\pm$ 0.10 & 67.15 $\pm$ 0.08 \\
DBLP & 68.51 $\pm$ 0.99 & 70.18 $\pm$ 0.11 & 70.61 $\pm$ 0.07 \\


In [7]:
# Load the degree-cutting evaluation results from the working directory. Later
# cells filter this frame on its `dataset` and `ppv` columns and read `acc`.
cut_evaluation_results_df = pd.read_csv("evaluation_results_degree_cutting.csv")

In [8]:
# LaTeX table comparing R-GCN and RR-GCN-PPV with and without cutting.
# Each row is collected as a list of cells and joined with " & " at the end.
cut = ["aifb", "bgs", "am"]
header_cells = ["\\smalltab \\textbf{Model} "]
rgcn_cells = ["\\textsc{r-gcn}"]
rrgcn_cells = ["\\textsc{rr-gcn-ppv}"]
rgcn_cut_cells = ["\\textsc{r-gcn-cut}"]
rrgcn_cut_cells = ["\\textsc{rr-gcn-ppv-cut}"]

for dataset in cut:
    rgcn_cut_accuracy = np.array(accuracies_cut_rgcn[dataset])
    rgcn_accuracy = np.array(accuracies_rgcn[dataset])
    if dataset in kgb:
        # kgbench baselines are stored as fractions; bring them to percent.
        rgcn_cut_accuracy *= 100
        rgcn_accuracy *= 100

    # `== True` is kept deliberately: it gives the same mask whether `ppv` was
    # parsed as bool or as 0/1 integers.
    rrgcn_cut_res_ppv = cut_evaluation_results_df[
        (cut_evaluation_results_df.dataset == dataset)
        & (cut_evaluation_results_df.ppv == True)  # noqa: E712
    ]
    rrgcn_cut_ppv_acc = rrgcn_cut_res_ppv["acc"].values * 100

    rrgcn_res_ppv = evaluation_results_df[
        (evaluation_results_df.dataset == dataset)
        & (evaluation_results_df.ppv == True)  # noqa: E712
    ]
    rrgcn_ppv_acc = rrgcn_res_ppv["acc"].values * 100

    header_cells.append("\\smalltab\\textbf{" + dataset.upper() + "}")
    for cells, accuracies in (
        (rgcn_cells, rgcn_accuracy),
        (rrgcn_cells, rrgcn_ppv_acc),
        (rgcn_cut_cells, rgcn_cut_accuracy),
        (rrgcn_cut_cells, rrgcn_cut_ppv_acc),
    ):
        # Raw f-string: "\p" is not a valid escape in a regular string literal.
        cells.append(rf"{accuracies.mean():.2f} $\pm$ {sem(accuracies):.2f}")

print(" & ".join(header_cells) + " \\\\\\midrule")
for cells in (rgcn_cells, rrgcn_cells, rgcn_cut_cells, rrgcn_cut_cells):
    print(" & ".join(cells) + " \\\\")


\smalltab \textbf{Model}  & \smalltab\textbf{AIFB} & \smalltab\textbf{BGS} & \smalltab\textbf{AM} \\\midrule
\textsc{r-gcn} & 96.11 $\pm$ 0.45 & 86.21 $\pm$ 0.89 & 88.99 $\pm$ 0.39 \\
\textsc{rr-gcn-ppv} & 86.11 $\pm$ 0.93 & 78.97 $\pm$ 2.44 & 84.65 $\pm$ 0.62 \\
\textsc{r-gcn-cut} & 95.56 $\pm$ 0.45 & 86.21 $\pm$ 0.89 & 88.13 $\pm$ 0.41 \\
\textsc{rr-gcn-ppv-cut} & 95.83 $\pm$ 0.62 & 84.14 $\pm$ 1.38 & 84.80 $\pm$ 0.23 \\


In [9]:
# Load the relation-cutting evaluation results from the working directory.
# Later cells filter this frame on its `dataset` and `ppv` columns and read `acc`.
relcut_evaluation_results_df = pd.read_csv("evaluation_results_relation_cutting.csv")

In [10]:
# Mean +/- standard error (in percent) of the ppv accuracies in the
# relation-cutting results, one printed line per dataset.
relcuts = ["am"]
for dataset in relcuts:
    # `== True` is kept deliberately: it gives the same mask whether `ppv` was
    # parsed as bool or as 0/1 integers.
    ppv_rows = relcut_evaluation_results_df[
        (relcut_evaluation_results_df.dataset == dataset)
        & (relcut_evaluation_results_df.ppv == True)  # noqa: E712
    ]
    accs = ppv_rows["acc"] * 100
    # Raw string: "\p" is not a valid escape sequence in a regular literal.
    print(format(accs.mean(), ".2f") + r" $\pm$ " + format(sem(accs), ".2f"))


91.31 $\pm$ 0.24


In [11]:
from scipy.stats import mannwhitneyu

In [12]:
# Mann-Whitney U test on AM: ppv runs from the relation-cutting results vs.
# the R-GCN baseline.
dataset = "am"
relcut_mask = (relcut_evaluation_results_df.ppv == True) & (
    relcut_evaluation_results_df.dataset == dataset
)
rrgcn_res_ppv = relcut_evaluation_results_df[relcut_mask]
# The AM baseline list is already in percent, so only the `acc` column is scaled.
rrgcn_ppv_acc = 100 * rrgcn_res_ppv["acc"].values
rgcn_accuracy = np.array(accuracies_rgcn[dataset])
mannwhitneyu(rrgcn_ppv_acc, rgcn_accuracy)


MannwhitneyuResult(statistic=97.0, pvalue=0.0003873156135084648)

In [13]:
# Mann-Whitney U test on MDGENRE: ppv runs from the evaluation results vs. the
# R-GCN baseline.
dataset = "mdgenre"
mdgenre_mask = (evaluation_results_df.ppv == True) & (
    evaluation_results_df.dataset == dataset
)
rrgcn_res_ppv = evaluation_results_df[mdgenre_mask]
# Neither sample is rescaled here: the mdgenre baseline is stored as fractions,
# and `acc` is presumably fractional too (other cells multiply it by 100).
rrgcn_ppv_acc = rrgcn_res_ppv["acc"].values
rgcn_accuracy = np.array(accuracies_rgcn[dataset])
mannwhitneyu(rrgcn_ppv_acc, rgcn_accuracy)


MannwhitneyuResult(statistic=38.0, pvalue=0.38413626923848176)

In [14]:
# Mann-Whitney U test on BGS: evaluation runs with ppv vs. without ppv.
dataset = "bgs"
bgs_rows = evaluation_results_df.dataset == dataset

rrgcn_res_ppv = evaluation_results_df[bgs_rows & (evaluation_results_df.ppv == True)]
rrgcn_ppv_acc = 100 * rrgcn_res_ppv["acc"].values

rrgcn_res = evaluation_results_df[bgs_rows & (evaluation_results_df.ppv == False)]
rrgcn_acc = 100 * rrgcn_res["acc"].values

# The baseline is loaded but does not take part in the test below.
rgcn_accuracy = np.array(accuracies_rgcn[dataset])
mannwhitneyu(rrgcn_ppv_acc, rrgcn_acc)


MannwhitneyuResult(statistic=43.5, pvalue=0.6365014673152993)

In [15]:
# Mann-Whitney U test on BGS: ppv runs from the degree-cutting results vs. the
# R-GCN baseline.
dataset = "bgs"
bgs_cut_mask = (cut_evaluation_results_df.ppv == True) & (
    cut_evaluation_results_df.dataset == dataset
)
rrgcn_res_ppv = cut_evaluation_results_df[bgs_cut_mask]
# The BGS baseline list is already in percent, so only the `acc` column is scaled.
rrgcn_ppv_acc = 100 * rrgcn_res_ppv["acc"].values
rgcn_accuracy = np.array(accuracies_rgcn[dataset])
mannwhitneyu(rrgcn_ppv_acc, rgcn_accuracy)


MannwhitneyuResult(statistic=37.0, pvalue=0.32225177379007863)

In [17]:
# Mann-Whitney U test on AIFB: ppv runs from the degree-cutting results vs. the
# R-GCN baseline.
dataset = "aifb"
aifb_cut_mask = (cut_evaluation_results_df.ppv == True) & (
    cut_evaluation_results_df.dataset == dataset
)
rrgcn_res_ppv = cut_evaluation_results_df[aifb_cut_mask]
# The AIFB baseline list is already in percent, so only the `acc` column is scaled.
rrgcn_ppv_acc = 100 * rrgcn_res_ppv["acc"].values
rgcn_accuracy = np.array(accuracies_rgcn[dataset])
mannwhitneyu(rrgcn_ppv_acc, rgcn_accuracy)


MannwhitneyuResult(statistic=48.0, pvalue=0.8953333518420598)