In [1]:
import json
from collections import defaultdict
from itertools import product

import pandas as pd
import roach
import torch

from relbench.base import TaskType
from relbench.datasets import get_dataset_names
from relbench.tasks import get_task, get_task_names

In [2]:
# Collect the experiment stores recorded under this roach project.
# NOTE(review): presumably one store per script run, in chronological order —
# later cells rely on "last match wins"; confirm against roach's scan() contract.
all_stores = roach.scan("relbench/2024-07-05")

In [3]:
# Sanity check: how many stores the scan returned.
len(all_stores)

600

In [4]:
# Peek at the roach metadata of the last store; its "caller_file" entry is
# what the table-building cell uses to tell the scripts apart.
all_stores[-1]["__roach__"]

{'project': 'relbench/2024-07-05',
 'timestamp': 1720395884026350988,
 'caller_file': 'idgnn_link.py',
 'done': True}

In [15]:
def wrap(name, texttt=False):
    r"""Format a dataset or task name for use as a table row label.

    Args:
        name: The label text.
        texttt: When True, wrap the label in LaTeX ``\texttt{...}`` (useful
            when the table is exported with ``to_latex``). The default
            returns ``name`` unchanged, which is what the notebook display
            uses.

    Returns:
        The label string, optionally wrapped in ``\texttt{...}``.
    """
    if texttt:
        return r"\texttt{" + name + r"}"
    return name

In [6]:
# Display label for each evaluation split, used for the innermost row index.
txt = dict(val="Val", test="Test")

# regression

In [7]:
# Metric read from every store for this section, and the direction in which
# it improves (lower is better here, so higher_is_better is False).
metric, higher_is_better = "mae", False

In [8]:
# Scripts whose results go into the regression table, in column order.
NODE_SCRIPTS = ["gnn_node", "lightgbm_node", "hybrid_node", "baseline_node"]
# Baselines stored inside a single `baseline_node` run.
REGRESSION_BASELINES = [
    "global_zero",
    "global_mean",
    "global_median",
    "entity_mean",
    "entity_median",
]
SPLITS = ["val", "test"]
# Seeded scripts are aggregated over seeds 0 .. NUM_SEEDS - 1.
NUM_SEEDS = 5


def _latest_store(script, dataset, task, seed=None):
    """Return the last store in ``all_stores`` written by ``{script}.py``.

    Args:
        script: Script name without the ``.py`` suffix; matched against the
            store's ``__roach__`` ``caller_file`` entry.
        dataset: Required value of ``store["args"]["dataset"]``.
        task: Required value of ``store["args"]["task"]``.
        seed: When given, additionally require ``store["args"]["seed"]`` to
            equal it; when None the seed is not inspected at all.

    Returns:
        The last matching store in ``all_stores`` order, or None if no store
        matches.
    """
    caller_file = f"{script}.py"
    latest = None
    for store in all_stores:
        # Check the caller first so `args` of unrelated scripts is never read.
        if store["__roach__"]["caller_file"] != caller_file:
            continue
        args = store["args"]
        if args["dataset"] != dataset or args["task"] != task:
            continue
        if seed is not None and args["seed"] != seed:
            continue
        latest = store
    return latest


# One record per (dataset, task, script-or-baseline, split) holding the mean
# and std of `metric`; missing runs are recorded as NaN rather than skipped.
table_data = []
for dataset in get_dataset_names():
    for task in get_task_names(dataset):
        task_obj = get_task(dataset, task)
        if task_obj.task_type.value != TaskType.REGRESSION.value:
            continue
        for script in NODE_SCRIPTS:
            if script == "baseline_node":
                # The baseline store is indexed as store[baseline][split][metric],
                # so one lookup serves every baseline and both splits.
                store = _latest_store(script, dataset, task)
                for split in SPLITS:
                    for baseline in REGRESSION_BASELINES:
                        if store is None:
                            val = float("nan")
                        else:
                            val = store[baseline][split][metric]
                        table_data.append(
                            {
                                "dataset": dataset,
                                "task": task,
                                "script": baseline,
                                "split": split,
                                "mean": val,
                                # Baselines are read from a single run, so no spread.
                                "std": 0.0,
                            }
                        )
                continue

            # The per-seed stores do not depend on the split either: look them
            # up once and read both splits from them.
            seed_stores = [
                _latest_store(script, dataset, task, seed)
                for seed in range(NUM_SEEDS)
            ]
            for split in SPLITS:
                # A missing seed contributes NaN, which propagates into both
                # the mean and the std so incomplete results stay visible.
                vals = torch.tensor(
                    [
                        float("nan") if store is None else store[split][metric]
                        for store in seed_stores
                    ]
                )
                table_data.append(
                    {
                        "dataset": dataset,
                        "task": task,
                        "script": script,
                        "split": split,
                        "mean": vals.mean().item(),
                        "std": vals.std().item(),
                    }
                )

In [16]:
# Build the LaTeX cell text for every record and arrange it as
# {column (script/baseline): {(dataset, task, split label): cell}}.
tex_tab = defaultdict(dict)

# Group the records that compete with each other (same dataset, task and
# split; hybrid_node is excluded from the table) once, instead of
# re-filtering the whole of table_data for every record.
peers = defaultdict(list)
for rec in table_data:
    if rec["script"] != "hybrid_node":
        peers[(rec["dataset"], rec["task"], rec["split"])].append(rec)

for rec in table_data:
    dataset = rec["dataset"]
    task = rec["task"]
    script = rec["script"]
    split = rec["split"]
    mean = rec["mean"]
    std = rec["std"]

    if script == "hybrid_node":
        continue

    # A result is bolded unless some competitor is better by more than the
    # two standard deviations combined. A missing (NaN) result is never best:
    # every comparison against NaN is False, which would otherwise leave it
    # bolded.
    is_best = not pd.isna(mean)
    for comp_rec in peers[(dataset, task, split)]:
        comp_mean = comp_rec["mean"]
        comp_std = comp_rec["std"]
        if higher_is_better:
            beaten = mean + std < comp_mean - comp_std
        else:
            beaten = mean - std > comp_mean + comp_std
        if beaten:
            is_best = False
            break

    opt_bm_open = r"\bm{" if is_best else ""
    opt_bm_close = r"}" if is_best else ""
    tex_mean = opt_bm_open + f"{mean:.3f}" + opt_bm_close
    if script in ("gnn_node", "lightgbm_node"):
        # Seeded models also report their spread as a subscript.
        tex_val = r"$" + tex_mean + r"_{\pm " + f"{std:.3f}" + r"}$"
    else:
        tex_val = r"$" + tex_mean + r"$"

    # Keep the rendered cell on the record as well.
    rec["tex"] = tex_val

    tex_tab[script][(wrap(dataset), wrap(task), txt[split])] = tex_val

tex_df = pd.DataFrame(tex_tab)
tex_df

Unnamed: 0,Unnamed: 1,Unnamed: 2,gnn_node,lightgbm_node,global_zero,global_mean,global_median,entity_mean,entity_median
rel-amazon,user-ltv,Val,$\bm{7.039}_{\pm 0.007}$,$7.730_{\pm 0.000}$,$7.730$,$13.559$,$7.730$,$11.595$,$10.218$
rel-amazon,user-ltv,Test,$\bm{7.960}_{\pm 0.019}$,$8.860_{\pm 0.000}$,$8.860$,$14.080$,$8.860$,$12.075$,$10.725$
rel-amazon,item-ltv,Val,$\bm{68.012}_{\pm 0.255}$,$83.322_{\pm 0.382}$,$104.830$,$106.235$,$89.349$,$109.973$,$96.793$
rel-amazon,item-ltv,Test,$\bm{76.712}_{\pm 0.672}$,$92.963_{\pm 0.530}$,$115.584$,$112.906$,$99.310$,$109.965$,$95.367$
rel-avito,ad-ctr,Val,$\bm{0.037}_{\pm 0.000}$,$0.037_{\pm 0.000}$,$0.048$,$0.048$,$0.040$,$0.044$,$0.044$
rel-avito,ad-ctr,Test,$\bm{0.041}_{\pm 0.001}$,$\bm{0.041}_{\pm 0.000}$,$0.052$,$0.051$,$0.043$,$0.046$,$0.046$
rel-event,user-attendance,Val,$\bm{0.258}_{\pm 0.002}$,$0.262_{\pm 0.000}$,$0.262$,$0.457$,$0.262$,$0.296$,$0.268$
rel-event,user-attendance,Test,$\bm{0.261}_{\pm 0.002}$,$0.264_{\pm 0.000}$,$0.264$,$0.470$,$0.264$,$0.304$,$0.269$
rel-f1,driver-position,Val,$\bm{3.193}_{\pm 0.024}$,$3.450_{\pm 0.030}$,$11.083$,$4.334$,$4.136$,$7.181$,$7.114$
rel-f1,driver-position,Test,$\bm{4.022}_{\pm 0.119}$,$\bm{4.170}_{\pm 0.137}$,$11.926$,$4.513$,$4.399$,$8.501$,$8.519$


In [13]:
# Export the table as LaTeX and restyle pandas' output for booktabs.
# Index of the last LaTeX column: the index levels plus the data columns.
# Deriving it keeps the rule clean-up below working when columns are added
# or removed, instead of silently not matching a hard-coded "10".
last_col = tex_df.index.nlevels + len(tex_df.columns)

tex = tex_df.to_latex()
# Centre the multirow labels vertically instead of top-aligning them.
tex = tex.replace(r"\multirow[t]", r"\multirow[c]")
tex = tex.replace(r"\cline", r"\cmidrule")
# Where the outer index level changes pandas emits two rules back to back;
# keep only the full-width one.
tex = tex.replace(
    rf"\cmidrule{{1-{last_col}}} \cmidrule{{2-{last_col}}}",
    rf"\cmidrule{{1-{last_col}}}",
)
# NOTE(review): column headers such as gnn_node are emitted with unescaped
# underscores — confirm the header row is fixed up before compiling.
print(tex)

\begin{tabular}{llllllllll}
\toprule
 &  &  & gnn_node & lightgbm_node & global_zero & global_mean & global_median & entity_mean & entity_median \\
\midrule
\multirow[c]{4}{*}{\texttt{rel-amazon}} & \multirow[c]{2}{*}{\texttt{user-ltv}} & Val & $\bm{7.039}_{\pm 0.007}$ & $7.730_{\pm 0.000}$ & $7.730$ & $13.559$ & $7.730$ & $11.595$ & $10.218$ \\
 &  & Test & $\bm{7.960}_{\pm 0.019}$ & $8.860_{\pm 0.000}$ & $8.860$ & $14.080$ & $8.860$ & $12.075$ & $10.725$ \\
\cmidrule{2-10}
 & \multirow[c]{2}{*}{\texttt{item-ltv}} & Val & $\bm{68.012}_{\pm 0.255}$ & $83.322_{\pm 0.382}$ & $104.830$ & $106.235$ & $89.349$ & $109.973$ & $96.793$ \\
 &  & Test & $\bm{76.712}_{\pm 0.672}$ & $92.963_{\pm 0.530}$ & $115.584$ & $112.906$ & $99.310$ & $109.965$ & $95.367$ \\
\cmidrule{1-10}
\multirow[c]{2}{*}{\texttt{rel-avito}} & \multirow[c]{2}{*}{\texttt{ad-ctr}} & Val & $\bm{0.037}_{\pm 0.000}$ & $0.037_{\pm 0.000}$ & $0.048$ & $0.048$ & $0.040$ & $0.044$ & $0.044$ \\
 &  & Test & $\bm{0.041}_{\pm 0.001}$

In [14]:
# Leaderboard payload: {script: {"dataset/task": [mean, std]}} built from the
# test-split records only, then printed as indented JSON.
lb_sub = defaultdict(dict)
for rec in table_data:
    dataset, task, script, split, mean, std = (
        rec[field] for field in ("dataset", "task", "script", "split", "mean", "std")
    )
    if split == "test":
        lb_sub[script][f"{dataset}/{task}"] = [mean, std]
print(json.dumps(lb_sub, indent=2))

{
  "gnn_node": {
    "rel-amazon/user-ltv": [
      7.959512105677888,
      0.018910855950465336
    ],
    "rel-amazon/item-ltv": [
      76.71226675772242,
      0.6715122676553593
    ],
    "rel-avito/ad-ctr": [
      0.040916200868064044,
      0.0006041307258214427
    ],
    "rel-event/user-attendance": [
      0.2613223736116375,
      0.0018278870804592942
    ],
    "rel-f1/driver-position": [
      4.021568305868851,
      0.11938469077320951
    ],
    "rel-hm/item-sales": [
      0.05562086323314284,
      0.00030196463294367723
    ],
    "rel-stack/post-votes": [
      0.06511105323456752,
      0.00012863883235743331
    ],
    "rel-trial/study-adverse": [
      44.47336148012007,
      0.2088276432509936
    ],
    "rel-trial/site-success": [
      0.40036298643780677,
      0.020001471301123532
    ]
  },
  "lightgbm_node": {
    "rel-amazon/user-ltv": [
      8.85977882288307,
      0.0
    ],
    "rel-amazon/item-ltv": [
      92.96326307019072,
      0.5302020577