In [11]:
from pathlib import Path
import time

import roach

from relbench.data import TaskType
from relbench.datasets import get_dataset_names
from relbench.tasks import get_task_names, get_task

In [4]:
# Job-submission settings shared by the submission cells in this notebook.
queue = "relbench"  # passed as the first argument to roach.submit
project = "relbench/2024-07-01"  # forwarded to the scripts as --roach_project

# cache materialized tensorframes

In [13]:
# Clear the current example cache by renaming it with a timestamp suffix,
# which keeps the previous contents around as a backup instead of deleting them.
ts = int(time.time())  # whole-second Unix timestamp used as the backup suffix
home = Path.home()
cache_dir = home / ".cache" / "relbench_examples"
try:
    cache_dir.rename(cache_dir.with_name(f"{cache_dir.name}.{ts}"))
except FileNotFoundError:
    # No cache directory to move, so there is nothing to back up.
    pass

In [15]:
# Submit one short training job per dataset, using the first task listed for
# that dataset and the GNN script that matches the task type.
for dataset in get_dataset_names():
    task = get_task_names(dataset)[0]
    task_obj = get_task(dataset, task)
    # Link-prediction tasks use the link script; every other type uses the node one.
    script = (
        "gnn_link" if task_obj.task_type == TaskType.LINK_PREDICTION else "gnn_node"
    )
    # A single epoch is requested because epochs=0 throws an exception.
    cmd = f"python {script}.py --dataset {dataset} --task {task} --epochs 1"
    roach.submit(queue, cmd)

# node tasks

In [24]:
# (dataset, task) pairs to run; uncomment entries to include them in the sweep.
node_tasks = [
    # classification
    # ("rel-amazon", "user-churn"),
    # ("rel-amazon", "item-churn"),
    # ("rel-f1", "driver-dnf"),
    # ("rel-f1", "driver-top3"),
    # ("rel-hm", "user-churn"),
    # ("rel-stack", "user-engagement"),
    # ("rel-stack", "user-badge"),
    # ("rel-trial", "study-outcome"),
    # regression
    # ("rel-amazon", "user-ltv"),
    # ("rel-amazon", "item-ltv"),
    # ("rel-avito", "user-clicks"),
    # ("rel-event", "user-attendance"),
    # ("rel-f1", "driver-position"),
    # ("rel-hm", "item-sales"),
    # ("rel-stack", "post-votes"),
    ("rel-trial", "study-adverse"),
    # ("rel-trial", "site-success"),
]

# Scripts to run for every selected pair; uncomment entries to include them.
node_scripts = [
    "gnn_node",
    # "lightgbm_baseline",
    # "lightgbm_gnn_features_node",
    # "node_baseline",
]

# Submit one job per (seed, dataset/task, script) combination, seeds 0-4.
for seed in range(5):
    for dataset, task in node_tasks:
        for script in node_scripts:
            # Space-joined shell command; the env assignment prefixes the python call.
            cmd = " ".join(
                [
                    "OMP_NUM_THREADS=8",
                    f"python {script}.py",
                    f"--dataset {dataset}",
                    f"--task {task}",
                    f"--seed {seed}",
                    f"--roach_project {project}",
                ]
            )
            roach.submit(queue, cmd)

# link tasks

In [21]:
# (dataset, task) pairs covered by the link-task sweep.
link_tasks = [
    ("rel-amazon", "user-item-purchase"),
    ("rel-amazon", "user-item-rate"),
    ("rel-amazon", "user-item-review"),
    ("rel-avito", "user-ad-click"),
    ("rel-hm", "user-item-purchase"),
    ("rel-stack", "user-post-comment"),
    ("rel-stack", "post-post-related"),
    ("rel-trial", "condition-sponsor-rec"),
    ("rel-trial", "site-sponsor-rec"),
]

# Scripts to run for every pair; uncomment entries to include them.
link_scripts = [
    # "gnn_link",
    # "idgnn_link",
    # "lightgbm_link_baseline",
    "link_baseline",
]

# Submit one job per (seed, dataset/task, script) combination, seeds 0-4.
for seed in range(5):
    for dataset, task in link_tasks:
        for script in link_scripts:
            # Space-joined shell command; the env assignment prefixes the python call.
            cmd = " ".join(
                [
                    "OMP_NUM_THREADS=8",
                    f"python {script}.py",
                    f"--dataset {dataset}",
                    f"--task {task}",
                    f"--seed {seed}",
                    f"--roach_project {project}",
                ]
            )
            roach.submit(queue, cmd)