# Submit roach jobs to build RelBench datasets and task tables

In [24]:
import time
from pathlib import Path

import roach

from relbench.datasets import get_dataset, get_dataset_names
from relbench.tasks import get_task, get_task_names

In [8]:
# Clear the current relbench cache by moving it aside to a timestamped backup.
# Nothing is deleted: the old cache stays available under relbench.<unix-time>.
ts = int(time.time())
home = Path.home()
cache_dir = Path(f"{home}/.cache/relbench")
# Path.rename raises FileNotFoundError when the source is missing, which would
# break a fresh-kernel "Run All" on a machine without a cache (or a second run
# of this cell). With no cache there is nothing to clear, so skip the rename.
backup_dir = (
    cache_dir.rename(f"{home}/.cache/relbench.{ts}") if cache_dir.exists() else None
)
# Cell output: the backup path, or nothing when there was no cache to move.
backup_dir

PosixPath('/afs/cs.stanford.edu/u/ranjanr/.cache/relbench.1719789551')

In [13]:
# Queue identifier passed as the first argument to every roach.submit call in
# this notebook.
# NOTE(review): the suffix looks like a date tag for this batch of jobs — confirm.
queue = "relbench/2024-06-30"

In [20]:
# Shell pipeline for the H&M raw data: download the Kaggle competition archive
# into the working directory, create the target folder, then move the zip there.
# The steps are chained with `&&`, so a failing step stops the ones after it.
cmd = " && ".join(
    [
        "kaggle competitions download -c h-and-m-personalized-fashion-recommendations",
        "mkdir -p data/hm-recommendation",
        "mv h-and-m-personalized-fashion-recommendations.zip data/hm-recommendation",
    ]
)

In [None]:
# Submit the kaggle download command to `queue`.
# Caution: this sends whatever `cmd` currently holds, and later cells rebind
# `cmd` inside their loops — run this right after the cell that builds the
# kaggle command.
roach.submit(queue, cmd)

In [11]:
# Display the dataset names reported by relbench; the entries listed in the
# next cell's loop mirror this output.
get_dataset_names()

['rel-amazon',
 'rel-avito',
 'rel-event',
 'rel-f1',
 'rel-hm',
 'rel-stack',
 'rel-trial']

In [21]:
# Submit one job per enabled dataset. Each job runs a Python one-liner that
# calls get_db() on the dataset with download=False. Uncomment a name to
# include that dataset in the batch.
for dataset_name in [
    # "rel-amazon",
    # "rel-avito",
    # "rel-event",
    # "rel-f1",
    "rel-hm",
    # "rel-stack",
    # "rel-trial",
]:
    # Statements of the one-liner, joined with "; " for `python -c`.
    py_cmd = "; ".join(
        [
            "from relbench.datasets import get_dataset",
            f'get_dataset("{dataset_name}", download=False).get_db()',
        ]
    )
    # Single quotes on the outside: the one-liner itself only uses double quotes.
    cmd = f"python -c '{py_cmd}'"
    # Third argument to roach.submit (presumably a shell precondition — confirm):
    # rel-hm checks that the Kaggle zip is on disk; every other dataset passes
    # the always-succeeding `true`.
    requires = (
        "test -f data/hm-recommendation/h-and-m-personalized-fashion-recommendations.zip"
        if dataset_name == "rel-hm"
        else "true"
    )
    roach.submit(queue, cmd, requires)

In [25]:
# Collect every (dataset name, task name) combination: datasets in the order
# returned by get_dataset_names(), and for each one its tasks in the order
# returned by get_task_names(). Printed so the list can be copied into the
# next cell.
pairs = [
    (dataset_name, task_name)
    for dataset_name in get_dataset_names()
    for task_name in get_task_names(dataset_name)
]
print(pairs)

[('rel-amazon', 'user-churn'), ('rel-amazon', 'user-ltv'), ('rel-amazon', 'item-churn'), ('rel-amazon', 'item-ltv'), ('rel-amazon', 'user-item-purchase'), ('rel-amazon', 'user-item-rate'), ('rel-amazon', 'user-item-review'), ('rel-avito', 'ads-clicks'), ('rel-avito', 'user-visits'), ('rel-avito', 'user-clicks'), ('rel-avito', 'user-ad-visit'), ('rel-event', 'user-attendance'), ('rel-event', 'user-repeat'), ('rel-event', 'user-ignore'), ('rel-f1', 'driver-position'), ('rel-f1', 'driver-dnf'), ('rel-f1', 'driver-top3'), ('rel-f1', 'driver-constructor-result'), ('rel-hm', 'user-item-purchase'), ('rel-hm', 'user-churn'), ('rel-hm', 'item-sales'), ('rel-stack', 'user-engagement'), ('rel-stack', 'post-votes'), ('rel-stack', 'user-badge'), ('rel-stack', 'user-post-comment'), ('rel-stack', 'post-post-related'), ('rel-trial', 'study-outcome'), ('rel-trial', 'study-adverse'), ('rel-trial', 'study-withdrawal'), ('rel-trial', 'site-success'), ('rel-trial', 'condition-sponsor-run'), ('rel-trial', '

In [26]:
# Tasks to process, grouped by dataset. Iteration follows insertion order, so
# jobs are submitted dataset by dataset in the order written here. Comment out
# a task name (or a whole dataset entry) to skip it.
tasks_by_dataset = {
    "rel-amazon": [
        "user-churn",
        "user-ltv",
        "item-churn",
        "item-ltv",
        "user-item-purchase",
        "user-item-rate",
        "user-item-review",
    ],
    "rel-avito": [
        "ads-clicks",
        "user-visits",
        "user-clicks",
        "user-ad-visit",
    ],
    "rel-event": [
        "user-attendance",
        "user-repeat",
        "user-ignore",
    ],
    "rel-f1": [
        "driver-position",
        "driver-dnf",
        "driver-top3",
        "driver-constructor-result",
    ],
    "rel-hm": [
        "user-item-purchase",
        "user-churn",
        "item-sales",
    ],
    "rel-stack": [
        "user-engagement",
        "post-votes",
        "user-badge",
        "user-post-comment",
        "post-post-related",
    ],
    "rel-trial": [
        "study-outcome",
        "study-adverse",
        "study-withdrawal",
        "site-success",
        "condition-sponsor-run",
        "site-sponsor-run",
    ],
}

# Submit one job per (dataset, task). Each job runs a Python one-liner that
# loads the task with download=False and calls get_table for the train, val
# and test splits in turn.
for dataset_name, task_names in tasks_by_dataset.items():
    for task_name in task_names:
        # Statements of the one-liner, joined with "; " for `python -c`.
        py_cmd = "; ".join(
            [
                "from relbench.tasks import get_task",
                f'task = get_task("{dataset_name}", "{task_name}", download=False)',
                *(f'task.get_table("{split}")' for split in ("train", "val", "test")),
            ]
        )
        # Single quotes on the outside: the one-liner itself only uses double quotes.
        cmd = f"python -c '{py_cmd}'"
        # TODO: add requires for dataset existence
        roach.submit(queue, cmd)