In [24]:
import time
from pathlib import Path

import roach

from relbench.datasets import get_dataset, get_dataset_names
from relbench.tasks import get_task, get_task_names

In [8]:
# Clear the current relbench cache without deleting it: move
# ~/.cache/relbench aside to ~/.cache/relbench.<unix-seconds>.
ts = int(time.time())
home = Path.home()
relbench_cache = home / ".cache" / "relbench"
relbench_backup = home / ".cache" / f"relbench.{ts}"
# With no existing cache there is nothing to back up, so skip the rename
# instead of failing with FileNotFoundError (e.g. fresh machine, or this cell
# being run twice). The rename result stays the cell's last expression so it
# is still displayed when a backup was made.
relbench_cache.rename(relbench_backup) if relbench_cache.exists() else None

PosixPath('/afs/cs.stanford.edu/u/ranjanr/.cache/relbench.1719789551')

In [13]:
# Queue name handed to roach.submit as its first argument by the cells below.
queue = "relbench"

In [None]:
# Submit a single job that downloads the H&M Kaggle competition archive and
# moves it into data/hm-recommendation. The shell steps are chained with "&&"
# so a failing step stops the ones after it.
hm_download_steps = [
    "kaggle competitions download -c h-and-m-personalized-fashion-recommendations",
    "mkdir -p data/hm-recommendation",
    "mv h-and-m-personalized-fashion-recommendations.zip data/hm-recommendation",
]
cmd = " && ".join(hm_download_steps)
roach.submit(queue, cmd)

In [53]:
# Submit one job per dataset that runs
# get_dataset(<name>, download=False).get_db() in a fresh Python process.
# Only rel-hm carries a real precondition (the Kaggle zip must already be on
# disk); every other dataset uses the always-true shell command "true".
hm_zip = "data/hm-recommendation/h-and-m-personalized-fashion-recommendations.zip"
for dataset_name in get_dataset_names():
    py_statements = [
        "from relbench.datasets import get_dataset",
        f'get_dataset("{dataset_name}", download=False).get_db()',
    ]
    py_cmd = "; ".join(py_statements)
    # Single quotes wrap the program for the shell; the Python code itself
    # only uses double quotes.
    cmd = f"python -c '{py_cmd}'"
    requires = f"test -f {hm_zip}" if dataset_name == "rel-hm" else "true"
    roach.submit(queue, cmd, requires)

In [59]:
# Submit one job per (dataset, task) pair. Each job loads the task with
# download=False and calls get_table for the train, val and test splits.
# A job is gated on the dataset's db directory existing in the cache.
for dataset_name in get_dataset_names():
    for task_name in get_task_names(dataset_name):
        py_statements = [
            "from relbench.tasks import get_task",
            f'task = get_task("{dataset_name}", "{task_name}", download=False)',
        ]
        py_statements.extend(
            f'task.get_table("{split}")' for split in ("train", "val", "test")
        )
        py_cmd = "; ".join(py_statements)
        cmd = f"python -c '{py_cmd}'"
        requires = f"test -d ~/.cache/relbench/{dataset_name}/db"
        roach.submit(queue, cmd, requires)

In [54]:
# Submit one job per dataset that packs its cached db directory into
# ~/.cache/relbench_upload/<dataset>/db.zip. A job is gated on the db
# directory existing in the cache.
for dataset_name in get_dataset_names():
    dataset_cache = f"~/.cache/relbench/{dataset_name}"
    dataset_upload = f"~/.cache/relbench_upload/{dataset_name}"
    # zip stores member paths as they are given on the command line. Passing
    # ~/.cache/relbench/<dataset>/db would bake this user's expanded home
    # directory into the archive, so cd into the dataset cache first and add
    # the relative "db" directory; the archive then unpacks to db/...
    cmd = (
        f"mkdir -p {dataset_upload} && "
        f"cd {dataset_cache} && "
        f"zip -r {dataset_upload}/db db"
    )
    requires = f"test -d {dataset_cache}/db"
    roach.submit(queue, cmd, requires)

In [55]:
# Submit one job per (dataset, task) pair that packs the cached task
# directory into ~/.cache/relbench_upload/<dataset>/tasks/<task>.zip. A job is
# gated on the task directory existing in the cache.
for dataset_name in get_dataset_names():
    for task_name in get_task_names(dataset_name):
        tasks_cache = f"~/.cache/relbench/{dataset_name}/tasks"
        tasks_upload = f"~/.cache/relbench_upload/{dataset_name}/tasks"
        # zip stores member paths as they are given on the command line.
        # Passing the full ~/.cache/... path would bake this user's expanded
        # home directory into the archive, so cd into the tasks directory and
        # add the relative <task> directory; the archive then unpacks to
        # <task>/...
        cmd = (
            f"mkdir -p {tasks_upload} && "
            f"cd {tasks_cache} && "
            f"zip -r {tasks_upload}/{task_name} {task_name}"
        )
        requires = f"test -d {tasks_cache}/{task_name}"
        roach.submit(queue, cmd, requires)

In [56]:
# Submit the job that runs utils.db_hashes(). It is gated on a db.zip being
# present in the upload directory for every dataset (one "test -f" per
# dataset, and-ed together).
tests = [
    f"test -f ~/.cache/relbench_upload/{dataset_name}/db.zip"
    for dataset_name in get_dataset_names()
]
requires = " && ".join(tests)
cmd = "python -c 'import utils; utils.db_hashes()'"
roach.submit(queue, cmd, requires)

In [57]:
# Submit the job that runs utils.task_hashes(). It is gated on a <task>.zip
# being present in the upload directory for every (dataset, task) pair (one
# "test -f" per pair, and-ed together).
tests = [
    f"test -f ~/.cache/relbench_upload/{dataset_name}/tasks/{task_name}.zip"
    for dataset_name in get_dataset_names()
    # Task names are looked up per dataset, as in the other cells; the
    # previous argument-less get_task_names() call did not do that.
    for task_name in get_task_names(dataset_name)
]
requires = " && ".join(tests)
cmd = "python -c 'import utils; utils.task_hashes()'"
roach.submit(queue, cmd, requires)