## Run CPU baselines

In [None]:
import os
import subprocess
from joblib import Parallel, delayed
from multiprocessing import Queue

from itertools import product

### Params

In [None]:
# --- experiment launcher settings ---

# interpreter used to launch run_experiment.py
PYTHON = 'rapids-24.04/bin/python'

# value forwarded to every task through the --njobs flag
N_JOBS = 8

# number of joblib workers, i.e. how many tasks are in flight at once
N_PARALLEL_TASKS = 3

### Utils

In [None]:
# helpers that build and execute the shell command for a single task
def get_script(path, runner, tuner, model):
    """
    Build the shell command that launches run_experiment.py for one task.

    Args:
        path: dataset directory name, resolved relative to 'datasets'.
        runner: value passed to the --runner flag.
        tuner: value passed to the --tuner flag.
        model: value passed to the --model flag.

    Returns:
        The command as a string. The interpreter comes from PYTHON and the
        --njobs value from N_JOBS; seed, device and config file are fixed.
    """
    dataset_dir = os.path.join('datasets', path)
    # a backslash at the end of a line inside the literal is a line
    # continuation, so all flags end up on one line of the command
    return f"""
    {PYTHON} run_experiment.py \
        --path {dataset_dir} \
        --njobs {N_JOBS} \
        --seed 42 \
        --device 0 \
        --runner {runner} \
        --tuner {tuner} \
        --model {model} \
        --config config.yaml
    """


def run(path, model, runner, tuner):
    """
    Run a single task in a subprocess and wait for it to finish.

    Args:
        path: dataset directory name (forwarded to get_script).
        model: value for the --model flag.
        runner: value for the --runner flag.
        tuner: value for the --tuner flag.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status. The captured output is printed before re-raising.
    """
    # generate script
    script = get_script(path, runner, tuner, model)
    print(script)
    # run task; stderr is merged into stdout so one stream holds everything
    try:
        subprocess.check_output(script, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        # The exception message only carries the exit status, so without this
        # the child's captured output (the actual failure reason) is lost.
        captured = (err.output or b'').decode(errors='replace')
        print(f'Task failed with exit code {err.returncode}:{script}')
        print(captured)
        raise

    return

### Tasks list

In [None]:
# tasks list
datasets = [
    'synth1',
    'hillstrom',
    'criteo',
    'lenta',
    'megafon',
]

# tuple: (type of model, run function, objective with param space)
# each tuple is unpacked positionally into run(path, model, runner, tuner)
models = [
    # t learner
    ('xgb_t', 'meta', 'xgb_single'),
    # x learner
    ('xgb_x', 'meta', 'xgb_single'),
    # r learner
    ('xgb_r', 'meta', 'xgb_single'),
    # dr learner
    ('xgb_dr', 'meta', 'xgb_single'),
    # Causal RF
    ('crf', 'crf', 'crf'),
]

# every dataset is used in five numbered variants: '<name>_0' ... '<name>_4'
dataset_variants = [
    f'{name}_{idx}' for name in datasets for idx in range(5)
]

# combine datasets and models
# Materialised as a list: a bare itertools.product iterator is exhausted after
# one pass, so re-running the Run cell would silently execute no tasks.
tasks = list(product(dataset_variants, models))

### Run 

In [None]:
# dispatch every (dataset, model spec) pair to a pool of worker threads;
# each worker calls run(), which blocks on its own subprocess
with Parallel(n_jobs=N_PARALLEL_TASKS, backend='threading') as pool:
    pool(
        delayed(run)(dataset, *model_spec)
        for dataset, model_spec in tasks
    )