In [8]:
from __future__ import annotations

import argparse
import os
from itertools import product

from submitit.submitit import SlurmExecutor

import tabpfn.scripts.tabular_baselines as tb
from tabpfn.scripts.tabular_metrics import (calculate_score, time_metric)

from eval_utils import Dataset, Results, arguments, do_evaluations, METHODS, METRICS, eval_method, set_seed

In [None]:
def do_evaluations(args: argparse.Namespace, datasets, slurm_executer: "SlurmExecutor | None" = None) -> "tuple[dict, dict]":
    """Evaluate every (seed, method, metric, time, split) combination.

    Note: this definition shadows the ``do_evaluations`` name imported from
    ``eval_utils`` at the top of the notebook.

    Args:
        args: Namespace providing ``seeds``, ``methods``,
            ``optimization_metrics``, ``times``, ``splits``, ``result_path``,
            ``eval_positions``, ``fetch_only``, ``verbose`` and ``overwrite``.
        datasets: Datasets forwarded unchanged to ``eval_method``.
        slurm_executer: When ``None`` every evaluation runs in this process.
            Otherwise each evaluation is handed to ``slurm_executer.submit``.

    Returns:
        A ``(results, jobs)`` tuple of dicts keyed by
        ``"{method}_time_{time}{metric_name}_split_{split}_seed_{seed}"``.
        ``results`` holds the return values of the in-process ``eval_method``
        calls; ``jobs`` holds whatever ``slurm_executer.submit`` returned.
        Exactly one of the two is populated per call.
    """
    results = {}
    jobs = {}
    for seed, method, metric, max_time, split in product(
        args.seeds,
        args.methods,
        args.optimization_metrics,
        args.times,
        range(0, args.splits),
    ):
        set_seed(seed=seed)
        metric_f = METRICS[metric]
        metric_name = tb.get_scoring_string(metric_f, usage="")
        key = f"{method}_time_{max_time}{metric_name}_split_{split}_seed_{seed}"

        # Single source of truth for the arguments, so the local and the
        # Slurm path can never drift apart.
        eval_kwargs = dict(
            datasets=datasets,
            label=method,
            result_path=args.result_path,
            classifier_evaluator=METHODS[method],
            eval_positions=args.eval_positions,  # It's a constant basically
            fetch_only=args.fetch_only,
            verbose=args.verbose,
            max_time=max_time,
            metric_used=metric_f,
            split=split,
            seed=seed,
            overwrite=args.overwrite,
        )

        if slurm_executer is None:
            results[key] = eval_method(**eval_kwargs)
        else:
            # Submit the callable exactly once; the previous nested submit
            # passed the returned job handle to a second submit call.
            jobs[key] = slurm_executer.submit(eval_method, **eval_kwargs)

    return results, jobs

In [9]:
# Options for this run; they are expanded into an argparse.Namespace further
# down so the evaluation code can read them as attributes.
experiment_options = {
    'result_path': None,  # overwritten with base_path once the namespace is built
    'times': [30], #3600],
    'seeds': [896], # , 125, 624, 438, 706],
    'splits': 5,  # evaluations iterate over range(0, splits)
    'validation_datasets': [13], # None,
    'test_datasets': [39], # None,
    'optimization_metrics': ['roc'],
    'recorded_metrics': ["roc", "cross_entropy", "acc", "brier_score", "ece"],
    'methods': [
        'svm_default',
    ],
    # Method names that were previously listed here and are currently disabled:
    #     'gradient_boosting',
    #     'knn',
    #     'gp',
    #     'lightgbm',
    #     'xgb',
    #     'random_forest',
    #     'logistic',
    #     'svm_default',
    #     'gradient_boosting_default',
    #     'gp_default',
    #     'lightgbm_default',
    #     'xgb_default',
    #     'rf_default',
    #     'transformer_cpu_N_1',
    #     'transformer_cpu_N_8',
    #     'transformer_cpu_N_32'
    'bptt': 2000,
    # The three keys below are read as args.eval_positions / args.fetch_only /
    # args.verbose by the evaluation and scoring cells but were missing here,
    # which raised AttributeError.
    # NOTE(review): the values are assumed defaults - confirm against the
    # argument parser this notebook was derived from.
    'eval_positions': [1000],
    'fetch_only': False,
    'verbose': False,
    'overwrite': False
}


In [None]:

class BoschSlurmExecutor(SlurmExecutor):
    """SlurmExecutor variant whose submission command carries a '--bosch' token."""

    def _make_submission_command(self, submission_file_path):
        """Return the argv list used to submit ``submission_file_path``.

        NOTE(review): sbatch normally treats tokens after the script path as
        arguments to the script rather than sbatch options - confirm that the
        target cluster honours '--bosch' in this position.
        """
        script_path = str(submission_file_path)
        command = ["sbatch", script_path, '--bosch']
        return command

# Root directory of this experiment; results and Slurm logs live below it.
base_path = '/work/dlclarge1/rkohli-results_tabpfn_180/results_1667931216'
# presumably "%j" is a per-job placeholder expanded by the executor - TODO confirm
log_folder = os.path.join(base_path, "log_test/%j")

# Resource request applied to every job submitted through the executor.
slurm_parameters = dict(
    time=30,
    partition="bosch_cpu-cascadelake",  # alternative noted earlier: mldlc_gpu-rtx2080
    mem_per_cpu=6000,
    nodes=1,
    cpus_per_task=1,
    ntasks_per_node=1,
    #  setup=['export MKL_THREADING_LAYER=GNU']
)

slurm_executer = BoschSlurmExecutor(folder=log_folder)
slurm_executer.update_parameters(**slurm_parameters)



In [None]:

# Expose the experiment options as attributes and point them at the
# experiment directory.
args = argparse.Namespace(**experiment_options)
args.result_path = base_path

# Fall back to the "cc_valid" / "cc_test" identifiers when no explicit
# dataset ids were configured.
if not args.validation_datasets:
    args.validation_datasets = "cc_valid"

if not args.test_datasets:
    args.test_datasets = "cc_test"

# We need to create some directories for this to work.
# os.mkdir accepts neither `parents` nor `exist_ok`; os.makedirs creates the
# intermediate directories and tolerates an already existing target.
out_dir = os.path.join(args.result_path, "results", "tabular", "multiclass")
os.makedirs(out_dir, exist_ok=True)


def filter_f(d):
    """Keep every dataset except the one named "flags"."""
    return d.name != "flags"


valid_datasets = Dataset.fetch(args.validation_datasets, only=filter_f)
test_datasets = Dataset.fetch(args.test_datasets, only=filter_f)

# Validation and test datasets are evaluated together.
all_datasets = valid_datasets + test_datasets

In [None]:
# No Slurm executor is passed, so do_evaluations takes its in-process branch:
# `results` is filled and `jobs` comes back empty.
results, jobs = do_evaluations(args=args, datasets=all_datasets)

In [None]:
datasets_as_lists = [dataset.as_list() for dataset in all_datasets]

# Arguments shared by every calculate_score call below.
score_kwargs = dict(
    global_results=results,
    ds=datasets_as_lists,
    eval_positions=args.eval_positions,
)

# This will update the results in place, once per recorded metric
for metric in args.recorded_metrics:
    calculate_score(metric=METRICS[metric], name=metric, **score_kwargs)

# We also get the times
calculate_score(metric=time_metric, name="time", **score_kwargs)

# Collect everything (recorded metrics plus "time") and write it to a CSV
# file inside out_dir.
metrics_with_time = args.recorded_metrics + ["time"]
final_results = Results.from_dict(
    results,
    datasets=all_datasets,
    recorded_metrics=metrics_with_time,
)
results_csv_path = os.path.join(out_dir, "results.csv")
final_results.df.to_csv(results_csv_path, index=True)