In [2]:
import matplotlib.pyplot as plt

from tabpfn.scripts import tabular_baselines

import seaborn as sns
import numpy as np
from datasets import load_openml_list, valid_dids_classification, test_dids_classification, open_cc_dids
from tabpfn.scripts.tabular_baselines import *
from tabpfn.scripts.tabular_evaluation import evaluate
from tabpfn.scripts.tabular_metrics import calculate_score, make_ranks_and_wins_table, make_metric_matrix
from tabpfn.scripts import tabular_metrics

In [3]:
from notebook_utils import *

In [4]:
%load_ext autoreload

%autoreload 2

# Datasets

In [5]:
# Load the datasets identified by `open_cc_dids`; the call returns the dataset
# list together with a companion object bound to `cc_test_datasets_multiclass_df`.
cc_test_datasets_multiclass, cc_test_datasets_multiclass_df = load_openml_list(
    open_cc_dids,
    multiclass=True,
    shuffled=True,
    filter_for_nan=False,
    max_samples=10000,
    num_feats=100,
    return_capped=True,
    # max_num_classes=10
)


Number of datasets: 30
Loading balance-scale 11 ..
Loading mfeat-fourier 14 ..
Loading breast-w 15 ..
Loading mfeat-karhunen 16 ..
Loading mfeat-morphological 18 ..
Loading mfeat-zernike 22 ..
Loading cmc 23 ..
Loading credit-approval 29 ..
Loading credit-g 31 ..
Loading diabetes 37 ..
Loading tic-tac-toe 50 ..
Loading vehicle 54 ..
Loading eucalyptus 188 ..
Loading analcatdata_authorship 458 ..
Loading analcatdata_dmft 469 ..
Loading pc4 1049 ..
Loading pc3 1050 ..
Loading kc2 1063 ..
Loading pc1 1068 ..
Loading banknote-authentication 1462 ..
Loading blood-transfusion-service-center 1464 ..
Loading ilpd 1480 ..
Loading qsar-biodeg 1494 ..
Loading wdbc 1510 ..
Loading cylinder-bands 6332 ..
Loading dresses-sales 23381 ..
Loading MiceProtein 40966 ..
Loading car 40975 ..
Loading steel-plates-fault 40982 ..
Loading climate-model-simulation-crashes 40994 ..


In [6]:
# Number of entries in the dataset list returned by load_openml_list for open_cc_dids.
len(cc_test_datasets_multiclass)

30

In [7]:
def get_datasets(selector, task_type, suite='openml'):
    """Pick the notebook-level dataset list matching the requested split.

    Args:
        selector: 'valid' selects the validation list; any other value selects
            the test list.
        task_type: 'binary' selects the binary lists; any other value selects
            the multiclass lists.
        suite: 'openml' or 'cc'. Only consulted for non-binary tasks, where it
            decides which multiclass *test* list is returned.

    Returns:
        The dataset list bound to the matching notebook-level name.

    Raises:
        Exception: if the task is not binary and `suite` is neither 'openml'
            nor 'cc'.
    """
    # Binary tasks never look at `suite`.
    if task_type == 'binary':
        if selector == 'valid':
            return valid_datasets_binary
        return test_datasets_binary

    if suite != 'openml' and suite != 'cc':
        raise Exception("Unknown suite")

    # Both suites share the same validation list; only the test list differs.
    if selector == 'valid':
        return valid_datasets_multiclass
    return test_datasets_multiclass if suite == 'openml' else cc_test_datasets_multiclass

# Setting params

In [8]:
# Values forwarded to `evaluate` by `eval_method` (read there as notebook globals).
eval_positions = [1000]
max_features = 100  # NOTE(review): not referenced by any cell visible here
bptt = 2000
# 'valid' or 'test'; decides which list `get_datasets` returns.
selector = 'test'
# Was `os.path.join('.')`, which is just '.' and relied on `os` being pulled in
# by a star import rather than imported explicitly.
base_path = '.'
overwrite = False
# max_times = [0.5, 1, 15, 30, 60, 60*5, 60*15, 60*60]
# Time budgets handed to each method as `max_time` (presumably seconds — confirm).
max_times = [0.5]
metric_used = tabular_metrics.auc_metric
# methods = ['transformer', 'logistic', 'gp', 'knn', 'catboost', 'xgb', 'autosklearn2', 'autogluon']
# Each entry must be a key of `clf_dict`.
methods = ['transformer', 'logistic']
# 'binary' selects the binary dataset lists in `get_datasets`; anything else is multiclass.
task_type = 'multiclass'

# max_times = [0.5, 1, 5, 30, 60, 60*5, 60*15, 60*60]

In [9]:
# `suite` is also read as a global inside eval_method; with 'cc' the test split
# resolves to cc_test_datasets_multiclass.
suite = 'cc'
test_datasets = get_datasets('test',task_type, suite=suite)

In [10]:
# Method name -> callable that eval_method passes to `evaluate` as `model`.
# Several transformer variants share the same callable and differ only by name.
clf_dict = {
    'gp': gp_metric,
    'knn': knn_metric,
    'catboost': catboost_metric,
    'xgb': xgb_metric,  # kernel crashes
    'transformer': transformer_metric,  # our model (1) - trained 1h
    'i_transformer': transformer_metric,  # our model (2) - trained 12h
    'i_transformer_2': transformer_metric,  # our model (3) - trained 12h
    'i_transformer_3': transformer_metric,  # our model (4) - trained 12h
    'tab_transformer': transformer_metric,  # original TabPFN - trained 10h
    'logistic': logistic_metric,
    'autosklearn': autosklearn_metric,
    'autosklearn2': autosklearn2_metric,
    'autogluon': autogluon_metric,  # kernel crashes
    'lgbm': lightgbm_metric,  # kernel crashes
}

In [11]:
device = 'cpu'

def eval_method(task_type, method, dids, selector, eval_positions, max_time, metric_used, split_number, append_metric=True, fetch_only=False, verbose=False):
    """Run `evaluate` for one method on one or more datasets.

    Args:
        task_type: Forwarded to `get_datasets`; also used as `path_interfix`.
        method: Key into `clf_dict`; also the prefix of the label passed to
            `evaluate` as `method`.
        dids: Position in the selected dataset list, `None` for the whole
            list, or a list/tuple/range of such values.
        selector: Forwarded to `get_datasets` ('valid' or test otherwise).
        eval_positions: Forwarded to `evaluate`.
        max_time: Forwarded to `evaluate`; when truthy, `_time_<max_time>` is
            appended to the method label.
        metric_used: Forwarded to `evaluate`; its scoring string is appended
            to the method label when `append_metric` is true.
        split_number: Forwarded to `evaluate`.
        append_metric: Whether to append the metric's scoring string to the label.
        fetch_only: Forwarded to `evaluate`.
        verbose: Forwarded to `evaluate`.

    Returns:
        The value returned by `evaluate` for the last entry of `dids`, or
        `None` when `dids` is empty.

    Raises:
        KeyError: if `method` is not a key of `clf_dict`.
    """
    # A bare id (or None) becomes a one-element list; real sequences of ids are
    # kept as-is (previously a range/tuple was wrapped and then failed on `did+1`).
    if not isinstance(dids, (list, tuple, range)):
        dids = [dids]
    if len(dids) == 0:
        # Nothing to evaluate; previously this fell through to an unbound `result`.
        return None

    # Everything below is independent of the dataset id, so compute it once.
    all_datasets = get_datasets(selector, task_type, suite=suite)
    clf = clf_dict[method]
    time_string = '_time_' + str(max_time) if max_time else ''
    metric_used_string = '_' + tabular_baselines.get_scoring_string(metric_used, usage='') if append_metric else ''
    method_label = method + time_string + metric_used_string

    result = None
    for did in dids:
        # Slice (not index) so `evaluate` always receives a list of datasets.
        ds = all_datasets if did is None else all_datasets[did:did+1]

        # NOTE(review): base_path is hard-coded here and does not follow the
        # notebook-level `base_path` variable.
        result = evaluate(datasets=ds
                          , model=clf
                          , method=method_label
                          , bptt=bptt, base_path='/cluster/scratch/amurelis'
                          , eval_positions=eval_positions
                          , device=device, max_splits=1
                          , overwrite=overwrite
                          , save=True
                          , metric_used=metric_used
                          , path_interfix=task_type
                          , fetch_only=fetch_only
                          , split_number=split_number
                          , verbose=verbose
                          , max_time=max_time)

    return result

# Baseline Evaluation
This section submits the baseline evaluations as Slurm jobs; each job saves its results under the results directory created below.

In [22]:
base_path = '/cluster/scratch/amurelis'
!mkdir {base_path}/results
!mkdir {base_path}/results/tabular/
!mkdir {base_path}/results/tabular/multiclass/

skipping


In [None]:
from typing import List
from submitit import SlurmExecutor
class BoschSlurmExecutor(SlurmExecutor):
    """SlurmExecutor whose submission command is a plain `sbatch <file>`."""

    def _make_submission_command(self, submission_file_path) -> List[str]:
        """Return the argv list used to submit `submission_file_path`."""
        script = str(submission_file_path)
        return ["sbatch", script]

# Executor used by submit_evaluation; submission files are written to the
# current directory.
ex = BoschSlurmExecutor(folder="./")
ex.update_parameters(
    time=1200,  # NOTE(review): presumably minutes — confirm against submitit
    partition="baselines",
    mem_per_cpu=6000,
    nodes=1,
    cpus_per_task=1,
    ntasks_per_node=1,
)

In [None]:
# %%script echo skipping

# RUN ALL METHODS, SPLITS AND DATASETS
# Re-resolve the test datasets for the current `task_type` / `suite`.
test_datasets = get_datasets('test', task_type, suite=suite)

def submit_evaluation(dids, methods, max_times):
    """Submit one `eval_method` job per (dataset, metric, method, time, split).

    Reads `ex`, `eval_method`, `eval_positions` and `metric_used` from the
    notebook scope. `metric_used` may be a single metric or a list/tuple of
    metrics.

    Args:
        dids: Iterable of dataset positions, each passed to `eval_method` as `dids`.
        methods: Iterable of method names (keys of `clf_dict`).
        max_times: Iterable of time budgets passed to `eval_method` as `max_time`.

    Returns:
        List of the objects returned by `ex.submit`, in submission order.
    """
    # The original `for metric_used in metric_used` clause shadowed the global
    # inside the comprehension (unbound at that point) and, in this notebook,
    # the global is a single function rather than an iterable.
    metrics = metric_used if isinstance(metric_used, (list, tuple)) else [metric_used]

    return [
        ex.submit(eval_method, task, m, did, sel, eval_positions, max_time, metric, split_number)
        for did in dids
        for sel in ['test']
        for metric in metrics
        for m in methods
        for task in ['multiclass']
        for max_time in max_times
        for split_number in [1, 2, 3, 4, 5]
    ]

In [13]:
# Settings for this submission round; `overwrite` and `suite` are read as
# globals by eval_method.
methods = ['logistic']
max_times = [60*5, 60*15]
overwrite = False
suite = 'cc'

test_datasets = get_datasets('test', task_type, suite=suite)
# One id per dataset in the selected suite, instead of a hard-coded count.
dids = range(len(test_datasets))
jobs = submit_evaluation(dids, methods, max_times)

30

In [None]:
# Show the list of objects returned by ex.submit for the submitted evaluations.
jobs

In [None]:
# Inspect the stdout of one submitted job.
# NOTE(review): index 9 looks like an ad-hoc spot check — confirm or parameterize.
print(jobs[9].stdout())