In [5]:
X = np.load('/home/stefanos/pasteurAIzer/data/time_series/numpy/y.npy', allow_pickle=True)

In [2]:
# to catch any changes to libraries without restarting the notebook kernel every time
%load_ext autoreload
%autoreload 2

import json
import os
import sys
from copy import deepcopy
from pathlib import Path
import numpy as np
import pandas as pd

REPO_DIR = os.path.abspath('..')  # path to the root of the repository
sys.path.append(REPO_DIR)
os.environ["PROJECT_DIR"] = REPO_DIR
import lib
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error, r2_score

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [5]:
# CALIFORNIA = 'california_housing'
# ADULT = 'adult'
# HELENA = 'helena'
# JANNIS = 'jannis'
# HIGGS = 'higgs_small'
# ALOI = 'aloi'
# EPSILON = 'epsilon'
# YEAR = 'year'
# COVTYPE = 'covtype'
# YAHOO = 'yahoo'
# MICROSOFT = 'microsoft'
# ALL_DATASETS = [CALIFORNIA, ADULT, HELENA, JANNIS, HIGGS, ALOI, EPSILON, YEAR, COVTYPE, YAHOO, MICROSOFT]
PASTER = 'tabular_100_trials_32_batch_size'
ALL_DATASETS = [PASTER]
DATASET_NAMES = {
    PASTER: 'Paster whole'
}
"""
DATASET_NAMES = {
    CALIFORNIA: 'California Housing',
    ADULT: 'Adult',
    HELENA: 'Helena',
    JANNIS: 'Jannis',
    HIGGS: 'Higgs Small',
    ALOI: 'ALOI',
    EPSILON: 'Epsilon',
    YEAR: 'Year',
    COVTYPE: 'Covertype',
    YAHOO: 'Yahoo',
    MICROSOFT: 'Microsoft',
}"""
REGRESSION_DATASETS = [x for x in ALL_DATASETS if lib.load_dataset_info(x)['task_type'] == lib.REGRESSION]
DETAILS = ['task_type', 'n_objects', 'n_features']
PARTS = ['test', 'val', 'train']


def format_scores(df, precision):
    def f(record):
        if record['task_type'] == lib.REGRESSION:
            for part in PARTS:
                for suffix in 'best', 'score':
                    key = f'{part}_{suffix}'
                    if key in record:
                        record[key] *= -1
        for k, v in list(record.items()):
            if isinstance(v, float):
                record[k] = round(v, precision)
        return record
    return df.apply(f, axis=1)


def load_record(output):
    output = Path(output)
    if not output.exists():
        return None
    path = output / 'stats.json'
    if not path.exists():
        print(f'WARNING! This path does not exist: {path}')
        return None
    stats = lib.load_json(path)
    metrics = stats.get('metrics')
    if metrics is None:
        return None

    dataset = Path(stats['dataset']).name
    info = lib.load_dataset_info(dataset)
    dataset, algorithm, experiment, suffix = str(output.relative_to(lib.env.OUTPUT_DIR)).split('/', 4)
    r = {
        'dataset': DATASET_NAMES[dataset],
        'task_type': info['task_type'],
        'n_objects': info['size'],
        'n_features': info['n_num_features'] + info['n_cat_features'],
        'algorithm': algorithm + f' | {experiment}',
        's': suffix
    }
    for x in PARTS:
        if x in stats['metrics']:
            r[f'{x}_score'] = stats['metrics'][x]['score']
    return r


def sort(df, by):
    if isinstance(by, str):
        by = [by]
    return df.sort_values(['n_objects'] + by, ascending=[True] + ['score' not in x for x in by]).reset_index(drop=True)


def make_df(outputs_and_names):
    df = []
    for output, algorithm_name in outputs_and_names:
        record = load_record(output)
        if not record:
            continue
        if algorithm_name is not None:
            record['algorithm'] = algorithm_name
        df.append(record)
    df = sort(pd.DataFrame(df).fillna(0.0), 'val_score').reset_index(drop=True)
    return df


def collect_outputs(experiment_dir, filter_info=None):
    if isinstance(filter_info, int):
        filter_info = [str(x) for x in range(filter_info)]
    if isinstance(filter_info, list):
        assert all(isinstance(x, str) for x in filter_info)
        filter_fn = lambda x: x.name in filter_info
    elif callable(filter_info):
        filter_fn = filter_info
    else:
        assert filter_info is None
        filter_fn = lambda x: True

    outputs = []
    if not isinstance(experiment_dir, Path):
        experiment_dir = lib.env.OUTPUT_DIR / experiment_dir
    if experiment_dir.exists():
        outputs.extend(
            filter(
                filter_fn,
                filter(Path.is_dir, experiment_dir.iterdir())
            )
        )
    return outputs


def aggregate(df):
    aggrs = dict(
        task_type=('task_type', 'first'),
        n_objects=('n_objects', 'first'),
        n_features=('n_features', 'first'),
        test_score=('test_score', 'mean'),
        test_std=('test_score', 'std'),
        val_score=('val_score', 'mean'),
        val_std=('val_score', 'std'),
        count=('test_score', 'count')
    )
    if 'train_score' in df.columns:
        aggrs.update(dict(
            train_score=('train_score', 'mean'),
            train_std=('train_score', 'std'),
        ))
    df = df.groupby(['dataset', 'algorithm']).agg(**aggrs)
    df['count'] = df['count'].astype(int)
    return df.reset_index().fillna(0.0)


def build_report(outputs_and_names):
    df = make_df(outputs_and_names)
    df = aggregate(df)
    df = sort(df, 'test_score')
    df = format_scores(df, 4)
    # df = df.set_index(['dataset', 'algorithm']).drop(columns=DETAILS)
    df = df.set_index(['dataset'] + DETAILS + ['algorithm'])
    return df[['test_score', 'test_std', 'val_score', 'val_std', 'train_score', 'train_std', 'count']]

## Default configurations (GBDT and FT-Transformer)

In [3]:
""""
all_datasets = set(deepcopy(ALL_DATASETS))
n_seeds = 15
ensemble_names = ['0_4', '5_9', '10_14']
outputs_and_names = []
for experiment, algorithm_name, datasets in [
    ('ft_transformer/default', 'FT-Transformer', all_datasets),
    ('catboost/default', 'CatBoost', all_datasets),
    ('xgboost/default', 'XGBoost', all_datasets),
]:
    for dataset in datasets:
        for output in collect_outputs(dataset + '/' + experiment, n_seeds):
            outputs_and_names.append((output, algorithm_name))
        for output in collect_outputs(dataset + '/' + experiment + '_ensemble', ensemble_names):
            outputs_and_names.append((output, '(e) ' + algorithm_name))
build_report(outputs_and_names)
"""

'"\nall_datasets = set(deepcopy(ALL_DATASETS))\nn_seeds = 15\nensemble_names = [\'0_4\', \'5_9\', \'10_14\']\noutputs_and_names = []\nfor experiment, algorithm_name, datasets in [\n    (\'ft_transformer/default\', \'FT-Transformer\', all_datasets),\n    (\'catboost/default\', \'CatBoost\', all_datasets),\n    (\'xgboost/default\', \'XGBoost\', all_datasets),\n]:\n    for dataset in datasets:\n        for output in collect_outputs(dataset + \'/\' + experiment, n_seeds):\n            outputs_and_names.append((output, algorithm_name))\n        for output in collect_outputs(dataset + \'/\' + experiment + \'_ensemble\', ensemble_names):\n            outputs_and_names.append((output, \'(e) \' + algorithm_name))\nbuild_report(outputs_and_names)\n'

## Evaluate all ML models for the given dataset for 15 seeds

In [6]:
# Evaluate

all_datasets = set(deepcopy(ALL_DATASETS))
n_seeds = 15
outputs_and_names = []
for experiment, algorithm_name, datasets in [
    ('mlp/tuned', 'MLP', all_datasets),
    ('resnet/tuned', 'ResNet', all_datasets),
    ('snn/tuned', 'SNN', all_datasets),
    ('dcn2/tuned', 'DCN V2', all_datasets),
    ('tabnet/tuned', 'TabNet', all_datasets),
    ('grownet/tuned', 'GrowNet', all_datasets),  # GrowNet does not support multiclass problems
    # ('node/tuned', 'NODE', all_datasets - {HELENA, ALOI}),
    # ('node/default', 'NODE', {HELENA, ALOI}),
    ('autoint/tuned', 'AutoInt', all_datasets),
    ('ft_transformer/tuned', 'FT-Transformer', all_datasets),
    ('catboost_/tuned', 'CatBoost', all_datasets),
    ('xgboost_/tuned', 'XGBoost', all_datasets),
    ('lightgbm_/tuned', 'LightGBM', all_datasets),

    # ('ft_transformer/default', 'FT-Transformer | default', {YAHOO}),
]:
    for dataset in datasets:
        for output in collect_outputs(dataset + '/' + experiment, n_seeds):
            outputs_and_names.append((output, algorithm_name))
build_report(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_score,test_std,val_score,val_std,train_score,train_std,count
dataset,task_type,n_objects,n_features,algorithm,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Paster whole,regression,409,22,ResNet,3.018,0.3482,2.6966,0.1244,2.6553,0.497,15
Paster whole,regression,409,22,MLP,3.0276,0.1573,2.757,0.1527,1.9696,0.1776,15
Paster whole,regression,409,22,FT-Transformer,3.2137,0.3824,2.705,0.1113,2.4099,0.2293,15
Paster whole,regression,409,22,GrowNet,3.2142,0.4053,3.274,0.2868,2.2732,0.365,15
Paster whole,regression,409,22,AutoInt,3.3174,0.4278,2.8925,0.3002,2.3581,0.1716,15
Paster whole,regression,409,22,LightGBM,3.7207,0.1152,3.4906,0.1586,1.0157,0.4034,15
Paster whole,regression,409,22,SNN,3.8946,1.1779,3.0403,0.3704,3.0309,0.8791,15
Paster whole,regression,409,22,CatBoost,3.9061,0.3149,3.8787,0.2355,2.0268,0.1964,15
Paster whole,regression,409,22,DCN V2,3.9629,0.5124,2.9626,0.3041,2.7395,0.4104,15
Paster whole,regression,409,22,XGBoost,3.969,0.4123,3.7192,0.3762,1.1191,0.3533,15


## All models and their ensembles

In [None]:
all_datasets = set(deepcopy(ALL_DATASETS))
n_seeds = 15
ensemble_names = ['0_4', '5_9', '10_14']
outputs_and_names = []
for experiment, algorithm_name, datasets in [
    ('mlp/tuned', 'MLP', all_datasets),
    ('resnet/tuned', 'ResNet', all_datasets),
    ('snn/tuned', 'SNN', all_datasets),
    ('dcn2/tuned', 'DCN V2', all_datasets),
    ('tabnet/tuned', 'TabNet', all_datasets),
    ('grownet/tuned', 'GrowNet', all_datasets),  # GrowNet does not support multiclass problems
    # ('node/tuned', 'NODE', all_datasets - {HELENA, ALOI}),
    # ('node/default', 'NODE', {HELENA, ALOI}),
    ('autoint/tuned', 'AutoInt', all_datasets),
    ('ft_transformer/tuned', 'FT-Transformer', all_datasets),
    ('catboost_/tuned', 'CatBoost', all_datasets),
    ('xgboost_/tuned', 'XGBoost', all_datasets),
    ('lightgbm_/tuned', 'LightGBM', all_datasets),
]:
    for dataset in datasets:
        for output in collect_outputs(dataset + '/' + experiment, n_seeds):
            outputs_and_names.append((output, algorithm_name))
        for output in collect_outputs(dataset + '/' + experiment + '_ensemble', ensemble_names):
            outputs_and_names.append((output, '(e) ' + algorithm_name))
build_report(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_score,test_std,val_score,val_std,train_score,train_std,count
dataset,task_type,n_objects,n_features,algorithm,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Paster whole,regression,409,22,(e) SNN,2.6951,0.4247,2.4643,0.0345,2.3936,0.4293,3
Paster whole,regression,409,22,(e) FT-Transformer,2.7593,0.1567,2.5653,0.0027,2.2275,0.0701,3
Paster whole,regression,409,22,(e) ResNet,2.8308,0.337,2.5212,0.0753,2.4085,0.1852,3
Paster whole,regression,409,22,(e) GrowNet,2.8751,0.1798,2.9447,0.1399,1.723,0.1127,3
Paster whole,regression,409,22,(e) MLP,2.977,0.0721,2.6776,0.0691,1.8947,0.0251,3
Paster whole,regression,409,22,ResNet,3.018,0.3482,2.6966,0.1244,2.6553,0.497,15
Paster whole,regression,409,22,(e) AutoInt,3.0188,0.0791,2.6842,0.0599,2.2099,0.0954,3
Paster whole,regression,409,22,MLP,3.0276,0.1573,2.757,0.1527,1.9696,0.1776,15
Paster whole,regression,409,22,FT-Transformer,3.2137,0.3824,2.705,0.1113,2.4099,0.2293,15
Paster whole,regression,409,22,GrowNet,3.2142,0.4053,3.274,0.2868,2.2732,0.365,15


## Ablation Study

In [None]:
# all_datasets = {CALIFORNIA, HELENA, JANNIS, HIGGS, ALOI, YEAR, COVTYPE, MICROSOFT}
# all_datasets = {CALIFORNIA}
"""
n_seeds = 15
outputs_and_names = []
for experiment, algorithm_name, datasets in [
    ('autoint/tuned', 'AutoInt', all_datasets),
    ('ft_transformer/tuned_nobias', 'FT-Transformer | nobias', all_datasets),
    ('ft_transformer/tuned', 'FT-Transformer', all_datasets),
]:
    for dataset in datasets:
        for output in collect_outputs(dataset + '/' + experiment, n_seeds):
            outputs_and_names.append((output, algorithm_name))
build_report(outputs_and_names)
"""

"\nn_seeds = 15\noutputs_and_names = []\nfor experiment, algorithm_name, datasets in [\n    ('autoint/tuned', 'AutoInt', all_datasets),\n    ('ft_transformer/tuned_nobias', 'FT-Transformer | nobias', all_datasets),\n    ('ft_transformer/tuned', 'FT-Transformer', all_datasets),\n]:\n    for dataset in datasets:\n        for output in collect_outputs(dataset + '/' + experiment, n_seeds):\n            outputs_and_names.append((output, algorithm_name))\nbuild_report(outputs_and_names)\n"