In [1]:
import pandas as pd
import numpy as np
import ast
from tabulate import tabulate
import json
import yaml
import matplotlib.pyplot as plt
import itertools
import dataframe_image as dfi


# Lift pandas' display truncation so every row and column of a results table is rendered.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [2]:
def process_csv(df):
    """Drop the saved index column and rewrite every metric cell as ``"mean ± (std)"``.

    Every column except ``"model"`` is expected to hold strings made of a mean and a
    standard deviation separated by ``+`` or ``±`` (the std may be wrapped in
    parentheses). Both numbers are rounded to 2 decimals and re-joined as
    ``"<mean> ± (<std>)"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table. It is modified in place: the ``"Unnamed: 0"`` column is
        removed when present and the metric columns are overwritten.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for convenient ``df = process_csv(df)`` usage.

    Raises
    ------
    ValueError
        If a string cell's mean or std part cannot be converted to ``float``.
    IndexError
        If a string cell contains no ``+`` / ``±`` separator.
    """
    def _format_cell(x):
        # Non-string cells (e.g. NaN where a metric is missing) cannot be parsed;
        # pass them through unchanged instead of crashing on ``x.replace``.
        if not isinstance(x, str):
            return x
        unified = x.replace("+", "±")
        mean = round(float(unified.split("±")[0]), 2)
        # Parentheses are stripped only for the std part, which may be written "(0.03)".
        std = round(float(unified.replace("(", "").replace(")", "").split("±")[1]), 2)
        return str(mean) + " ± (" + str(std) + ")"

    # errors="ignore": the index column only exists on a freshly read CSV; tolerating
    # its absence lets the function be re-applied to an already processed frame.
    df.drop(columns=["Unnamed: 0"], inplace=True, errors="ignore")
    for col in df.columns:
        if col != "model":
            df[col] = df[col].apply(_format_cell)
    return df

# Custom styling functions for ``DataFrame.style.apply``
def _metric_sort_key(s):
    """Return numeric comparison keys for a column of metric cells.

    Numeric columns are returned unchanged. Otherwise the leading number of each
    cell's text (the mean in ``"mean ± (std)"``) is extracted and converted to a
    float; cells without a leading number become NaN.
    """
    if pd.api.types.is_numeric_dtype(s):
        return s
    leading = s.astype(str).str.extract(
        r"^\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)", expand=False
    )
    return pd.to_numeric(leading, errors="coerce")


def highlight_max_row(s):
    """Return per-cell CSS that shades the largest value(s) of ``s`` green.

    Cells formatted as ``"mean ± (std)"`` are compared on their numeric mean rather
    than as text, so ``"10.2 ..."`` ranks above ``"9.63 ..."`` and cells with equal
    means are all highlighted regardless of their std. If no cell has a parseable
    leading number, the raw values are compared directly.

    Parameters
    ----------
    s : pandas.Series
        One column (or row) handed over by ``Styler.apply``.

    Returns
    -------
    list of str
        ``"background-color: green"`` for winning cells, ``""`` for the others.
    """
    keys = _metric_sort_key(s)
    # Fall back to comparing the raw values when nothing could be parsed.
    is_max = keys == keys.max() if keys.notna().any() else s == s.max()
    return ["background-color: green" if v else "" for v in is_max]


def highlight_min_row(s):
    """Return per-cell CSS that shades the smallest value(s) of ``s`` green.

    Mirror image of ``highlight_max_row``, for metrics where lower is better.
    Cells formatted as ``"mean ± (std)"`` are compared on their numeric mean; if no
    cell has a parseable leading number, the raw values are compared directly.

    Parameters
    ----------
    s : pandas.Series
        One column (or row) handed over by ``Styler.apply``.

    Returns
    -------
    list of str
        ``"background-color: green"`` for winning cells, ``""`` for the others.
    """
    keys = _metric_sort_key(s)
    # Fall back to comparing the raw values when nothing could be parsed.
    is_min = keys == keys.min() if keys.notna().any() else s == s.min()
    return ["background-color: green" if v else "" for v in is_min]

In [3]:
# Metric columns highlighted by default; several dataset cells below override this list.
metric_cols = ["accuracy", "roc_auc", "lift", "f1", "recall", "precision", "area_under_pr"]

# Sub-folder of ./tables/ that the styled PNG exports are written to.
folder = "default"

# Titanic

In [4]:
dataset_name = "titanic"
csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: column names are taken from row index 3 of the CSV.
df = process_csv(pd.read_csv(csv_path, header=3))

# Shade the top entry of each metric column, save the styled table as a PNG, then display it.
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,catboost,0.84 ± (0.03),0.88 ± (0.02),2.54 ± (0.09),0.77 ± (0.04),0.71 ± (0.05),0.84 ± (0.05),0.86 ± (0.02)
1,tabtransformer,0.8 ± (0.02),0.84 ± (0.01),2.54 ± (0.08),0.74 ± (0.02),0.74 ± (0.02),0.74 ± (0.02),0.83 ± (0.02)
2,autoint,0.79 ± (0.02),0.86 ± (0.02),2.51 ± (0.09),0.74 ± (0.03),0.78 ± (0.04),0.71 ± (0.02),0.83 ± (0.01)
3,gandalf,0.82 ± (0.03),0.87 ± (0.02),2.51 ± (0.09),0.76 ± (0.05),0.77 ± (0.07),0.76 ± (0.04),0.85 ± (0.02)
4,xgb,0.84 ± (0.02),0.87 ± (0.02),2.48 ± (0.12),0.78 ± (0.03),0.74 ± (0.04),0.82 ± (0.02),0.85 ± (0.03)
5,resnet,0.79 ± (0.03),0.84 ± (0.03),2.45 ± (0.25),0.67 ± (0.05),0.57 ± (0.07),0.84 ± (0.06),0.81 ± (0.05)
6,mlp,0.8 ± (0.01),0.83 ± (0.01),2.45 ± (0.14),0.71 ± (0.02),0.65 ± (0.03),0.79 ± (0.02),0.8 ± (0.01)
7,s1dcnn,0.81 ± (0.02),0.84 ± (0.03),2.3 ± (0.21),0.72 ± (0.03),0.65 ± (0.03),0.82 ± (0.05),0.78 ± (0.05)
8,fttransformer,0.78 ± (0.09),0.79 ± (0.17),2.21 ± (0.72),0.61 ± (0.31),0.62 ± (0.31),0.61 ± (0.31),0.76 ± (0.2)
9,gate,0.73 ± (0.08),0.78 ± (0.11),2.18 ± (0.41),0.55 ± (0.28),0.55 ± (0.29),0.55 ± (0.28),0.72 ± (0.13)


# Iris

In [5]:
dataset_name = "iris"
# Only these two metrics are highlighted for this dataset.
metric_cols = ["accuracy", "f1"]

csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: column names are taken from row index 3 of the CSV.
df = process_csv(pd.read_csv(csv_path, header=3))

# Shade the top entry of each metric column, save the styled table as a PNG, then display it.
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,f1
0,autoint,0.98 ± (0.03),0.98 ± (0.03)
1,fttransformer,0.98 ± (0.03),0.98 ± (0.03)
2,catboost,0.96 ± (0.02),0.96 ± (0.03)
3,tabtransformer,0.96 ± (0.05),0.96 ± (0.05)
4,xgb,0.96 ± (0.02),0.96 ± (0.03)
5,gandalf,0.95 ± (0.05),0.95 ± (0.05)
6,s1dcnn,0.93 ± (0.05),0.93 ± (0.05)
7,gate,0.91 ± (0.13),0.89 ± (0.17)
8,resnet,0.91 ± (0.08),0.91 ± (0.08)
9,categoryembedding,0.91 ± (0.11),0.9 ± (0.11)


# Breastcancer

In [6]:
dataset_name = "breastcancer"
# Metric columns highlighted for this dataset.
metric_cols = ["accuracy", "roc_auc", "lift", "f1", "recall", "precision", "area_under_pr"]

csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: column names are taken from row index 3 of the CSV.
df = process_csv(pd.read_csv(csv_path, header=3))

# Shade the top entry of each metric column, save the styled table as a PNG, then display it.
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,gate,0.98 ± (0.01),1.0 ± (0.01),1.59 ± (0.01),0.98 ± (0.01),0.99 ± (0.01),0.98 ± (0.02),1.0 ± (0.0)
1,gandalf,0.98 ± (0.01),1.0 ± (0.01),1.59 ± (0.01),0.98 ± (0.01),0.98 ± (0.01),0.98 ± (0.02),1.0 ± (0.01)
2,fttransformer,0.98 ± (0.01),1.0 ± (0.01),1.59 ± (0.01),0.98 ± (0.01),0.98 ± (0.01),0.98 ± (0.01),1.0 ± (0.0)
3,catboost,0.97 ± (0.01),0.99 ± (0.01),1.59 ± (0.01),0.98 ± (0.01),0.98 ± (0.01),0.98 ± (0.02),0.99 ± (0.01)
4,tabtransformer,0.96 ± (0.01),1.0 ± (0.0),1.59 ± (0.01),0.97 ± (0.01),0.97 ± (0.02),0.97 ± (0.02),1.0 ± (0.0)
5,xgb,0.96 ± (0.02),0.99 ± (0.01),1.59 ± (0.01),0.97 ± (0.02),0.98 ± (0.01),0.96 ± (0.03),1.0 ± (0.0)
6,resnet,0.95 ± (0.02),0.99 ± (0.01),1.59 ± (0.01),0.96 ± (0.01),0.97 ± (0.03),0.96 ± (0.04),1.0 ± (0.01)
7,mlp,0.94 ± (0.03),0.98 ± (0.01),1.59 ± (0.01),0.95 ± (0.03),0.94 ± (0.05),0.96 ± (0.01),0.99 ± (0.01)
8,s1dcnn,0.81 ± (0.12),0.99 ± (0.0),1.59 ± (0.01),0.87 ± (0.07),0.94 ± (0.09),0.83 ± (0.15),0.99 ± (0.0)
9,categoryembedding,0.86 ± (0.22),0.96 ± (0.07),1.51 ± (0.17),0.82 ± (0.32),0.8 ± (0.35),0.94 ± (0.08),0.97 ± (0.06)


# Ageconditions

In [7]:
dataset_name = "ageconditions"
# Declared explicitly instead of relying on whatever an earlier cell left in
# `metric_cols`; running this cell after e.g. the iris or housing cell would
# otherwise highlight the wrong columns or fail on a missing column.
metric_cols = [
    "accuracy",
    "roc_auc",
    "lift",
    "f1",
    "recall",
    "precision",
    "area_under_pr",
]

# header=3: column names are taken from row index 3 of the CSV.
df = pd.read_csv(f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv", header = 3)
df = process_csv(df)
# Apply the styling function to the specified columns
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, f"./tables/{folder}/df_styled_{dataset_name}.png" , dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,s1dcnn,0.95 ± (0.01),0.96 ± (0.01),5.0 ± (0.6),0.85 ± (0.03),0.87 ± (0.04),0.84 ± (0.04),0.83 ± (0.06)
1,catboost,0.94 ± (0.02),0.97 ± (0.02),5.23 ± (0.5),0.82 ± (0.06),0.74 ± (0.06),0.91 ± (0.09),0.89 ± (0.08)
2,xgb,0.92 ± (0.02),0.96 ± (0.03),5.23 ± (0.24),0.73 ± (0.06),0.66 ± (0.06),0.85 ± (0.12),0.88 ± (0.06)
3,gandalf,0.89 ± (0.05),0.95 ± (0.04),4.77 ± (0.58),0.73 ± (0.1),0.84 ± (0.09),0.65 ± (0.13),0.83 ± (0.08)
4,gate,0.87 ± (0.04),0.91 ± (0.03),4.66 ± (0.51),0.68 ± (0.05),0.79 ± (0.02),0.61 ± (0.08),0.73 ± (0.08)
5,autoint,0.88 ± (0.02),0.86 ± (0.07),4.66 ± (0.42),0.64 ± (0.14),0.69 ± (0.24),0.71 ± (0.16),0.73 ± (0.09)
6,tabtransformer,0.79 ± (0.16),0.82 ± (0.16),3.89 ± (1.24),0.59 ± (0.17),0.74 ± (0.14),0.51 ± (0.18),0.64 ± (0.22)
7,categoryembedding,0.85 ± (0.04),0.79 ± (0.2),3.89 ± (1.49),0.55 ± (0.24),0.61 ± (0.28),0.53 ± (0.2),0.65 ± (0.26)
8,mlp,0.87 ± (0.05),0.86 ± (0.03),4.0 ± (0.93),0.53 ± (0.24),0.46 ± (0.23),0.65 ± (0.22),0.65 ± (0.14)
9,tabnet,0.4 ± (0.23),0.57 ± (0.1),1.52 ± (0.69),0.3 ± (0.04),0.71 ± (0.29),0.22 ± (0.1),0.26 ± (0.07)


# Heloc

In [8]:
dataset_name = "heloc"
# Declared explicitly instead of relying on whatever an earlier cell left in
# `metric_cols`; running this cell after e.g. the iris or housing cell would
# otherwise highlight the wrong columns or fail on a missing column.
metric_cols = [
    "accuracy",
    "roc_auc",
    "lift",
    "f1",
    "recall",
    "precision",
    "area_under_pr",
]

# header=3: column names are taken from row index 3 of the CSV.
df = pd.read_csv(f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv", header = 3)
df = process_csv(df)
# Apply the styling function to the specified columns
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, f"./tables/{folder}/df_styled_{dataset_name}.png" , dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,catboost,0.72 ± (0.01),0.8 ± (0.01),1.72 ± (0.03),0.74 ± (0.0),0.77 ± (0.01),0.72 ± (0.01),0.8 ± (0.01)
1,fttransformer,0.72 ± (0.01),0.79 ± (0.0),1.71 ± (0.02),0.74 ± (0.0),0.75 ± (0.01),0.73 ± (0.01),0.79 ± (0.01)
2,mlp,0.72 ± (0.01),0.79 ± (0.01),1.7 ± (0.03),0.74 ± (0.01),0.76 ± (0.01),0.72 ± (0.01),0.79 ± (0.01)
3,gandalf,0.72 ± (0.0),0.79 ± (0.0),1.69 ± (0.03),0.74 ± (0.01),0.75 ± (0.03),0.73 ± (0.01),0.79 ± (0.01)
4,gate,0.72 ± (0.01),0.79 ± (0.0),1.69 ± (0.05),0.74 ± (0.01),0.76 ± (0.0),0.72 ± (0.01),0.79 ± (0.01)
5,xgb,0.72 ± (0.01),0.79 ± (0.01),1.69 ± (0.01),0.74 ± (0.01),0.77 ± (0.01),0.72 ± (0.01),0.79 ± (0.01)
6,categoryembedding,0.72 ± (0.01),0.79 ± (0.0),1.69 ± (0.03),0.73 ± (0.01),0.73 ± (0.03),0.73 ± (0.0),0.79 ± (0.01)
7,tabtransformer,0.71 ± (0.01),0.78 ± (0.01),1.69 ± (0.03),0.72 ± (0.01),0.72 ± (0.03),0.73 ± (0.01),0.78 ± (0.01)
8,resnet,0.71 ± (0.01),0.78 ± (0.01),1.68 ± (0.04),0.74 ± (0.01),0.78 ± (0.02),0.7 ± (0.01),0.78 ± (0.01)
9,s1dcnn,0.7 ± (0.01),0.78 ± (0.01),1.66 ± (0.04),0.67 ± (0.02),0.58 ± (0.03),0.78 ± (0.01),0.78 ± (0.01)


# Adult

In [9]:
dataset_name = "adult"
# Metric columns highlighted for this dataset (no "lift" here).
metric_cols = ["accuracy", "roc_auc", "f1", "recall", "precision", "area_under_pr"]

csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: column names are taken from row index 3 of the CSV.
df = process_csv(pd.read_csv(csv_path, header=3))

# Shade the top entry of each metric column, save the styled table as a PNG, then display it.
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,f1,recall,precision,area_under_pr
0,catboost,0.87 ± (0.0),0.93 ± (0.0),0.71 ± (0.01),0.65 ± (0.01),0.79 ± (0.01),0.83 ± (0.0)
1,xgb,0.87 ± (0.0),0.93 ± (0.0),0.71 ± (0.01),0.66 ± (0.01),0.78 ± (0.01),0.83 ± (0.0)
2,categoryembedding,0.8 ± (0.02),0.91 ± (0.0),0.68 ± (0.01),0.86 ± (0.02),0.56 ± (0.03),0.76 ± (0.01)
3,mlp,0.85 ± (0.0),0.91 ± (0.0),0.66 ± (0.01),0.61 ± (0.01),0.73 ± (0.01),0.77 ± (0.01)
4,gandalf,0.8 ± (0.01),0.91 ± (0.0),0.68 ± (0.01),0.85 ± (0.05),0.57 ± (0.03),0.77 ± (0.01)
5,resnet,0.83 ± (0.01),0.9 ± (0.0),0.49 ± (0.05),0.35 ± (0.05),0.87 ± (0.02),0.77 ± (0.01)
6,tabtransformer,0.79 ± (0.03),0.89 ± (0.03),0.66 ± (0.04),0.85 ± (0.04),0.54 ± (0.04),0.72 ± (0.08)
7,s1dcnn,0.82 ± (0.01),0.87 ± (0.01),0.67 ± (0.01),0.76 ± (0.04),0.6 ± (0.03),0.67 ± (0.01)
8,gate,0.79 ± (0.03),0.87 ± (0.05),0.62 ± (0.09),0.76 ± (0.22),0.57 ± (0.07),0.71 ± (0.08)
9,fttransformer,0.59 ± (0.28),0.82 ± (0.15),0.57 ± (0.15),0.9 ± (0.08),0.45 ± (0.17),0.65 ± (0.2)


# Housing

In [10]:
dataset_name = "housing"
metric_cols = ["r2_score", "mse", "rmse"]
# Regression metrics point in different directions: a larger r2_score is better,
# while smaller mse / rmse are better, so each group needs its own highlighter.
higher_is_better_cols = ["r2_score"]
lower_is_better_cols = ["mse", "rmse"]

# header=3: column names are taken from row index 3 of the CSV.
df = pd.read_csv(f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv", header = 3)
df = process_csv(df)
# Highlight the best model per column: max for r2_score, min for the error metrics
styled_df = (
    df.style
    .apply(highlight_max_row, subset=higher_is_better_cols, axis=0)
    .apply(highlight_min_row, subset=lower_is_better_cols, axis=0)
)
dfi.export(styled_df, f"./tables/{folder}/df_styled_{dataset_name}.png" , dpi=1200)
styled_df

Unnamed: 0,model,mse,r2_score,rmse
0,catboost,0.21 ± (0.01),0.85 ± (0.01),0.45 ± (0.01)
1,xgb,0.21 ± (0.01),0.84 ± (0.01),0.46 ± (0.01)
2,mlp,0.3 ± (0.02),0.78 ± (0.01),0.55 ± (0.02)
3,s1dcnn,0.3 ± (0.01),0.77 ± (0.01),0.55 ± (0.01)
4,resnet,0.31 ± (0.01),0.77 ± (0.01),0.55 ± (0.01)
5,gandalf,0.34 ± (0.02),0.74 ± (0.02),0.58 ± (0.02)
6,categoryembedding,0.35 ± (0.01),0.74 ± (0.01),0.59 ± (0.01)
7,gate,0.39 ± (0.03),0.71 ± (0.02),0.62 ± (0.02)
8,fttransformer,0.39 ± (0.02),0.71 ± (0.02),0.62 ± (0.02)
9,autoint,0.41 ± (0.02),0.7 ± (0.01),0.64 ± (0.01)


# Creditcard

In [11]:
dataset_name = "creditcard"
# Metric columns highlighted for this dataset.
metric_cols = ["accuracy", "roc_auc", "lift", "f1", "recall", "precision", "area_under_pr"]

csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: column names are taken from row index 3 of the CSV.
df = process_csv(pd.read_csv(csv_path, header=3))

# Shade the top entry of each metric column, save the styled table as a PNG, then display it.
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,catboost,1.0 ± (0.0),0.99 ± (0.01),9.63 ± (0.15),0.87 ± (0.02),0.8 ± (0.02),0.95 ± (0.03),0.86 ± (0.02)
1,xgb,1.0 ± (0.0),0.98 ± (0.01),9.41 ± (0.16),0.86 ± (0.02),0.79 ± (0.02),0.95 ± (0.03),0.86 ± (0.02)
2,mlp,1.0 ± (0.0),0.97 ± (0.01),9.39 ± (0.17),0.83 ± (0.02),0.77 ± (0.02),0.9 ± (0.03),0.83 ± (0.03)
3,s1dcnn,0.97 ± (0.01),0.94 ± (0.02),9.25 ± (0.23),0.13 ± (0.06),0.89 ± (0.03),0.07 ± (0.04),0.11 ± (0.05)
4,resnet,1.0 ± (0.0),0.96 ± (0.01),9.1 ± (0.24),0.0 ± (0.0),0.0 ± (0.0),0.0 ± (0.0),0.7 ± (0.06)
5,gate,0.99 ± (0.01),0.9 ± (0.13),8.04 ± (1.89),0.2 ± (0.13),0.68 ± (0.34),0.12 ± (0.08),0.53 ± (0.26)
6,gandalf,0.96 ± (0.04),0.81 ± (0.34),7.57 ± (3.69),0.1 ± (0.05),0.73 ± (0.33),0.06 ± (0.03),0.59 ± (0.29)
7,fttransformer,0.99 ± (0.01),0.67 ± (0.37),5.74 ± (4.48),0.15 ± (0.19),0.53 ± (0.43),0.1 ± (0.14),0.4 ± (0.32)
8,autoint,0.92 ± (0.05),0.79 ± (0.14),4.8 ± (2.86),0.07 ± (0.08),0.55 ± (0.31),0.04 ± (0.05),0.15 ± (0.27)
9,categoryembedding,0.91 ± (0.15),0.64 ± (0.17),3.51 ± (3.14),0.01 ± (0.02),0.32 ± (0.39),0.01 ± (0.01),0.12 ± (0.21)


# Covertype

In [12]:
dataset_name = "covertype"
# Only these two metrics are highlighted for this dataset.
metric_cols = ["accuracy", "f1"]

csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: column names are taken from row index 3 of the CSV.
df = process_csv(pd.read_csv(csv_path, header=3))

# Shade the top entry of each metric column, save the styled table as a PNG, then display it.
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,f1
0,resnet,0.92 ± (0.01),0.92 ± (0.01)
1,catboost,0.89 ± (0.0),0.89 ± (0.0)
2,gandalf,0.88 ± (0.02),0.88 ± (0.02)
3,s1dcnn,0.87 ± (0.05),0.88 ± (0.05)
4,xgb,0.87 ± (0.0),0.87 ± (0.0)
5,mlp,0.86 ± (0.0),0.86 ± (0.0)
6,categoryembedding,0.74 ± (0.05),0.75 ± (0.05)
7,gate,0.68 ± (0.03),0.7 ± (0.03)
8,tabtransformer,0.59 ± (0.03),0.62 ± (0.02)
9,tabnet,0.59 ± (0.12),0.57 ± (0.19)


# Diabetes

In [13]:
dataset_name = "diabetes"
# Metric columns highlighted for this dataset.
metric_cols = ["accuracy", "roc_auc", "lift", "f1", "recall", "precision", "area_under_pr"]

csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: column names are taken from row index 3 of the CSV.
df = process_csv(pd.read_csv(csv_path, header=3))

# Shade the top entry of each metric column, save the styled table as a PNG, then display it.
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,xgb,0.89 ± (0.0),0.68 ± (0.0),2.47 ± (0.07),0.03 ± (0.01),0.02 ± (0.0),0.52 ± (0.05),0.23 ± (0.01)
1,catboost,0.89 ± (0.0),0.68 ± (0.01),2.45 ± (0.09),0.03 ± (0.01),0.01 ± (0.01),0.52 ± (0.09),0.23 ± (0.01)
2,gate,0.62 ± (0.03),0.68 ± (0.01),2.44 ± (0.04),0.27 ± (0.01),0.63 ± (0.04),0.17 ± (0.01),0.22 ± (0.01)
3,s1dcnn,0.79 ± (0.01),0.65 ± (0.01),2.3 ± (0.09),0.26 ± (0.01),0.33 ± (0.02),0.22 ± (0.01),0.2 ± (0.01)
4,gandalf,0.66 ± (0.06),0.66 ± (0.03),2.28 ± (0.25),0.26 ± (0.02),0.54 ± (0.13),0.18 ± (0.01),0.21 ± (0.02)
5,mlp,0.89 ± (0.0),0.65 ± (0.01),2.27 ± (0.09),0.04 ± (0.01),0.02 ± (0.0),0.44 ± (0.06),0.21 ± (0.01)
6,tabtransformer,0.66 ± (0.05),0.66 ± (0.02),2.24 ± (0.26),0.26 ± (0.01),0.55 ± (0.08),0.17 ± (0.01),0.21 ± (0.02)
7,resnet,0.89 ± (0.0),0.65 ± (0.01),2.23 ± (0.11),0.0 ± (0.0),0.0 ± (0.0),0.0 ± (0.0),0.21 ± (0.01)
8,fttransformer,0.53 ± (0.21),0.64 ± (0.07),2.11 ± (0.53),0.26 ± (0.03),0.68 ± (0.16),0.16 ± (0.03),0.2 ± (0.04)
9,tabnet,0.69 ± (0.08),0.62 ± (0.03),2.09 ± (0.2),0.25 ± (0.01),0.47 ± (0.14),0.18 ± (0.03),0.18 ± (0.02)
