In [1]:
import pandas as pd
import numpy as np
import ast
from tabulate import tabulate
import json
import yaml
import matplotlib.pyplot as plt
import itertools
import dataframe_image as dfi


pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [2]:
def process_csv(df):
    """Rewrite every metric column of ``df`` as ``"<mean> ± (<std>)"`` strings.

    The ``"Unnamed: 0"`` column is dropped in place, so the caller's frame is
    modified and the very same object is returned.  Every column other than
    ``"model"`` must contain strings shaped like ``"<mean> ± <std>"``; a ``+``
    may stand in for ``±`` and the std may be wrapped in parentheses.  Both
    numbers are multiplied by a per-column factor and rounded to two decimals:

    * ``lift``            -> 10
    * ``mse`` / ``rmse``  -> 1
    * any other column    -> 100

    Args:
        df: DataFrame containing an ``"Unnamed: 0"`` column plus the columns
            to reformat.

    Returns:
        The same DataFrame object, with reformatted string columns.

    Raises:
        KeyError: if ``"Unnamed: 0"`` is not a column of ``df``.
    """
    # Columns that do not use the default factor of 100.
    scale_by_column = {"lift": 10, "mse": 1, "rmse": 1}

    def _rescale(cell, factor):
        # Normalise "+" to "±", then split into the mean and std halves.
        parts = cell.replace("+", "±").split("±")
        mean = round(factor * float(parts[0]), 2)
        # Only the std half is stripped of parentheses before parsing.
        std = round(factor * float(parts[1].replace("(", "").replace(")", "")), 2)
        return f"{mean} ± ({std})"

    df.drop(columns=["Unnamed: 0"], inplace=True)
    for name in df.columns:
        if name == "model":
            continue
        factor = scale_by_column.get(name, 100)
        df[name] = df[name].apply(_rescale, args=(factor,))
    return df

# Define a custom styling function
def highlight_max_row(s):
    """Return per-cell CSS marking the numerically largest entries of ``s``.

    Cells may be plain numbers or strings of the form ``"<mean> ± (<std>)"``
    (``+`` is accepted in place of ``±``); for such strings only the leading
    mean is compared.  Comparing the raw strings would rank them
    lexicographically, so ``"3.77 ± (0.66)"`` would beat ``"27.12 ± (0.69)"``.

    Args:
        s: pandas Series with the cells of one column (or row), as passed in
            by ``Styler.apply``.

    Returns:
        list[str]: ``"background-color: green"`` for every cell tied for the
        maximum and ``""`` for all others, in the order of ``s``.
    """
    if pd.api.types.is_numeric_dtype(s):
        scores = s
    else:
        # Keep only the text before the separator and parse it as a number.
        leading = s.astype(str).str.replace("+", "±", regex=False).str.split("±").str[0]
        scores = pd.to_numeric(leading.str.strip(), errors="coerce")
        if scores.isna().all():
            # Nothing parsed as a number: fall back to comparing the raw values.
            scores = s
    is_max = scores == scores.max()
    return ["background-color: green" if v else "" for v in is_max]


def highlight_min_row(s):
    """Return per-cell CSS marking the numerically smallest entries of ``s``.

    Cells may be plain numbers or strings of the form ``"<mean> ± (<std>)"``
    (``+`` is accepted in place of ``±``); for such strings only the leading
    mean is compared.  Comparing the raw strings would rank them
    lexicographically, so ``"10.2 ± (0.3)"`` would sort below ``"9.5 ± (0.1)"``.

    Args:
        s: pandas Series with the cells of one column (or row), as passed in
            by ``Styler.apply``.

    Returns:
        list[str]: ``"background-color: green"`` for every cell tied for the
        minimum and ``""`` for all others, in the order of ``s``.
    """
    if pd.api.types.is_numeric_dtype(s):
        scores = s
    else:
        # Keep only the text before the separator and parse it as a number.
        leading = s.astype(str).str.replace("+", "±", regex=False).str.split("±").str[0]
        scores = pd.to_numeric(leading.str.strip(), errors="coerce")
        if scores.isna().all():
            # Nothing parsed as a number: fall back to comparing the raw values.
            scores = s
    is_min = scores == scores.min()
    return ["background-color: green" if v else "" for v in is_min]

In [3]:
# Metric columns highlighted by default; individual dataset cells may
# reassign this list when their table has a different set of columns.
metric_cols = ["accuracy", "roc_auc", "lift", "f1", "recall", "precision", "area_under_pr"]

# Sub-folder of ./tables/ into which the styled PNG tables are exported.
folder = "default"

# Titanic

In [4]:
# Titanic: load the results CSV, reformat it, highlight the per-column
# maximum of every metric and export the styled table as a PNG.
dataset_name = "titanic"

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: row index 3 (0-based) of the file supplies the column names.
df = process_csv(pd.read_csv(csv_path, header=3))

# axis=0 hands each selected column to the styling function in turn.
styled_df = df.style.apply(highlight_max_row, axis=0, subset=metric_cols)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,catboost,83.61 ± (2.64),88.11 ± (1.78),25.45 ± (0.9),76.91 ± (4.1),71.34 ± (5.09),83.69 ± (4.76),85.71 ± (1.63)
1,tabtransformer,80.02 ± (1.51),84.3 ± (1.45),25.44 ± (0.76),74.05 ± (1.95),74.26 ± (2.06),73.87 ± (2.49),82.93 ± (2.06)
2,autoint,79.12 ± (1.93),86.17 ± (1.62),25.14 ± (0.86),74.04 ± (2.83),77.75 ± (4.2),70.72 ± (2.08),83.0 ± (1.48)
3,gandalf,81.81 ± (3.46),86.88 ± (2.39),25.14 ± (0.86),76.34 ± (5.03),76.86 ± (6.5),75.91 ± (3.99),84.79 ± (2.16)
4,xgb,83.84 ± (1.66),87.43 ± (2.04),24.83 ± (1.19),77.79 ± (2.73),73.97 ± (4.03),82.11 ± (1.61),84.73 ± (2.94)
5,resnet,79.13 ± (2.93),84.18 ± (2.82),24.53 ± (2.47),67.43 ± (5.38),56.77 ± (6.88),83.99 ± (5.78),80.86 ± (5.03)
6,mlp,79.8 ± (1.38),83.45 ± (1.24),24.52 ± (1.43),71.11 ± (2.31),64.91 ± (3.49),78.76 ± (1.8),80.0 ± (1.14)
7,s1dcnn,80.92 ± (2.38),83.78 ± (2.73),22.98 ± (2.11),72.4 ± (3.43),65.2 ± (3.35),81.55 ± (4.99),78.1 ± (5.1)
8,fttransformer,77.77 ± (8.58),78.89 ± (16.91),22.05 ± (7.21),61.15 ± (30.85),61.59 ± (31.1),60.83 ± (30.8),75.53 ± (19.52)
9,gate,73.16 ± (7.74),78.0 ± (10.72),21.75 ± (4.11),54.8 ± (28.39),54.62 ± (28.6),55.13 ± (28.43),72.43 ± (13.38)


# Iris

In [5]:
# Iris: only accuracy and f1 are highlighted for this table.
dataset_name = "iris"
metric_cols = ["accuracy", "f1"]

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: row index 3 (0-based) of the file supplies the column names.
df = process_csv(pd.read_csv(csv_path, header=3))

# Highlight the per-column maximum, export the PNG, then display the table.
styled_df = df.style.apply(highlight_max_row, axis=0, subset=metric_cols)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,f1
0,autoint,98.0 ± (2.67),98.0 ± (2.67)
1,fttransformer,98.0 ± (2.67),97.98 ± (2.69)
2,catboost,96.0 ± (2.49),95.98 ± (2.51)
3,tabtransformer,96.0 ± (4.9),95.99 ± (4.91)
4,xgb,96.0 ± (2.49),95.98 ± (2.51)
5,gandalf,94.67 ± (5.42),94.63 ± (5.46)
6,s1dcnn,93.33 ± (4.71),93.3 ± (4.7)
7,gate,91.33 ± (12.58),89.11 ± (16.96)
8,resnet,91.33 ± (8.06),91.27 ± (8.08)
9,categoryembedding,90.67 ± (10.62),90.14 ± (11.48)


# Breastcancer

In [6]:
# Breast cancer: all seven classification metrics are highlighted.
dataset_name = "breastcancer"
metric_cols = ["accuracy", "roc_auc", "lift", "f1", "recall", "precision", "area_under_pr"]

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: row index 3 (0-based) of the file supplies the column names.
df = process_csv(pd.read_csv(csv_path, header=3))

# Highlight the per-column maximum, export the PNG, then display the table.
styled_df = df.style.apply(highlight_max_row, axis=0, subset=metric_cols)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,gate,97.89 ± (0.89),99.61 ± (0.59),15.94 ± (0.1),98.34 ± (0.69),98.88 ± (1.04),97.83 ± (1.84),99.73 ± (0.42)
1,gandalf,97.54 ± (1.29),99.54 ± (0.7),15.94 ± (0.1),98.05 ± (1.03),98.32 ± (1.38),97.8 ± (1.62),99.64 ± (0.59)
2,fttransformer,97.54 ± (0.65),99.57 ± (0.61),15.94 ± (0.1),98.04 ± (0.51),98.04 ± (0.67),98.04 ± (0.67),99.7 ± (0.46)
3,catboost,97.37 ± (0.78),99.35 ± (0.67),15.94 ± (0.1),97.91 ± (0.61),98.32 ± (1.05),97.53 ± (1.58),99.49 ± (0.58)
4,tabtransformer,96.49 ± (1.36),99.53 ± (0.43),15.94 ± (0.1),97.2 ± (1.08),97.21 ± (1.97),97.25 ± (1.91),99.7 ± (0.28)
5,xgb,96.13 ± (2.12),99.35 ± (0.54),15.94 ± (0.1),96.97 ± (1.62),98.04 ± (1.42),96.0 ± (3.21),99.58 ± (0.37)
6,resnet,94.9 ± (1.72),99.35 ± (0.94),15.94 ± (0.1),96.0 ± (1.22),96.65 ± (2.87),95.59 ± (4.19),99.55 ± (0.68)
7,mlp,93.67 ± (3.11),98.35 ± (0.86),15.94 ± (0.1),94.86 ± (2.68),94.13 ± (5.07),95.75 ± (1.15),98.92 ± (0.53)
8,s1dcnn,80.85 ± (12.39),99.09 ± (0.48),15.94 ± (0.1),86.68 ± (7.28),93.82 ± (9.28),83.17 ± (15.0),99.36 ± (0.39)
9,categoryembedding,86.31 ± (22.15),95.94 ± (7.28),15.06 ± (1.7),81.83 ± (32.19),80.3 ± (35.24),93.91 ± (8.27),96.74 ± (5.93)


# Ageconditions

In [7]:
# Age conditions: load the results CSV, reformat it, highlight the per-column
# maximum of every metric and export the styled table as a PNG.
dataset_name = "ageconditions"
# Declared explicitly so this cell does not depend on whichever earlier cell
# happened to assign metric_cols last.
metric_cols = [
    "accuracy",
    "roc_auc",
    "lift",
    "f1",
    "recall",
    "precision",
    "area_under_pr",
]

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
df = pd.read_csv(f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv", header = 3)
df = process_csv(df)
# Apply the styling function to the specified columns
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, f"./tables/{folder}/df_styled_{dataset_name}.png" , dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,s1dcnn,94.73 ± (1.06),96.06 ± (1.34),50.0 ± (5.97),85.28 ± (2.92),87.05 ± (3.72),83.69 ± (3.78),83.48 ± (5.94)
1,catboost,94.16 ± (2.09),96.7 ± (2.45),52.35 ± (4.96),81.62 ± (6.4),74.07 ± (6.3),91.4 ± (9.12),88.86 ± (8.08)
2,xgb,91.57 ± (2.17),95.72 ± (3.2),52.35 ± (2.36),73.29 ± (5.61),65.71 ± (6.41),84.68 ± (11.5),87.76 ± (6.28)
3,gandalf,88.5 ± (5.43),94.74 ± (3.72),47.67 ± (5.78),72.83 ± (9.81),84.29 ± (8.57),65.37 ± (12.91),82.56 ± (7.83)
4,gate,86.88 ± (4.0),91.2 ± (3.25),46.61 ± (5.06),68.35 ± (5.46),78.7 ± (2.24),61.06 ± (8.29),73.12 ± (7.75)
5,autoint,87.85 ± (1.77),86.26 ± (7.17),46.62 ± (4.19),63.77 ± (13.78),68.79 ± (23.8),70.7 ± (15.71),72.65 ± (9.28)
6,tabtransformer,78.58 ± (16.16),82.01 ± (16.05),38.92 ± (12.43),59.07 ± (17.06),74.03 ± (13.92),51.32 ± (18.21),63.57 ± (21.67)
7,categoryembedding,85.09 ± (4.2),78.61 ± (20.41),38.85 ± (14.91),54.58 ± (24.25),60.74 ± (28.05),52.61 ± (19.69),64.78 ± (25.51)
8,mlp,87.34 ± (5.13),86.45 ± (3.38),40.0 ± (9.32),53.49 ± (23.98),46.23 ± (23.29),65.29 ± (21.83),64.82 ± (13.73)
9,tabnet,39.76 ± (22.65),57.49 ± (9.9),15.21 ± (6.88),29.53 ± (3.97),70.69 ± (28.52),22.41 ± (10.32),25.91 ± (7.39)


# Heloc

In [8]:
# HELOC: load the results CSV, reformat it, highlight the per-column maximum
# of every metric and export the styled table as a PNG.
dataset_name = "heloc"
# Declared explicitly so this cell does not depend on whichever earlier cell
# happened to assign metric_cols last.
metric_cols = [
    "accuracy",
    "roc_auc",
    "lift",
    "f1",
    "recall",
    "precision",
    "area_under_pr",
]

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
df = pd.read_csv(f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv", header = 3)
df = process_csv(df)
# Apply the styling function to the specified columns
styled_df = df.style.apply(highlight_max_row, subset=metric_cols, axis=0)
dfi.export(styled_df, f"./tables/{folder}/df_styled_{dataset_name}.png" , dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,catboost,72.31 ± (0.75),79.8 ± (0.65),17.2 ± (0.29),74.36 ± (0.49),76.9 ± (0.53),71.99 ± (1.07),79.53 ± (0.9)
1,fttransformer,72.12 ± (0.68),79.17 ± (0.25),17.12 ± (0.23),73.64 ± (0.45),74.59 ± (1.0),72.73 ± (1.16),79.08 ± (0.59)
2,mlp,71.79 ± (0.65),78.87 ± (0.67),16.96 ± (0.3),73.7 ± (0.51),75.75 ± (0.89),71.78 ± (0.92),78.65 ± (0.83)
3,gandalf,72.1 ± (0.38),79.29 ± (0.49),16.94 ± (0.31),73.6 ± (0.99),74.63 ± (3.29),72.75 ± (1.43),78.96 ± (0.95)
4,gate,72.13 ± (0.76),79.24 ± (0.46),16.9 ± (0.46),74.13 ± (0.58),76.48 ± (0.36),71.92 ± (0.89),79.0 ± (0.73)
5,xgb,72.11 ± (0.85),78.97 ± (0.89),16.89 ± (0.15),74.34 ± (0.65),77.41 ± (0.95),71.53 ± (1.12),78.63 ± (0.77)
6,categoryembedding,71.78 ± (0.63),78.6 ± (0.45),16.89 ± (0.34),72.93 ± (1.1),72.93 ± (2.53),72.99 ± (0.47),78.5 ± (0.54)
7,tabtransformer,71.41 ± (0.59),77.89 ± (0.63),16.87 ± (0.34),72.42 ± (1.2),72.01 ± (2.95),72.93 ± (0.86),77.54 ± (0.72)
8,resnet,71.27 ± (0.69),78.38 ± (0.54),16.78 ± (0.37),73.89 ± (0.54),77.91 ± (1.9),70.32 ± (1.29),78.14 ± (0.62)
9,s1dcnn,69.55 ± (0.84),78.18 ± (0.72),16.56 ± (0.41),66.65 ± (1.97),58.45 ± (3.48),77.75 ± (1.04),77.65 ± (1.07)


# Adult

In [9]:
# Adult: the highlighted metrics for this table do not include "lift".
dataset_name = "adult"
metric_cols = ["accuracy", "roc_auc", "f1", "recall", "precision", "area_under_pr"]

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: row index 3 (0-based) of the file supplies the column names.
df = process_csv(pd.read_csv(csv_path, header=3))

# Highlight the per-column maximum, export the PNG, then display the table.
styled_df = df.style.apply(highlight_max_row, axis=0, subset=metric_cols)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,f1,recall,precision,area_under_pr
0,catboost,87.42 ± (0.3),92.97 ± (0.12),71.48 ± (0.63),65.45 ± (0.82),78.75 ± (1.07),83.21 ± (0.36)
1,xgb,87.33 ± (0.31),92.85 ± (0.17),71.35 ± (0.75),65.5 ± (0.96),78.36 ± (0.85),83.0 ± (0.47)
2,categoryembedding,80.18 ± (1.77),90.77 ± (0.37),67.82 ± (1.41),86.49 ± (2.41),55.9 ± (2.78),76.2 ± (1.24)
3,mlp,85.19 ± (0.22),90.74 ± (0.34),66.47 ± (0.57),60.97 ± (1.48),73.13 ± (1.24),76.79 ± (1.0)
4,gandalf,80.49 ± (1.5),90.73 ± (0.41),67.84 ± (0.8),85.41 ± (5.05),56.55 ± (2.99),76.5 ± (1.0)
5,resnet,82.98 ± (0.79),90.45 ± (0.38),49.41 ± (4.65),34.82 ± (5.05),86.75 ± (2.29),76.61 ± (0.94)
6,tabtransformer,78.56 ± (3.1),88.79 ± (3.45),65.59 ± (4.1),84.52 ± (3.86),53.63 ± (4.03),71.95 ± (7.88)
7,s1dcnn,81.73 ± (1.1),87.4 ± (1.25),66.6 ± (0.72),75.63 ± (3.64),59.72 ± (2.64),66.9 ± (1.08)
8,gate,79.2 ± (2.55),87.31 ± (5.35),62.12 ± (9.05),75.74 ± (21.79),57.19 ± (6.56),70.59 ± (8.35)
9,fttransformer,58.83 ± (28.38),82.43 ± (14.69),57.06 ± (14.91),90.4 ± (8.07),45.02 ± (17.18),65.09 ± (19.63)


# Housing

In [10]:
# Housing (regression): r2_score is highlighted at its maximum, while
# mse and rmse are highlighted at their minimum.
dataset_name = "housing"
# NOTE(review): metric_cols is assigned here but the styling calls below
# spell out their column subsets directly.
metric_cols = ["r2_score", "mse", "rmse"]

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: row index 3 (0-based) of the file supplies the column names.
df = process_csv(pd.read_csv(csv_path, header=3))

# Two chained column-wise passes: maximum first, then minimum.
styled_df = (
    df.style
    .apply(highlight_max_row, axis=0, subset=["r2_score"])
    .apply(highlight_min_row, axis=0, subset=["mse", "rmse"])
)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,mse,r2_score,rmse
0,catboost,0.21 ± (0.01),84.54 ± (0.85),0.45 ± (0.01)
1,xgb,0.21 ± (0.01),83.96 ± (0.63),0.46 ± (0.01)
2,mlp,0.3 ± (0.02),77.65 ± (1.12),0.55 ± (0.02)
3,s1dcnn,0.3 ± (0.01),77.41 ± (0.66),0.55 ± (0.01)
4,resnet,0.31 ± (0.01),76.95 ± (0.73),0.55 ± (0.01)
5,gandalf,0.34 ± (0.02),74.44 ± (1.95),0.58 ± (0.02)
6,categoryembedding,0.35 ± (0.01),73.55 ± (1.0),0.59 ± (0.01)
7,gate,0.39 ± (0.03),71.07 ± (1.99),0.62 ± (0.02)
8,fttransformer,0.39 ± (0.02),70.69 ± (1.71),0.62 ± (0.02)
9,autoint,0.41 ± (0.02),69.53 ± (1.14),0.64 ± (0.01)


# Diabetes

In [11]:
# Diabetes: all seven classification metrics are highlighted.
dataset_name = "diabetes"
metric_cols = ["accuracy", "roc_auc", "lift", "f1", "recall", "precision", "area_under_pr"]

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: row index 3 (0-based) of the file supplies the column names.
df = process_csv(pd.read_csv(csv_path, header=3))

# Highlight the per-column maximum, export the PNG, then display the table.
styled_df = df.style.apply(highlight_max_row, axis=0, subset=metric_cols)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,xgb,88.86 ± (0.04),67.86 ± (0.47),24.71 ± (0.67),3.19 ± (0.87),1.65 ± (0.46),52.09 ± (5.33),23.08 ± (0.62)
1,catboost,88.86 ± (0.03),68.11 ± (0.59),24.54 ± (0.86),2.52 ± (1.01),1.29 ± (0.53),52.03 ± (8.94),23.42 ± (0.93)
2,gate,62.4 ± (3.35),67.8 ± (0.58),24.43 ± (0.4),27.12 ± (0.69),62.51 ± (3.56),17.35 ± (0.84),22.46 ± (1.02)
3,s1dcnn,79.25 ± (0.93),65.33 ± (0.95),22.97 ± (0.86),26.36 ± (0.61),33.28 ± (1.54),21.87 ± (0.81),19.6 ± (0.73)
4,gandalf,66.2 ± (5.88),65.93 ± (3.26),22.82 ± (2.5),26.14 ± (2.28),54.31 ± (12.83),17.51 ± (1.27),20.87 ± (2.28)
5,mlp,88.77 ± (0.07),65.22 ± (1.2),22.68 ± (0.93),3.77 ± (0.66),1.97 ± (0.35),43.65 ± (5.89),20.54 ± (1.08)
6,tabtransformer,65.64 ± (4.8),65.94 ± (2.31),22.45 ± (2.59),26.37 ± (1.31),55.22 ± (8.17),17.48 ± (1.04),20.68 ± (2.01)
7,resnet,88.84 ± (0.0),65.2 ± (0.86),22.26 ± (1.15),0.0 ± (0.0),0.0 ± (0.0),0.0 ± (0.0),20.56 ± (1.13)
8,fttransformer,53.16 ± (21.14),63.96 ± (6.99),21.08 ± (5.31),25.7 ± (2.84),68.35 ± (16.11),16.23 ± (2.6),19.83 ± (4.39)
9,tabnet,68.55 ± (8.29),62.47 ± (2.6),20.86 ± (1.97),24.74 ± (1.18),46.64 ± (13.73),17.84 ± (2.74),18.02 ± (1.76)


# Creditcard

In [12]:
# Credit card: all seven classification metrics are highlighted.
dataset_name = "creditcard"
metric_cols = ["accuracy", "roc_auc", "lift", "f1", "recall", "precision", "area_under_pr"]

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: row index 3 (0-based) of the file supplies the column names.
df = process_csv(pd.read_csv(csv_path, header=3))

# Highlight the per-column maximum, export the PNG, then display the table.
styled_df = df.style.apply(highlight_max_row, axis=0, subset=metric_cols)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,roc_auc,lift,f1,recall,precision,area_under_pr
0,catboost,99.96 ± (0.01),98.54 ± (0.85),96.34 ± (1.52),86.94 ± (2.26),79.88 ± (2.39),95.4 ± (2.59),85.92 ± (2.32)
1,xgb,99.96 ± (0.01),97.91 ± (0.68),94.11 ± (1.62),86.49 ± (2.12),79.27 ± (1.64),95.19 ± (3.26),85.6 ± (2.22)
2,mlp,99.95 ± (0.01),96.99 ± (0.69),93.91 ± (1.7),82.97 ± (1.77),77.23 ± (2.33),89.77 ± (3.38),82.59 ± (2.81)
3,s1dcnn,97.44 ± (1.31),94.2 ± (1.78),92.48 ± (2.3),13.35 ± (6.12),89.01 ± (3.15),7.37 ± (3.71),11.42 ± (4.96)
4,resnet,99.83 ± (0.0),96.17 ± (0.99),91.05 ± (2.39),0.0 ± (0.0),0.0 ± (0.0),0.0 ± (0.0),69.7 ± (6.45)
5,gate,99.15 ± (0.54),89.9 ± (13.2),80.43 ± (18.89),20.35 ± (13.09),67.6 ± (34.38),12.27 ± (8.4),52.68 ± (25.71)
6,gandalf,95.64 ± (4.34),80.9 ± (34.28),75.73 ± (36.87),10.23 ± (5.48),72.66 ± (33.34),5.52 ± (3.01),58.58 ± (29.28)
7,fttransformer,98.91 ± (1.08),67.18 ± (37.3),57.38 ± (44.77),15.34 ± (19.47),52.71 ± (43.1),10.22 ± (14.3),39.66 ± (32.48)
8,autoint,92.35 ± (5.45),78.67 ± (13.76),47.98 ± (28.64),7.48 ± (8.18),54.51 ± (31.32),4.18 ± (4.66),15.25 ± (27.34)
9,categoryembedding,91.45 ± (14.57),64.0 ± (17.45),35.11 ± (31.38),1.25 ± (2.15),32.08 ± (39.29),0.64 ± (1.11),11.82 ± (21.16)


# Covertype

In [13]:
# Covertype: only accuracy and f1 are highlighted for this table.
dataset_name = "covertype"
metric_cols = ["accuracy", "f1"]

# NOTE(review): the input path is absolute and machine-specific, while the
# PNG below is written relative to the current working directory.
csv_path = f"/home/boom/sdev/WTabRun/notebooks/tables/default/csvs/{dataset_name}.csv"
png_path = f"./tables/{folder}/df_styled_{dataset_name}.png"

# header=3: row index 3 (0-based) of the file supplies the column names.
df = process_csv(pd.read_csv(csv_path, header=3))

# Highlight the per-column maximum, export the PNG, then display the table.
styled_df = df.style.apply(highlight_max_row, axis=0, subset=metric_cols)
dfi.export(styled_df, png_path, dpi=1200)
styled_df

Unnamed: 0,model,accuracy,f1
0,resnet,92.06 ± (0.78),92.26 ± (0.75)
1,catboost,89.25 ± (0.08),89.19 ± (0.08)
2,gandalf,87.9 ± (1.75),88.24 ± (1.66)
3,s1dcnn,87.4 ± (5.11),87.84 ± (4.78)
4,xgb,86.9 ± (0.04),86.84 ± (0.05)
5,mlp,85.81 ± (0.31),85.64 ± (0.33)
6,categoryembedding,74.2 ± (5.35),75.46 ± (4.87)
7,gate,68.0 ± (3.49),69.79 ± (3.06)
8,tabtransformer,59.21 ± (2.83),62.22 ± (2.2)
9,tabnet,59.11 ± (11.99),57.34 ± (19.25)
