In [1]:
!pip install requests black nb_black
%load_ext nb_black

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


<IPython.core.display.Javascript object>

In [2]:
import os
from pathlib import Path

from requests import get
import pandas as pd
import numpy as np
import torch

np.random.seed(0)

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import (
    RandomizedSearchCV,
    StratifiedKFold,
    ParameterSampler,
    StratifiedShuffleSplit,
)
from pytorch_tabnet.tab_model import TabNetClassifier

<IPython.core.display.Javascript object>

# Utilities

In [3]:
def download(url, out, force=False, verify=True):
    out.parent.mkdir(parents=True, exist_ok=True)
    if force:
        print(f"Removing file at {str(out)}")
        out.unlink()

    if out.exists():
        print("File already exists.")
        return
    print(f"Downloading {url} at {str(out)} ...")
    # open in binary mode
    with out.open(mode="wb") as file:
        # get request
        response = get(url, verify=verify)
        for chunk in response.iter_content(100000):
            # write to file
            file.write(chunk)

<IPython.core.display.Javascript object>

In [4]:
UNKNOWN_VALUE = ["Unkn0wnV@lue"]


class SafeLabelEncoder(LabelEncoder):
    """
    Safe label encoder, encoding every unknown value as Unkn0wnV@lue.
    """

    def fit(self, y):
        """
        Fit the label encoder, by casting the numpy array as a string, then adding the code for unknown.
        
        Parameters
        ----------
        y : numpy array
            the values to fit
        
        Returns
        -------
        SafeLabelEncoder
            itself, fitted
        """
        return super().fit(np.concatenate((y.astype("str"), UNKNOWN_VALUE)))

    def fit_transform(self, y):
        """
        Fit the encoder, then transform the input data and returns it.
        
        Parameters
        ----------
        y : numpy array
            the values to fit
        
        Returns
        -------
        numpy array
            the encoded data
        """
        self.fit(y)
        return super().transform(y)

    def transform(self, y):
        """
        Transform the input data and returns it.
        
        Parameters
        ----------
        y : numpy array
            the values to fit
        
        Returns
        -------
        numpy array
            the encoded data
        """
        return super().transform(
            np.where(
                np.isin(y.astype("str"), self.classes_), y.astype("str"), UNKNOWN_VALUE
            )
        )



<IPython.core.display.Javascript object>

# Download census-income dataset

In [5]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
url_test = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test"

dataset_name = "census-income"
out = Path(os.getcwd() + "/data/" + dataset_name + ".csv")
out_test = Path(os.getcwd() + "/data/" + dataset_name + "_test.csv")

download(url, out, force=True)
download(url_test, out_test, force=True)

Removing file at /work/data/census-income.csv
Downloading https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data at /work/data/census-income.csv ...
Removing file at /work/data/census-income_test.csv
Downloading https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test at /work/data/census-income_test.csv ...


<IPython.core.display.Javascript object>

# Load data and split

In [6]:
cols = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "target",
]

<IPython.core.display.Javascript object>

In [7]:
train = pd.read_csv(out, names=cols)
test = pd.read_csv(out_test, names=cols, skiprows=2)
target = "target"

train[target] = train[target].str.strip()
# Test has . in label, let's clean it
test[target] = test[target].str.strip().str.strip(".")

<IPython.core.display.Javascript object>

In [8]:
used_columns = list(set(train.columns.tolist()) - set([target]) - set(["Set"]))
used_columns

['native-country',
 'relationship',
 'fnlwgt',
 'workclass',
 'hours-per-week',
 'education-num',
 'age',
 'marital-status',
 'race',
 'capital-gain',
 'education',
 'occupation',
 'sex',
 'capital-loss']

<IPython.core.display.Javascript object>

# Simple preprocessing

Label encode categorical features and fill empty cells.

In [9]:
nunique = train[used_columns].nunique()
types = train[used_columns].dtypes

cat_cols = train[used_columns].columns[(nunique < 200) | (types == "object")]
other_cols = train[used_columns].columns[~train[used_columns].columns.isin(cat_cols)]
print(cat_cols)
print(other_cols)

Index(['native-country', 'relationship', 'workclass', 'hours-per-week',
       'education-num', 'age', 'marital-status', 'race', 'capital-gain',
       'education', 'occupation', 'sex', 'capital-loss'],
      dtype='object')
Index(['fnlwgt'], dtype='object')


<IPython.core.display.Javascript object>

In [10]:
nunique["education"]

16

<IPython.core.display.Javascript object>

In [11]:
# Fillna
train[cat_cols] = train[cat_cols].astype("str")
train[other_cols] = train[other_cols].fillna(train[other_cols].mean())

test[cat_cols] = test[cat_cols].astype("str")
test[other_cols] = test[other_cols].fillna(train[other_cols].mean())

<IPython.core.display.Javascript object>

In [12]:
train.isnull().sum().sum()

0

<IPython.core.display.Javascript object>

In [13]:
enc = {}
for col in cat_cols:
    label_enc = SafeLabelEncoder()
    enc[col] = label_enc
    train[col] = label_enc.fit_transform(train[col])
    test[col] = label_enc.transform(test[col])
enc[target] = SafeLabelEncoder()
train[target] = enc[target].fit_transform(train[target])
test[target] = enc[target].transform(test[target])

enc

{'native-country': SafeLabelEncoder(),
 'relationship': SafeLabelEncoder(),
 'workclass': SafeLabelEncoder(),
 'hours-per-week': SafeLabelEncoder(),
 'education-num': SafeLabelEncoder(),
 'age': SafeLabelEncoder(),
 'marital-status': SafeLabelEncoder(),
 'race': SafeLabelEncoder(),
 'capital-gain': SafeLabelEncoder(),
 'education': SafeLabelEncoder(),
 'occupation': SafeLabelEncoder(),
 'sex': SafeLabelEncoder(),
 'capital-loss': SafeLabelEncoder(),
 'target': SafeLabelEncoder()}

<IPython.core.display.Javascript object>

# Define categorical features for categorical embeddings

In [14]:
unused_feat = ["Set"]

cat_idxs = [i for i, f in enumerate(used_columns) if f in cat_cols]
cat_dims = [len(enc[f].classes_) for f in used_columns if f in cat_cols]
print(cat_idxs)
print(cat_dims)

[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
[43, 7, 10, 95, 17, 74, 8, 6, 120, 17, 16, 3, 93]


<IPython.core.display.Javascript object>

# Training one model

In [15]:
X = train[used_columns].values
y = train[target].values

# Test here should be ignored for training, only purpose is benching with paper values
X_test = test[used_columns].values
y_test = test[target].values

<IPython.core.display.Javascript object>

# Random Grid search

In [16]:
from scipy.stats import randint, uniform, loguniform

<IPython.core.display.Javascript object>

In [17]:
loguniform(0.01, 0.5).rvs(size=10)  # 1e-2 ou 1e-3 à 1e-6

array([0.08558895, 0.1640912 , 0.10570046, 0.08428304, 0.05245378,
       0.12512806, 0.05539206, 0.32741274, 0.43374464, 0.04481833])

<IPython.core.display.Javascript object>

In [18]:
def emb_generator(cat_dim_list, max_dim):
    return [min(nb // 2, max_dim) for nb in cat_dim_list]

<IPython.core.display.Javascript object>

In [19]:
def log_emb_generator(cat_dim_list, max_dim):
    return [min(np.log2(nb).astype("int"), max_dim) for nb in cat_dim_list]

<IPython.core.display.Javascript object>

In [20]:
log_emb_generator(cat_dims, 5)

[5, 2, 3, 5, 4, 5, 3, 2, 5, 4, 4, 1, 5]

<IPython.core.display.Javascript object>

In [21]:
# Let's generate embedding size based on cat dims
# cat_emb_dim_list = []
# for max_dim in [1, 5, 10, 20, 50]:
#    cat_emb_dim_list.append([min(nb // 2, max_dim) for nb in cat_dims])

num_workers = os.cpu_count() if torch.cuda.is_available() else 0

grid = {
    # Model params
    "n_a": randint(8,65),
    # "n_d": [8], #
    "emb_generator": [emb_generator, log_emb_generator],
    "max_emd_dims": [1, 2, 5, 10, 20, 50],
    #"cat_emb_dim": cat_emb_dim_list,
    "n_independent": randint(1,6),
    "n_shared": randint(0,6),
    "n_steps": randint(2,11),
    "clip_value": [1],
    "gamma": uniform(1, 2),
    "momentum": loguniform(0.01, 0.5), # [0.1, 0.05, 0.02, 0.005],
    "lambda_sparse": loguniform(1e-6, 1e-1), # [0.1, 0.01, 0.001], 1e-2 ou 1e-3 à 1e-6
    "lr": [0.1], #, 0.02], #, 0.02, 0.001],
    "verbose": [1],
    # optimizer_fn
    
    # Fit params
    "patience":[5],
    "max_epochs":[1000],
    "num_workers":[num_workers],
    "drop_last":[False],
    "batch_size":[1024, 2048, 4096, 8192],
    "virtual_batch_size":[128, 256, 512],
}


<IPython.core.display.Javascript object>

In [22]:
outer_split = 2
outer_test = 0.2
inner_split = 1
inner_test = 0.2
n_iter = 60

<IPython.core.display.Javascript object>

In [23]:
MODEL_PARAMS_KEYS = list(TabNetClassifier().get_params().keys())

Device used : cuda


<IPython.core.display.Javascript object>

In [24]:
%%time
params_results = []

for params in ParameterSampler(grid, n_iter=n_iter, random_state=0):
    params["n_d"] = params["n_a"]
    preds_params = np.zeros(shape=y_test.shape)
    results_outer = []
    for train_valid_index, test_index in StratifiedShuffleSplit(
        n_splits=outer_split, test_size=outer_test, random_state=0
    ).split(X, y):
        results_inner = []
        preds_outer = np.zeros(shape=y_test.shape)
        preds_split = np.zeros(shape=y[test_index].shape)
        for train_index, valid_index in StratifiedShuffleSplit(
            n_splits=inner_split, test_size=inner_test, random_state=0
        ).split(X[train_valid_index], y[train_valid_index]):
            X_train = X[train_valid_index][train_index]
            y_train = y[train_valid_index][train_index]
            X_valid = X[train_valid_index][valid_index]
            y_valid = y[train_valid_index][valid_index]
            
            
            model_params = {}
            fit_params = {}
            for elt, value in params.items():
                if elt in MODEL_PARAMS_KEYS:
                    model_params[elt] = value
                elif elt in ["emb_generator", "max_emd_dims"]:
                    model_params["cat_emb_dim"] = params["emb_generator"](cat_dims, params["max_emd_dims"])
                else:
                    fit_params[elt] = value

            clf = TabNetClassifier(cat_idxs=cat_idxs, cat_dims=cat_dims, **model_params)
            history = clf.fit(
                X_train, y_train, X_valid=X_valid, y_valid=y_valid, **fit_params
            )
            preds_inner = clf.predict_proba(X_test)[:, 1]
            preds_split += clf.predict_proba(X[test_index])[:, 1]

            preds_outer += preds_inner
            auc_inner_test = roc_auc_score(y_score=preds_inner, y_true=y_test)

            # preds_inner = clf.predict_proba(X_valid)[:, 1]
            # auc_inner_valid = roc_auc_score(y_score=preds_inner, y_true=y_valid)

            res_inner = {
                #"history": history,
                #'auc_valid': auc_inner_valid,
                "auc_test": auc_inner_test,
            }
            results_inner.append(res_inner)
            del preds_inner, clf
        preds_outer = preds_outer / len(results_inner)
        preds_split = preds_split / len(results_inner)
        preds_params += preds_outer
        res_outer = {
            "inner_results": results_inner,
            "auc_test": roc_auc_score(y_score=preds_outer, y_true=y_test),
            "auc_split": roc_auc_score(y_score=preds_split, y_true=y[test_index]),
        }
        results_outer.append(res_outer)
        del preds_outer
    preds_params = preds_params / len(results_outer)
    res_params = {
        "params": params,
        "outer_results": results_outer,
        "auc_test": roc_auc_score(y_score=preds_params, y_true=y_test),
    }
    params_results.append(res_params)
    del preds_params

Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.54126 |  0.63383 |   6.1       
| 2     | 0.77811 |  0.76281 |   11.3      
| 3     | 0.81333 |  0.78549 |   16.9      
| 4     | 0.83735 |  0.80371 |   22.3      
| 5     | 0.85264 |  0.82765 |   27.5      
| 6     | 0.86921 |  0.82433 |   32.5      
| 7     | 0.88870 |  0.86473 |   37.7      
| 8     | 0.89177 |  0.86091 |   43.0      
| 9     | 0.88985 |  0.82435 |   48.3      
| 10    | 0.88559 |  0.89799 |   53.5      
| 11    | 0.90417 |  0.89571 |   58.5      
| 12    | 0.89117 |  0.88243 |   63.5      
| 13    | 0.89903 |  0.87936 |   68.5      
| 14    | 0.90031 |  0.89862 |   73.6      
| 15    | 0.90562 |  0.90060 |   78.8      
| 16    | 0.90780 |  0.89944 |   83.9      
| 17    | 0.89975 |  0.89452 |   89.0      
| 18    | 0.89135 |  0.88898 |   94.1      
| 19    | 0.88471 |  0.90109 |

| 30    | 0.88087 |  0.87422 |   101.9     
| 31    | 0.88492 |  0.86448 |   105.3     
| 32    | 0.88469 |  0.88024 |   108.8     
| 33    | 0.88828 |  0.88017 |   112.2     
| 34    | 0.88988 |  0.87689 |   115.6     
| 35    | 0.89042 |  0.87668 |   118.9     
| 36    | 0.89367 |  0.87716 |   122.2     
| 37    | 0.89447 |  0.86953 |   125.5     
Early stopping occured at epoch 37
Training done in 125.459 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.53374 |  0.58107 |   3.4       
| 2     | 0.65300 |  0.71841 |   7.0       
| 3     | 0.75700 |  0.69190 |   10.5      
| 4     | 0.78429 |  0.74101 |   14.4      
| 5     | 0.80479 |  0.74962 |   18.0      
| 6     | 0.81545 |  0.74779 |   21.5      
| 7     | 0.82584 |  0.73630 |   25.0      
| 8     | 0.84685 |  0.75877 |   28.5      
| 9     |

| 1     | 0.52810 |  0.51790 |   5.2       
| 2     | 0.53116 |  0.70392 |   10.4      
| 3     | 0.67656 |  0.68164 |   15.7      
| 4     | 0.77084 |  0.75730 |   21.2      
| 5     | 0.79634 |  0.75861 |   26.5      
| 6     | 0.80317 |  0.74815 |   31.9      
| 7     | 0.81791 |  0.80175 |   37.4      
| 8     | 0.82258 |  0.81927 |   42.9      
| 9     | 0.81782 |  0.81971 |   49.0      
| 10    | 0.82652 |  0.81207 |   54.6      
| 11    | 0.83001 |  0.82716 |   59.9      
| 12    | 0.84363 |  0.83528 |   65.6      
| 13    | 0.83717 |  0.81220 |   71.1      
| 14    | 0.84965 |  0.83303 |   76.4      
| 15    | 0.86189 |  0.84313 |   81.6      
| 16    | 0.86260 |  0.84956 |   87.0      
| 17    | 0.86470 |  0.84895 |   92.6      
| 18    | 0.86235 |  0.83786 |   98.1      
| 19    | 0.84870 |  0.82738 |   103.2     
| 20    | 0.83070 |  0.84170 |   108.3     
| 21    | 0.84443 |  0.83934 |   113.6     
Early stopping occured at epoch 21
Training done in 113.610 seconds.
-------

| 41    | 0.93422 |  0.91564 |   95.6      
Early stopping occured at epoch 41
Training done in 95.636 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.63349 |  0.69852 |   2.3       
| 2     | 0.79917 |  0.78390 |   4.6       
| 3     | 0.82512 |  0.80459 |   6.9       
| 4     | 0.83836 |  0.80347 |   9.1       
| 5     | 0.84780 |  0.80998 |   11.3      
| 6     | 0.86036 |  0.83763 |   13.7      
| 7     | 0.86587 |  0.84392 |   16.2      
| 8     | 0.86932 |  0.78522 |   18.7      
| 9     | 0.86828 |  0.86062 |   21.2      
| 10    | 0.87442 |  0.86456 |   23.6      
| 11    | 0.88599 |  0.87723 |   26.1      
| 12    | 0.89101 |  0.87298 |   28.5      
| 13    | 0.89279 |  0.86797 |   31.0      
| 14    | 0.89513 |  0.85833 |   33.5      
| 15    | 0.89600 |  0.87157 |   35.9      
| 16    | 

Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.51795 |  0.58565 |   3.2       
| 2     | 0.59667 |  0.61090 |   6.3       
| 3     | 0.61630 |  0.57645 |   9.5       
| 4     | 0.67428 |  0.70001 |   12.5      
| 5     | 0.76208 |  0.78001 |   15.6      
| 6     | 0.80327 |  0.77647 |   18.8      
| 7     | 0.81142 |  0.77351 |   22.2      
| 8     | 0.80790 |  0.82467 |   25.5      
| 9     | 0.82962 |  0.83290 |   28.7      
| 10    | 0.84168 |  0.81118 |   31.7      
| 11    | 0.84332 |  0.82788 |   34.6      
| 12    | 0.84524 |  0.82906 |   37.5      
| 13    | 0.84910 |  0.83605 |   40.4      
| 14    | 0.85765 |  0.84655 |   43.4      
| 15    | 0.86214 |  0.84878 |   46.5      
| 16    | 0.86340 |  0.85465 |   49.4      
| 17    | 0.86637 |  0.85934 |   52.4      
| 18    | 0.87147 |  0.86583 |   55.5      
| 19    | 0.87722 |  0.86968 |

Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.50743 |  0.55081 |   2.1       
| 2     | 0.59636 |  0.58841 |   4.4       
| 3     | 0.58684 |  0.59869 |   6.6       
| 4     | 0.61847 |  0.62195 |   8.7       
| 5     | 0.66361 |  0.54672 |   10.8      
| 6     | 0.61922 |  0.62641 |   12.9      
| 7     | 0.65459 |  0.60174 |   15.1      
| 8     | 0.68806 |  0.63192 |   17.4      
| 9     | 0.71652 |  0.67703 |   19.6      
| 10    | 0.74444 |  0.68808 |   21.7      
| 11    | 0.76006 |  0.70062 |   23.9      
| 12    | 0.76927 |  0.72071 |   25.9      
| 13    | 0.77935 |  0.73487 |   28.1      
| 14    | 0.76211 |  0.74338 |   30.1      
| 15    | 0.77364 |  0.73996 |   32.3      
| 16    | 0.78406 |  0.75684 |   34.5      
| 17    | 0.79304 |  0.75301 |   36.6      
| 18    | 0.80667 |  0.76650 |   38.8      
| 19    | 0.80834 |  0.75676 |

| 27    | 0.88945 |  0.87054 |   78.4      
| 28    | 0.85649 |  0.84335 |   81.3      
| 29    | 0.84610 |  0.84579 |   84.2      
| 30    | 0.83537 |  0.82546 |   87.2      
| 31    | 0.84641 |  0.81317 |   90.5      
Early stopping occured at epoch 31
Training done in 90.474 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.50162 |  0.54267 |   4.3       
| 2     | 0.57221 |  0.64221 |   8.5       
| 3     | 0.65137 |  0.62072 |   12.6      
| 4     | 0.63472 |  0.60554 |   16.8      
| 5     | 0.67128 |  0.64900 |   21.1      
| 6     | 0.60385 |  0.62076 |   25.2      
| 7     | 0.63891 |  0.56609 |   29.2      
| 8     | 0.62824 |  0.67343 |   33.3      
| 9     | 0.69975 |  0.63916 |   37.4      
| 10    | 0.66688 |  0.62799 |   41.5      
| 11    | 0.67724 |  0.65323 |   45.5      
| 12    | 

Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.51159 |  0.62327 |   3.2       
| 2     | 0.65645 |  0.62575 |   6.5       
| 3     | 0.74746 |  0.65442 |   9.8       
| 4     | 0.67079 |  0.72261 |   13.0      
| 5     | 0.78918 |  0.73115 |   16.3      
| 6     | 0.80290 |  0.80162 |   19.6      
| 7     | 0.74483 |  0.75545 |   22.7      
| 8     | 0.79748 |  0.76249 |   25.9      
| 9     | 0.82144 |  0.76367 |   29.2      
| 10    | 0.83481 |  0.76732 |   32.4      
| 11    | 0.84336 |  0.81004 |   35.7      
| 12    | 0.86164 |  0.81575 |   39.0      
| 13    | 0.86451 |  0.82440 |   42.1      
| 14    | 0.86238 |  0.83333 |   45.4      
| 15    | 0.86418 |  0.82951 |   48.6      
| 16    | 0.86403 |  0.82859 |   51.8      
| 17    | 0.86523 |  0.83017 |   54.9      
| 18    | 0.87034 |  0.83455 |   58.2      
| 19    | 0.87014 |  0.83966 |

| 1     | 0.62061 |  0.71110 |   3.5       
| 2     | 0.79214 |  0.72197 |   7.0       
| 3     | 0.80923 |  0.78768 |   10.6      
| 4     | 0.82929 |  0.80743 |   14.0      
| 5     | 0.84267 |  0.77832 |   17.5      
| 6     | 0.86428 |  0.85720 |   20.9      
| 7     | 0.85491 |  0.83803 |   24.5      
| 8     | 0.83050 |  0.84474 |   28.1      
| 9     | 0.86626 |  0.86343 |   31.8      
| 10    | 0.85367 |  0.86685 |   35.5      
| 11    | 0.85937 |  0.74854 |   39.1      
| 12    | 0.86596 |  0.79115 |   42.7      
| 13    | 0.87523 |  0.85412 |   46.3      
| 14    | 0.87725 |  0.78900 |   49.6      
| 15    | 0.88445 |  0.76848 |   53.0      
Early stopping occured at epoch 15
Training done in 52.973 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.52978 |  0.56464 |   4.5       
| 2     | 

| 25    | 0.90830 |  0.89597 |   77.5      
| 26    | 0.91116 |  0.89772 |   80.7      
| 27    | 0.91026 |  0.88413 |   83.7      
| 28    | 0.90692 |  0.87004 |   86.8      
| 29    | 0.91219 |  0.81889 |   89.8      
| 30    | 0.91580 |  0.87494 |   92.9      
| 31    | 0.91406 |  0.89187 |   96.1      
Early stopping occured at epoch 31
Training done in 96.074 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.48887 |  0.50854 |   6.6       
| 2     | 0.55554 |  0.54611 |   13.2      
| 3     | 0.54929 |  0.65410 |   19.9      
| 4     | 0.51741 |  0.56708 |   26.3      
| 5     | 0.50233 |  0.49170 |   32.7      
| 6     | 0.51842 |  0.53531 |   39.0      
| 7     | 0.55205 |  0.57524 |   45.3      
| 8     | 0.62794 |  0.62762 |   51.8      
Early stopping occured at epoch 8
Training done in 51.

| 9     | 0.81416 |  0.75858 |   37.1      
| 10    | 0.82334 |  0.77664 |   41.2      
| 11    | 0.82765 |  0.78467 |   45.1      
| 12    | 0.82757 |  0.77018 |   49.2      
Early stopping occured at epoch 12
Training done in 49.158 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.51394 |  0.48985 |   4.3       
| 2     | 0.53664 |  0.63032 |   8.3       
| 3     | 0.66628 |  0.58079 |   12.1      
| 4     | 0.69847 |  0.67224 |   16.1      
| 5     | 0.72186 |  0.66335 |   20.3      
| 6     | 0.75140 |  0.62585 |   24.4      
| 7     | 0.76224 |  0.68772 |   28.7      
| 8     | 0.77909 |  0.70892 |   32.8      
| 9     | 0.78482 |  0.75092 |   36.9      
| 10    | 0.78541 |  0.76770 |   41.0      
| 11    | 0.78952 |  0.75831 |   45.3      
| 12    | 0.81211 |  0.78614 |   49.5      
| 13    | 

| 22    | 0.91317 |  0.90451 |   50.5      
| 23    | 0.91799 |  0.90943 |   52.9      
| 24    | 0.92266 |  0.90967 |   55.1      
| 25    | 0.92458 |  0.91487 |   57.4      
| 26    | 0.92579 |  0.91554 |   59.6      
| 27    | 0.92782 |  0.91755 |   61.9      
| 28    | 0.92969 |  0.91748 |   64.3      
| 29    | 0.93074 |  0.91986 |   66.6      
| 30    | 0.93316 |  0.91412 |   69.0      
| 31    | 0.93215 |  0.92115 |   71.4      
| 32    | 0.93171 |  0.91975 |   73.8      
| 33    | 0.93293 |  0.92075 |   76.0      
| 34    | 0.93154 |  0.91463 |   78.3      
| 35    | 0.93235 |  0.91105 |   80.6      
| 36    | 0.93126 |  0.92046 |   82.8      
Early stopping occured at epoch 36
Training done in 82.815 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.54591 |  0.58670 |   3.2       
| 2     | 

| 16    | 0.92083 |  0.90998 |   51.8      
| 17    | 0.91480 |  0.88807 |   55.2      
| 18    | 0.90331 |  0.89491 |   58.5      
| 19    | 0.90899 |  0.89782 |   61.8      
| 20    | 0.90860 |  0.89929 |   65.0      
| 21    | 0.91511 |  0.90004 |   68.3      
Early stopping occured at epoch 21
Training done in 68.310 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.60122 |  0.59128 |   3.6       
| 2     | 0.78214 |  0.66724 |   7.6       
| 3     | 0.80067 |  0.73537 |   11.7      
| 4     | 0.78328 |  0.79192 |   16.0      
| 5     | 0.83495 |  0.82418 |   20.0      
| 6     | 0.85497 |  0.77943 |   23.9      
| 7     | 0.85738 |  0.83312 |   27.9      
| 8     | 0.86202 |  0.84496 |   31.8      
| 9     | 0.86834 |  0.86298 |   35.6      
| 10    | 0.87997 |  0.86759 |   39.4      
| 11    | 

| 28    | 0.91676 |  0.91224 |   143.2     
Early stopping occured at epoch 28
Training done in 143.179 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.56692 |  0.60620 |   3.2       
| 2     | 0.71073 |  0.73782 |   6.4       
| 3     | 0.79445 |  0.79707 |   9.5       
| 4     | 0.83450 |  0.82763 |   12.7      
| 5     | 0.84784 |  0.82063 |   15.7      
| 6     | 0.84665 |  0.83793 |   18.9      
| 7     | 0.86074 |  0.84636 |   22.2      
| 8     | 0.86970 |  0.84789 |   25.3      
| 9     | 0.87891 |  0.86157 |   28.5      
| 10    | 0.88024 |  0.86216 |   31.7      
| 11    | 0.88189 |  0.86225 |   35.0      
| 12    | 0.88364 |  0.86718 |   38.3      
| 13    | 0.87928 |  0.86956 |   41.4      
| 14    | 0.88188 |  0.86804 |   44.6      
| 15    | 0.88553 |  0.87475 |   47.7      
| 16    |

| 19    | 0.89963 |  0.89077 |   111.8     
| 20    | 0.90390 |  0.89362 |   117.8     
| 21    | 0.90655 |  0.89473 |   123.8     
| 22    | 0.90754 |  0.89701 |   129.8     
| 23    | 0.90628 |  0.89065 |   135.7     
| 24    | 0.90764 |  0.88641 |   141.5     
| 25    | 0.90885 |  0.89507 |   147.5     
| 26    | 0.90916 |  0.87931 |   153.2     
| 27    | 0.91063 |  0.87342 |   159.0     
Early stopping occured at epoch 27
Training done in 159.043 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.54339 |  0.58916 |   5.8       
| 2     | 0.68805 |  0.61557 |   11.6      
| 3     | 0.65379 |  0.60513 |   17.5      
| 4     | 0.73456 |  0.69310 |   23.2      
| 5     | 0.76397 |  0.77017 |   28.9      
| 6     | 0.78009 |  0.79027 |   34.8      
| 7     | 0.81034 |  0.79533 |   40.6      
| 8     |

| 10    | 0.85296 |  0.84546 |   38.7      
| 11    | 0.86028 |  0.84282 |   42.7      
| 12    | 0.85657 |  0.85433 |   46.6      
| 13    | 0.86051 |  0.85772 |   50.4      
| 14    | 0.86641 |  0.84886 |   54.5      
| 15    | 0.86574 |  0.86093 |   58.4      
| 16    | 0.87007 |  0.86612 |   62.4      
| 17    | 0.87518 |  0.86872 |   66.3      
| 18    | 0.88151 |  0.86617 |   70.2      
| 19    | 0.88342 |  0.87240 |   74.1      
| 20    | 0.88562 |  0.86991 |   77.7      
| 21    | 0.88607 |  0.87788 |   81.5      
| 22    | 0.88919 |  0.88011 |   85.5      
| 23    | 0.89095 |  0.86514 |   89.3      
| 24    | 0.88185 |  0.88102 |   93.1      
| 25    | 0.89198 |  0.88181 |   96.9      
| 26    | 0.88597 |  0.86861 |   100.8     
| 27    | 0.88570 |  0.87293 |   104.7     
| 28    | 0.88550 |  0.87280 |   108.5     
| 29    | 0.88652 |  0.87873 |   112.4     
| 30    | 0.89219 |  0.88419 |   116.1     
| 31    | 0.89346 |  0.88435 |   120.2     
| 32    | 0.89660 |  0.88787 |  

| 8     | 0.63079 |  0.64051 |   35.5      
| 9     | 0.66136 |  0.66726 |   39.9      
| 10    | 0.65647 |  0.66658 |   44.1      
| 11    | 0.64163 |  0.65188 |   48.3      
| 12    | 0.67677 |  0.67846 |   52.3      
| 13    | 0.65338 |  0.64543 |   56.4      
| 14    | 0.64638 |  0.66143 |   60.5      
| 15    | 0.69343 |  0.74892 |   64.6      
| 16    | 0.72300 |  0.76025 |   68.7      
| 17    | 0.74457 |  0.78100 |   72.8      
| 18    | 0.76258 |  0.74320 |   76.9      
| 19    | 0.77028 |  0.75204 |   81.1      
| 20    | 0.78034 |  0.80517 |   85.4      
| 21    | 0.79262 |  0.82132 |   89.5      
| 22    | 0.80268 |  0.81184 |   93.7      
| 23    | 0.81585 |  0.81656 |   97.9      
| 24    | 0.83560 |  0.82385 |   102.2     
| 25    | 0.84931 |  0.83086 |   106.7     
| 26    | 0.85675 |  0.82948 |   110.9     
| 27    | 0.85942 |  0.85104 |   115.1     
| 28    | 0.86781 |  0.84515 |   119.3     
| 29    | 0.86885 |  0.85247 |   123.5     
| 30    | 0.87252 |  0.85527 |  

| 10    | 0.87015 |  0.87175 |   74.8      
| 11    | 0.87698 |  0.87819 |   82.4      
| 12    | 0.88541 |  0.87844 |   89.6      
| 13    | 0.88871 |  0.88754 |   96.8      
| 14    | 0.89384 |  0.82006 |   104.3     
| 15    | 0.88755 |  0.73966 |   111.8     
| 16    | 0.89129 |  0.84487 |   119.3     
| 17    | 0.89516 |  0.77504 |   126.6     
| 18    | 0.89161 |  0.88225 |   133.9     
Early stopping occured at epoch 18
Training done in 133.916 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.52284 |  0.47709 |   4.0       
| 2     | 0.58581 |  0.53985 |   8.1       
| 3     | 0.71934 |  0.64273 |   12.0      
| 4     | 0.79978 |  0.74624 |   15.9      
| 5     | 0.82849 |  0.80325 |   19.8      
| 6     | 0.83785 |  0.76095 |   23.8      
| 7     | 0.82180 |  0.81821 |   27.8      
| 8     |

Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.63810 |  0.72577 |   4.3       
| 2     | 0.77203 |  0.77510 |   8.6       
| 3     | 0.81460 |  0.79318 |   13.1      
| 4     | 0.83460 |  0.82037 |   17.5      
| 5     | 0.84303 |  0.82953 |   21.8      
| 6     | 0.85462 |  0.85564 |   26.2      
| 7     | 0.86054 |  0.85916 |   30.6      
| 8     | 0.86689 |  0.85020 |   35.0      
| 9     | 0.86848 |  0.84210 |   39.3      
| 10    | 0.86747 |  0.84292 |   43.6      
| 11    | 0.86973 |  0.84565 |   48.0      
| 12    | 0.87568 |  0.84720 |   52.3      
Early stopping occured at epoch 12
Training done in 52.262 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     

| 19    | 0.69020 |  0.68458 |   143.5     
| 20    | 0.68431 |  0.69482 |   151.1     
| 21    | 0.69958 |  0.72266 |   159.0     
| 22    | 0.74757 |  0.66235 |   166.6     
| 23    | 0.77825 |  0.69194 |   174.5     
| 24    | 0.78845 |  0.76788 |   182.3     
| 25    | 0.78582 |  0.77680 |   189.9     
| 26    | 0.78666 |  0.78686 |   197.5     
| 27    | 0.79643 |  0.77659 |   205.0     
| 28    | 0.79581 |  0.75425 |   212.6     
| 29    | 0.79506 |  0.77307 |   220.1     
| 30    | 0.80430 |  0.75889 |   227.8     
| 31    | 0.79815 |  0.73479 |   235.3     
Early stopping occured at epoch 31
Training done in 235.323 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.59558 |  0.59512 |   2.6       
| 2     | 0.75475 |  0.71833 |   5.3       
| 3     | 0.78543 |  0.76881 |   8.0       
| 4     |

| 17    | 0.86695 |  0.86447 |   78.5      
| 18    | 0.86398 |  0.87478 |   82.7      
| 19    | 0.86999 |  0.87827 |   87.2      
Early stopping occured at epoch 19
Training done in 87.248 seconds.
---------------------------------------
Device used : cuda
Will train until validation stopping metric hasn't improved in 5 rounds.
---------------------------------------
| EPOCH |  train  |   valid  | total time (s)
| 1     | 0.51886 |  0.53701 |   4.5       
| 2     | 0.64675 |  0.72676 |   9.4       
| 3     | 0.70375 |  0.73001 |   14.1      
| 4     | 0.79624 |  0.77407 |   18.8      
| 5     | 0.81020 |  0.80167 |   23.4      
| 6     | 0.82137 |  0.79049 |   28.1      
| 7     | 0.84116 |  0.81864 |   32.7      
| 8     | 0.84493 |  0.75366 |   37.5      
| 9     | 0.84819 |  0.77089 |   42.1      
| 10    | 0.84113 |  0.76348 |   46.6      
| 11    | 0.84846 |  0.83902 |   51.3      
| 12    | 0.86230 |  0.82603 |   55.9      
| 13    | 0.86245 |  0.85967 |   60.6      
| 14    | 

<IPython.core.display.Javascript object>

In [25]:
agg_results = []
for res in params_results:
    outer_auc = np.array(list(map(lambda elt: elt["auc_test"], res["outer_results"])))
    inner_auc = np.array(
        list(
            map(
                lambda elt: list(map(lambda e: e["auc_test"], elt["inner_results"])),
                res["outer_results"],
            )
        )
    )
    split_auc = np.array(list(map(lambda elt: elt["auc_split"], res["outer_results"])))
    agg = {
        "params": res["params"],
        "auc_test": res["auc_test"],
        "outer_auc": outer_auc,
        "inner_auc": inner_auc,
        "split_auc": split_auc,
    }
    agg_results.append(agg)

<IPython.core.display.Javascript object>

In [26]:
agg_results

[{'params': {'batch_size': 1024,
   'clip_value': 1,
   'drop_last': False,
   'emb_generator': <function __main__.log_emb_generator(cat_dim_list, max_dim)>,
   'gamma': 2.430378732744839,
   'lambda_sparse': 0.0010323260351976567,
   'lr': 0.1,
   'max_emd_dims': 10,
   'max_epochs': 1000,
   'momentum': 0.2750776403889842,
   'n_a': 27,
   'n_independent': 3,
   'n_shared': 4,
   'n_steps': 9,
   'num_workers': 12,
   'patience': 5,
   'verbose': 1,
   'virtual_batch_size': 512,
   'n_d': 27},
  'auc_test': 0.9176059369863938,
  'outer_auc': array([0.89990042, 0.91493624]),
  'inner_auc': array([[0.89990042],
         [0.91493624]]),
  'split_auc': array([0.89421693, 0.91595626])},
 {'params': {'batch_size': 1024,
   'clip_value': 1,
   'drop_last': False,
   'emb_generator': <function __main__.emb_generator(cat_dim_list, max_dim)>,
   'gamma': 1.5453125891602264,
   'lambda_sparse': 0.00024452630570839887,
   'lr': 0.1,
   'max_emd_dims': 1,
   'max_epochs': 1000,
   'momentum': 0.0

<IPython.core.display.Javascript object>

In [27]:
df = pd.DataFrame(agg_results).reset_index().rename(columns={"index": "params_idx"})

outer_agg = df[["params_idx", "outer_auc"]].explode("outer_auc")
outer_agg["outer_auc"] = outer_agg["outer_auc"].astype("float")
outer_agg = outer_agg.groupby("params_idx").agg(
    {"outer_auc": ["mean", "max", "min", "std"]}
)
outer_agg.columns = ["_".join(col).rstrip("_") for col in outer_agg.columns.values]
outer_agg.head()

split_agg = df[["params_idx", "split_auc"]].explode("split_auc")
split_agg["split_auc"] = split_agg["split_auc"].astype("float")
split_agg = split_agg.groupby("params_idx").agg(
    {"split_auc": ["mean", "max", "min", "std"]}
)
split_agg.columns = ["_".join(col).rstrip("_") for col in split_agg.columns.values]
split_agg.head()

inner_agg = df[["params_idx", "inner_auc"]].explode("inner_auc").explode("inner_auc")
inner_agg["inner_auc"] = inner_agg["inner_auc"].astype("float")
inner_agg = inner_agg.groupby("params_idx").agg(
    {"inner_auc": ["mean", "max", "min", "std"]}
)
inner_agg.columns = ["_".join(col).rstrip("_") for col in inner_agg.columns.values]
inner_agg.head()

df = (
    df.merge(outer_agg, on="params_idx")
    .merge(split_agg, on="params_idx")
    .merge(inner_agg, on="params_idx")
    .drop(columns=["params_idx", "outer_auc", "inner_auc", "split_auc"])
)
df = df.sort_values(by=["auc_test"], ascending=False).reset_index(drop=True)
df.head()

Unnamed: 0,params,auc_test,outer_auc_mean,outer_auc_max,outer_auc_min,outer_auc_std,split_auc_mean,split_auc_max,split_auc_min,split_auc_std,inner_auc_mean,inner_auc_max,inner_auc_min,inner_auc_std
0,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.925806,0.92246,0.924191,0.92073,0.002448,0.92026,0.922063,0.918457,0.00255,0.92246,0.924191,0.92073,0.001999
1,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.925298,0.920686,0.922044,0.919327,0.001921,0.922803,0.924874,0.920733,0.002928,0.920686,0.922044,0.919327,0.001569
2,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.923642,0.9174,0.920491,0.914308,0.004372,0.916278,0.917346,0.91521,0.00151,0.9174,0.920491,0.914308,0.00357
3,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.922637,0.915354,0.916212,0.914497,0.001212,0.916081,0.92068,0.911481,0.006505,0.915354,0.916212,0.914497,0.00099
4,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.921805,0.914663,0.916327,0.912998,0.002354,0.91325,0.914964,0.911536,0.002424,0.914663,0.916327,0.912998,0.001922


<IPython.core.display.Javascript object>

In [28]:
df["params"][0]

{'batch_size': 1024,
 'clip_value': 1,
 'drop_last': False,
 'emb_generator': <function __main__.emb_generator(cat_dim_list, max_dim)>,
 'gamma': 1.73961618549668,
 'lambda_sparse': 5.397700876300653e-06,
 'lr': 0.1,
 'max_emd_dims': 5,
 'max_epochs': 1000,
 'momentum': 0.028756461671198243,
 'n_a': 42,
 'n_independent': 4,
 'n_shared': 5,
 'n_steps': 2,
 'num_workers': 12,
 'patience': 5,
 'verbose': 1,
 'virtual_batch_size': 128,
 'n_d': 42}

<IPython.core.display.Javascript object>

In [29]:
df["params"][1]

{'batch_size': 2048,
 'clip_value': 1,
 'drop_last': False,
 'emb_generator': <function __main__.emb_generator(cat_dim_list, max_dim)>,
 'gamma': 1.7112254756999112,
 'lambda_sparse': 0.05036858219677769,
 'lr': 0.1,
 'max_emd_dims': 5,
 'max_epochs': 1000,
 'momentum': 0.18583213116478564,
 'n_a': 9,
 'n_independent': 3,
 'n_shared': 5,
 'n_steps': 2,
 'num_workers': 12,
 'patience': 5,
 'verbose': 1,
 'virtual_batch_size': 512,
 'n_d': 9}

<IPython.core.display.Javascript object>

In [30]:
df["params"][2]

{'batch_size': 1024,
 'clip_value': 1,
 'drop_last': False,
 'emb_generator': <function __main__.emb_generator(cat_dim_list, max_dim)>,
 'gamma': 2.757739483554942,
 'lambda_sparse': 0.00028929518644642103,
 'lr': 0.1,
 'max_emd_dims': 20,
 'max_epochs': 1000,
 'momentum': 0.22794035656379164,
 'n_a': 57,
 'n_independent': 2,
 'n_shared': 4,
 'n_steps': 3,
 'num_workers': 12,
 'patience': 5,
 'verbose': 1,
 'virtual_batch_size': 256,
 'n_d': 57}

<IPython.core.display.Javascript object>

In [31]:
df.to_csv("tabnet_grid_census.csv", index=False)

<IPython.core.display.Javascript object>

In [32]:
df

Unnamed: 0,params,auc_test,outer_auc_mean,outer_auc_max,outer_auc_min,outer_auc_std,split_auc_mean,split_auc_max,split_auc_min,split_auc_std,inner_auc_mean,inner_auc_max,inner_auc_min,inner_auc_std
0,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.925806,0.92246,0.924191,0.92073,0.002448,0.92026,0.922063,0.918457,0.00255,0.92246,0.924191,0.92073,0.001999
1,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.925298,0.920686,0.922044,0.919327,0.001921,0.922803,0.924874,0.920733,0.002928,0.920686,0.922044,0.919327,0.001569
2,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.923642,0.9174,0.920491,0.914308,0.004372,0.916278,0.917346,0.91521,0.00151,0.9174,0.920491,0.914308,0.00357
3,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.922637,0.915354,0.916212,0.914497,0.001212,0.916081,0.92068,0.911481,0.006505,0.915354,0.916212,0.914497,0.00099
4,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.921805,0.914663,0.916327,0.912998,0.002354,0.91325,0.914964,0.911536,0.002424,0.914663,0.916327,0.912998,0.001922
5,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.921788,0.911665,0.920194,0.903136,0.012062,0.912039,0.919941,0.904137,0.011176,0.911665,0.920194,0.903136,0.009849
6,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.919137,0.907132,0.919315,0.894948,0.01723,0.905553,0.915406,0.895699,0.013935,0.907132,0.919315,0.894948,0.014068
7,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.917606,0.907418,0.914936,0.8999,0.010632,0.905087,0.915956,0.894217,0.015372,0.907418,0.914936,0.8999,0.008681
8,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.916831,0.902693,0.918672,0.886714,0.022597,0.900001,0.915255,0.884748,0.021571,0.902693,0.918672,0.886714,0.018451
9,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.916487,0.909845,0.910799,0.908891,0.001349,0.906228,0.907861,0.904594,0.00231,0.909845,0.910799,0.908891,0.001102


<IPython.core.display.Javascript object>

In [33]:
df.sort_values(by=["outer_auc_std"], ascending=True)

Unnamed: 0,params,auc_test,outer_auc_mean,outer_auc_max,outer_auc_min,outer_auc_std,split_auc_mean,split_auc_max,split_auc_min,split_auc_std,inner_auc_mean,inner_auc_max,inner_auc_min,inner_auc_std
32,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.900088,0.886363,0.886437,0.886289,0.000104,0.889974,0.893886,0.886061,0.005533,0.886363,0.886437,0.886289,8.5e-05
26,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.904174,0.892176,0.892375,0.891977,0.000282,0.888673,0.890631,0.886716,0.002769,0.892176,0.892375,0.891977,0.00023
58,"{'batch_size': 4096, 'clip_value': 1, 'drop_la...",0.686786,0.652929,0.653418,0.652441,0.000691,0.648247,0.648409,0.648086,0.000228,0.652929,0.653418,0.652441,0.000564
3,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.922637,0.915354,0.916212,0.914497,0.001212,0.916081,0.92068,0.911481,0.006505,0.915354,0.916212,0.914497,0.00099
9,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.916487,0.909845,0.910799,0.908891,0.001349,0.906228,0.907861,0.904594,0.00231,0.909845,0.910799,0.908891,0.001102
12,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.912061,0.900836,0.902177,0.899496,0.001896,0.895642,0.897171,0.894114,0.002162,0.900836,0.902177,0.899496,0.001548
1,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.925298,0.920686,0.922044,0.919327,0.001921,0.922803,0.924874,0.920733,0.002928,0.920686,0.922044,0.919327,0.001569
4,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.921805,0.914663,0.916327,0.912998,0.002354,0.91325,0.914964,0.911536,0.002424,0.914663,0.916327,0.912998,0.001922
0,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.925806,0.92246,0.924191,0.92073,0.002448,0.92026,0.922063,0.918457,0.00255,0.92246,0.924191,0.92073,0.001999
53,"{'batch_size': 8192, 'clip_value': 1, 'drop_la...",0.850021,0.831989,0.833729,0.830248,0.002462,0.826592,0.83133,0.821854,0.0067,0.831989,0.833729,0.830248,0.00201


<IPython.core.display.Javascript object>

In [34]:
df.sort_values(by=["split_auc_std"], ascending=True)

Unnamed: 0,params,auc_test,outer_auc_mean,outer_auc_max,outer_auc_min,outer_auc_std,split_auc_mean,split_auc_max,split_auc_min,split_auc_std,inner_auc_mean,inner_auc_max,inner_auc_min,inner_auc_std
58,"{'batch_size': 4096, 'clip_value': 1, 'drop_la...",0.686786,0.652929,0.653418,0.652441,0.000691,0.648247,0.648409,0.648086,0.000228,0.652929,0.653418,0.652441,0.000564
18,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.911241,0.900845,0.904539,0.897151,0.005224,0.89672,0.89691,0.896531,0.000268,0.900845,0.904539,0.897151,0.004265
30,"{'batch_size': 4096, 'clip_value': 1, 'drop_la...",0.900811,0.886719,0.890499,0.88294,0.005345,0.884555,0.884826,0.884283,0.000384,0.886719,0.890499,0.88294,0.004364
42,"{'batch_size': 8192, 'clip_value': 1, 'drop_la...",0.889792,0.877497,0.88162,0.873375,0.00583,0.873819,0.874143,0.873496,0.000458,0.877497,0.88162,0.873375,0.00476
54,"{'batch_size': 8192, 'clip_value': 1, 'drop_la...",0.846501,0.82638,0.831962,0.820798,0.007894,0.825727,0.826464,0.824991,0.001041,0.82638,0.831962,0.820798,0.006445
2,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.923642,0.9174,0.920491,0.914308,0.004372,0.916278,0.917346,0.91521,0.00151,0.9174,0.920491,0.914308,0.00357
43,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.88463,0.866506,0.870379,0.862633,0.005477,0.864945,0.866428,0.863461,0.002098,0.866506,0.870379,0.862633,0.004472
12,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.912061,0.900836,0.902177,0.899496,0.001896,0.895642,0.897171,0.894114,0.002162,0.900836,0.902177,0.899496,0.001548
9,"{'batch_size': 2048, 'clip_value': 1, 'drop_la...",0.916487,0.909845,0.910799,0.908891,0.001349,0.906228,0.907861,0.904594,0.00231,0.909845,0.910799,0.908891,0.001102
4,"{'batch_size': 1024, 'clip_value': 1, 'drop_la...",0.921805,0.914663,0.916327,0.912998,0.002354,0.91325,0.914964,0.911536,0.002424,0.914663,0.916327,0.912998,0.001922


<IPython.core.display.Javascript object>