In [None]:
import os
import gc
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from pytorch_tabnet.tab_model import TabNetClassifier
import torch

from sklearn.impute import SimpleImputer

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Directory that holds train.csv, test.csv and labels.csv and that receives
# search_results.csv. With an empty string, os.path.join returns the bare
# file name, so every path resolves relative to the current working directory.
DATA_DIRECTORY = ""

In [None]:
# Read the three input tables from DATA_DIRECTORY, in the same order as the
# names on the left-hand side.
train, test, labels = (
    pd.read_csv(os.path.join(DATA_DIRECTORY, csv_name))
    for csv_name in ('train.csv', 'test.csv', 'labels.csv')
)

In [None]:
# Set the SK_ID_CURR column aside now: the imputation cell further down
# reassigns `test` to the imputer's transformed output.
test_id = test['SK_ID_CURR']

In [None]:
# Learn per-column medians from the training table only ...
imputer = SimpleImputer(strategy='median').fit(train)
# ... then fill the missing values of both tables with those medians.
# NOTE(review): every column of `train` is imputed, so all of them are
# presumably numeric -- confirm against the input files.
train = imputer.transform(train)
test = imputer.transform(test)

In [None]:
# Convert the labels frame to a NumPy array. Later cells index the result as
# [:, 0], i.e. they use the first column of this 2-D array as the target.
target = labels.to_numpy()
# The DataFrame is not needed once the array exists, so drop the reference
# and ask the garbage collector to reclaim it right away.
del labels
gc.collect()

In [None]:
# Hold out 30% of the rows for validation; the fixed random_state makes the
# split reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(
    train,
    target,
    test_size=0.30,
    random_state=8,
)

In [None]:
def accuracy_score(y_true, y_pred):
    """Return the fraction of entries in ``y_pred`` that equal ``y_true``.

    Both arguments are flattened by concatenating their elements along the
    first axis, so each may be a plain 1-D label vector or a sequence of
    arrays (for example a 2-D array, or a list with one array per output).

    Parameters
    ----------
    y_true : iterable
        Ground-truth labels. After concatenation it is reshaped to the shape
        of the concatenated predictions, so it must hold the same number of
        elements.
    y_pred : iterable
        Predicted labels.

    Returns
    -------
    float
        Number of matching entries divided by ``len`` of the flattened
        ground truth.

    Raises
    ------
    ValueError
        If either input is empty, or if the element counts differ.
    """
    # Iterating a 1-D vector yields scalars, which np.concatenate rejects;
    # np.atleast_1d promotes them to length-1 arrays and leaves real arrays
    # untouched, so multi-output inputs behave exactly as before.
    y_pred = np.concatenate([np.atleast_1d(part) for part in y_pred])
    y_true = np.concatenate(
        [np.atleast_1d(part) for part in y_true]
    ).reshape(y_pred.shape)

    return (y_true == y_pred).sum() / float(len(y_true))

In [None]:
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import roc_auc_score

# Function that instantiates a tabnet model.
def create_tabnet(n_d=32, n_steps=5, lr=0.02, gamma=1.5,
                  n_independent=2, n_shared=2, lambda_sparse=1e-4,
                  momentum=0.3, clip_value=2.):
    """Build an unfitted ``TabNetClassifier`` with this notebook's defaults.

    Parameters
    ----------
    n_d : int
        Passed to the classifier as both ``n_d`` and ``n_a``.
    n_steps, gamma, n_independent, n_shared, lambda_sparse, momentum, clip_value
        Forwarded unchanged to the ``TabNetClassifier`` arguments of the
        same name.
    lr : float
        Learning rate handed to the Adam optimizer through
        ``optimizer_params``.

    Returns
    -------
    TabNetClassifier
        A silent (``verbose=0``) model using Adam and a ``StepLR`` scheduler
        with ``step_size=20`` and ``gamma=0.95``.
    """
    return TabNetClassifier(
        n_d=n_d, n_a=n_d, n_steps=n_steps,
        gamma=gamma, n_independent=n_independent, n_shared=n_shared,
        lambda_sparse=lambda_sparse, momentum=momentum, clip_value=clip_value,
        optimizer_fn=torch.optim.Adam,
        # Without this the `lr` argument was accepted but never reached the
        # optimizer, which always ran at the library's default rate.
        optimizer_params={"lr": lr},
        scheduler_params={"gamma": 0.95,
                          "step_size": 20},
        scheduler_fn=torch.optim.lr_scheduler.StepLR, epsilon=1e-15, verbose=0
    )
                  
# Generate the parameter grid (4 * 3 * 3 * 3 * 3 = 324 configurations).
param_grid = dict(n_d = [8, 16, 32, 64],
                  n_steps = [3, 4, 5],
                  gamma = [1, 1.5, 2],
                  lambda_sparse = [1e-2, 1e-3, 1e-4],
                  momentum = [0.3, 0.4, 0.5],
                  n_shared = [2],
                  n_independent = [2],
                  clip_value = [2.],
)

grid = ParameterGrid(param_grid)

# One record per configuration; the frame is built once after the loop.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration.
search_records = []
for params in grid:
    params['n_a'] = params['n_d'] # n_a=n_d always per the paper
    tabnet = create_tabnet()
    tabnet.set_params(**params)
    tabnet.fit(
        x_train, y_train[:, 0],
        eval_set=[(x_train, y_train[:, 0]), (x_val, y_val[:, 0])],
        eval_name=['train', 'valid'],
        eval_metric=['auc'],
        max_epochs=1000, patience=50,
        batch_size=256, virtual_batch_size=128,
        num_workers=0,
        # In pytorch-tabnet, weights=1 turns on automatic sampling that is
        # inversely proportional to class frequency.
        weights=1,
        drop_last=False
    )

    y_prob = tabnet.predict_proba(x_val)
    # Score against the same label column the model was fitted on, instead
    # of the whole 2-D label array.
    auc = roc_auc_score(y_val[:, 0], y_prob[:, 1])
    # 2*AUC - 1 (the Gini coefficient), floored at zero.
    score = max(2*auc - 1, 0.)

    search_records.append({**params, 'score': np.round(score, 3)})

search_results = pd.DataFrame(search_records)


In [None]:
# Persist the grid-search table next to the input data, without the index.
results_path = os.path.join(DATA_DIRECTORY, 'search_results.csv')
search_results.to_csv(results_path, index=False)