In [None]:
import os
import gc
import pandas as pd
from sklearn.model_selection import train_test_split

from pytorch_tabnet.tab_model import TabNetClassifier
import torch

from sklearn.impute import SimpleImputer

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Folder that holds train.csv, test.csv and labels.csv.
# An empty string makes os.path.join() return the bare file name, so the
# files are looked up relative to the current working directory.
DATA_DIRECTORY: str = ""

In [None]:
# Read the three input tables from DATA_DIRECTORY in one pass; the tuple order
# matches the unpacking order (train, test, labels).
train, test, labels = (
    pd.read_csv(os.path.join(DATA_DIRECTORY, csv_name))
    for csv_name in ('train.csv', 'test.csv', 'labels.csv')
)

In [None]:
# Keep the test-set identifiers aside now: `test` is rebound to the imputer's
# output later on, and the ids are needed again to build the submission file.
test_id = test.loc[:, 'SK_ID_CURR']

In [None]:
# Fill missing values with per-column medians. The medians are learned from the
# training table only and then re-applied unchanged to the test table.
# By default SimpleImputer returns NumPy arrays, so `train` and `test` stop
# being DataFrames from this point on.
# NOTE(review): `test` still carries the SK_ID_CURR column at this point, so the
# identifier is imputed and presumably passed to the model as a feature --
# confirm that this is intended.
imputer = SimpleImputer(strategy='median')
train = imputer.fit_transform(train)
test = imputer.transform(test)

In [None]:
# Turn the label table into a NumPy array; later cells index it as
# target[:, 0], i.e. they read its first column.
target = labels.to_numpy()

# The DataFrame is not needed once the array exists: drop the name and ask the
# garbage collector to run. (Re-running this cell on its own raises NameError
# because `labels` is gone -- reload the data first.)
del labels
gc.collect()

In [None]:
# Hold out 30% of the rows for validation; the fixed random_state keeps the
# split reproducible across runs.
x_train, x_val, y_train, y_val = train_test_split(
    train,
    target,
    test_size=0.30,
    random_state=8,
)

In [None]:
# Model configuration follows:
# https://towardsdatascience.com/tabnet-deep-neural-network-for-structured-tabular-data-39eb4b27a9e4
#
# Optimizer: Adam with an initial learning rate of 0.02.
# Scheduler: StepLR, which multiplies the learning rate by `gamma` (0.9) once
#            every `step_size` (10) scheduler steps.
# Mask:      'entmax' ("sparsemax" is the alternative value).
tabnet_clf = TabNetClassifier(
    optimizer_fn=torch.optim.Adam,
    optimizer_params={"lr": 2e-2},
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    scheduler_params=dict(step_size=10, gamma=0.9),
    mask_type='entmax',
)

In [None]:
# Train the classifier with early stopping.
#
# pytorch-tabnet monitors the LAST metric in `eval_metric`, evaluated on the
# LAST entry of `eval_set`, for early stopping and best-weights selection.
# 'auc' is therefore listed last: the notebook submits predicted probabilities,
# i.e. a ranking, so validation AUC is the quantity to stop on. Accuracy is
# still reported every epoch but no longer drives the stopping decision.
#
# Only the first column of the label arrays is used as the target.
tabnet_clf.fit(
    x_train, y_train[:, 0],
    eval_set=[(x_train, y_train[:, 0]), (x_val, y_val[:, 0])],
    eval_name=['train', 'valid'],
    eval_metric=['accuracy', 'auc'],  # last entry = early-stopping metric
    max_epochs=1000, patience=50,     # stop after 50 epochs without improvement
    batch_size=256, virtual_batch_size=128,
    num_workers=0,
    weights=1,  # per pytorch-tabnet docs: 1 = sampling with inverse class occurrences
    drop_last=False,
)

In [None]:
# Score the imputed test set and keep the second probability column
# (presumably the positive class, TARGET == 1 -- confirm against
# tabnet_clf.classes_).
predictions = tabnet_clf.predict_proba(test)[:, 1]

# Pair every saved test id with its predicted probability and write the
# result to disk without the DataFrame index.
submission = pd.DataFrame(
    {
        'SK_ID_CURR': test_id.values,
        'TARGET': predictions,
    }
)
submission.to_csv('tabnet.csv', index=False)