![Tabular Playground Series - Sep 2021 competition header](https://storage.googleapis.com/kaggle-competitions/kaggle/28009/logos/header.png?)

In [None]:
!pip install pytorch_tabnet 

# Import

In [None]:
import sys
# Add the ../input/rank-gauss directory to the import path; presumably this is
# where the `gauss_rank_scaler` module imported further down lives.
sys.path.append("../input/rank-gauss")

from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
import pandas as pd
import numpy as np
# Seed NumPy's global random number generator.
np.random.seed(0)
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import QuantileTransformer,  KBinsDiscretizer
from sklearn.impute import SimpleImputer
from gauss_rank_scaler import GaussRankScaler

import torch

from sklearn.decomposition import PCA

# NOTE(review): pandas and numpy are already imported above; these two
# repeated imports are redundant.
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Preprocessing

In [None]:
%%time
train = pd.read_csv('../input/tabular-playground-series-sep-2021/train.csv')
test = pd.read_csv('../input/tabular-playground-series-sep-2021/test.csv')

train['n_missing'] = train.isna().sum(axis=1)
test['n_missing'] = test.isna().sum(axis=1)
features = [col for col in train.columns if col not in ['claim', 'id']]

train['claim'] = train['claim'].astype(str)
train[features] = train[features].fillna(train[features].mean())
test[features]  = test[features].fillna(train[features].mean())

In [None]:
%%time
scaler = QuantileTransformer(n_quantiles=300, output_distribution='normal')
X = scaler.fit_transform(train[features])
X_test = scaler.transform(test[features])
y = train.claim.values
del train , test

# Training

In [None]:
# Keyword arguments unpacked into every TabNetClassifier(**tabnet_params) call.
tabnet_params = {
    "n_steps": 1,
    "n_d": 16,
    "n_a": 16,
    "seed": 42,
    # Optimizer class and the keyword arguments it is constructed with.
    "optimizer_fn": torch.optim.Adam,
    "optimizer_params": {"lr": 2e-2, "weight_decay": 5e-5},
    # Learning-rate scheduler class and its keyword arguments.
    "scheduler_params": {
        "mode": "min",
        "patience": 1,
        "min_lr": 1e-5,
        "factor": 0.6,
    },
    "scheduler_fn": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "mask_type": "entmax",
    "verbose": 5,
}

In [None]:
from sklearn.model_selection import KFold
import torch

# 5-fold cross-validation: one TabNet model is trained per fold and the test
# predictions of all folds are averaged.
kf = KFold(n_splits=5, random_state = 42, shuffle = True)
# Take the fold count from the splitter itself so the averaging below stays
# correct if n_splits is ever changed.
n_folds = kf.get_n_splits()
preds = np.zeros((X_test.shape[0],))
for fold, (train_index, valid_index) in enumerate(kf.split(X)):
    print(20*"*")
    print("Fold {}:".format(fold))
    # Rows held out by this fold are used for validation only.
    X_train, X_valid = X[train_index], X[valid_index]
    y_train, y_valid = y[train_index], y[valid_index]

    # A fresh model per fold, always built from the same hyper-parameters.
    clf = TabNetClassifier(**tabnet_params)
    clf.fit(
        X_train=X_train, y_train=y_train,
        # AUC is reported on both sets under the names given in eval_name.
        eval_set=[(X_train, y_train), (X_valid, y_valid)],
        eval_name=['train', 'valid'],
        eval_metric=['auc'],
        max_epochs=20, patience=5,
        batch_size=1024*10, virtual_batch_size=128*10,
        num_workers=0,
        # NOTE(review): per the pytorch-tabnet docs, weights=1 enables
        # automatic class-balanced sampling -- confirm this is intended.
        weights=1,
        drop_last=False
    )
    # Accumulate this fold's share of the averaged probability taken from
    # column 1 of predict_proba.
    preds += clf.predict_proba(X_test)[:, 1] / n_folds


In [None]:
# Build the submission: take the sample file as a template, overwrite its
# second column with the fold-averaged predictions, and save it with `id`
# as the index.
sample_path = "../input/tabular-playground-series-sep-2021/sample_solution.csv"
output_path = 'submission.csv'

sub = pd.read_csv(sample_path)
sub.iloc[:, 1] = preds
sub = sub.set_index('id')
sub.to_csv(output_path)
