# How to use TabNet
- データセットはGBDTと同じように準備
- 欠損値は受け入れないので、補完が必要
- custom metricを定義して各タスクに適した評価指標を設定する（学習時の目的関数は`fit`の`loss_fn`で指定）
- modelのparametersを設定する
- 特徴量の重要度も確認できる

- https://github.com/dreamquark-ai/tabnet

In [1]:
!pip install pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-3.0.0-py3-none-any.whl (38 kB)
Installing collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-3.0.0
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [2]:
import os
import numpy as np 
import pandas as pd 

import torch
import random
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor, TabNetClassifier

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss, accuracy_score

In [3]:
# Read the Titanic training set, the test set and the sample submission file.
train, test, sub = map(
    pd.read_csv,
    (
        "/kaggle/input/titanic/train.csv",
        "/kaggle/input/titanic/test.csv",
        "/kaggle/input/titanic/gender_submission.csv",
    ),
)

In [4]:
# Preview the first rows of the training frame to sanity-check the loaded columns.
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
# Name of the label column and the numeric columns used as model inputs.
target = "Survived"
feature = ["Pclass", "Age", "SibSp", "Parch", "Fare"]

# TabNet does not accept missing values, so NaNs are replaced with the
# sentinel -1 in both the training and the test features.
X_train = train[feature].fillna(-1)
y_train = train[target]
X_test = test[feature].fillna(-1)

In [6]:
# Convert the pandas objects to plain NumPy arrays for the model.
fn_train, fn_test = X_train.to_numpy(), X_test.to_numpy()
# The label vector is reshaped into a single column: (n_samples, 1).
fn_targets = y_train.to_numpy().reshape(-1, 1)

# custom metric

In [7]:
class LogitsLogLoss(Metric):
    """Binary log loss evaluated on raw (pre-sigmoid) model outputs.

    Registered under the name "logits_ll"; lower values are better.
    """

    def __init__(self):
        self._name = "logits_ll"
        self._maximize = False

    def __call__(self, y_true, y_pred):
        """Return the mean binary cross-entropy of ``y_pred`` against ``y_true``.

        ``y_pred`` is passed through a sigmoid first; a small epsilon inside
        each logarithm guards against ``log(0)``.
        """
        eps = 1e-15
        # Sigmoid turns the raw scores into probabilities.
        probs = 1 / (1 + np.exp(-y_pred))

        negative_term = (1 - y_true) * np.log(1 - probs + eps)
        positive_term = y_true * np.log(probs + eps)
        return np.mean(-(negative_term + positive_term))
    
class Accuracy(Metric):
    """Accuracy after thresholding the predictions at 0.5.

    Registered under the name "accuracy_ll"; higher values are better.
    """

    def __init__(self):
        self._name = "accuracy_ll"
        self._maximize = True

    def __call__(self, y_true, y_pred):
        """Return the accuracy of the thresholded ``y_pred`` against ``y_true``.

        The labels are built in a new array, so the caller's ``y_pred`` is left
        untouched (the previous version overwrote it in place, which corrupted
        the predictions for anything that used them afterwards).
        """
        # NOTE(review): the 0.5 cut-off assumes y_pred holds probabilities. The
        # models in this notebook are trained with a logits loss, so apply a
        # sigmoid first (or threshold at 0) before using this metric with them.
        hard_labels = np.where(np.asarray(y_pred) >= 0.5, 1.0, 0.0)
        return accuracy_score(y_true, hard_labels)

In [8]:
# Upper bound on the number of training epochs passed to ``fit`` for each fold.
MAX_EPOCH = 200

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

def seed_everything(seed_value):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch, and exports the value as
    ``PYTHONHASHSEED``. When CUDA is available the CUDA generators are seeded
    as well and cuDNN is switched to deterministic, non-benchmark mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed_value)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def modelling_tabnet(tr, target, te, sample_seed, n_splits=5):
    """Train TabNet with stratified K-fold CV and return OOF and test predictions.

    A ``TabNetRegressor`` is fitted per fold with a binary cross-entropy
    (with logits) loss, so the raw model outputs are passed through a sigmoid
    to obtain probabilities.

    Parameters
    ----------
    tr : np.ndarray
        2-D training feature matrix (rows are indexed per fold).
    target : np.ndarray
        Binary labels aligned with ``tr``; a single-column array is expected
        because the out-of-fold buffer has shape ``(len(tr), 1)``.
    te : np.ndarray
        Test feature matrix scored by every fold model.
    sample_seed : int
        Seed forwarded to ``seed_everything`` and to TabNet.
    n_splits : int, default 5
        Number of stratified folds.

    Returns
    -------
    oof_preds : np.ndarray
        Out-of-fold probabilities, shape ``(len(tr), 1)``.
    test_preds_all : np.ndarray
        Per-fold test probabilities stacked along a new first axis.

    Notes
    -----
    Prints the OOF log loss, the OOF accuracy and the fold-averaged feature
    importances. Feature names are read from the module-level ``feature`` list
    and the epoch limit from the module-level ``MAX_EPOCH``.
    """
    seed_everything(sample_seed)
    tabnet_params = dict(
        n_d=12, n_a=12, n_steps=1, gamma=1.3, seed=sample_seed,
        lambda_sparse=0, optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
        mask_type='entmax',
        scheduler_params=dict(mode="min",
                              patience=5,
                              min_lr=1e-5,
                              factor=0.9,),
        scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
        verbose=10,
    )

    oof_preds = np.zeros([len(tr), 1])
    test_cv_preds = []
    fold_importances = []

    mskf = StratifiedKFold(n_splits=n_splits, random_state=0, shuffle=True)
    # Split on the arrays that were passed in (not on the global DataFrame) so
    # the fold indices always match `tr`; the labels are flattened because the
    # splitter expects a 1-D target.
    for fold_nb, (train_idx, val_idx) in enumerate(mskf.split(tr, np.ravel(target))):
        print("FOLDS : ", fold_nb+1)

        ## model
        X_tr, y_tr = tr[train_idx, :], target[train_idx]
        X_val, y_val = tr[val_idx, :], target[val_idx]
        model = TabNetRegressor(**tabnet_params)

        model.fit(X_train=X_tr,
                  y_train=y_tr,
                  eval_set=[(X_val, y_val)],
                  eval_name=["val"],
                  eval_metric=["logits_ll"],
                  max_epochs=MAX_EPOCH,
                  patience=20, batch_size=1024, virtual_batch_size=128,
                  num_workers=1, drop_last=False,
                  # use binary cross entropy as this is not a regression problem
                  loss_fn=torch.nn.functional.binary_cross_entropy_with_logits)

        # The model outputs logits; apply a sigmoid to get probabilities.
        preds_val = model.predict(X_val)
        oof_preds[val_idx] = 1 / (1 + np.exp(-preds_val))

        # preds on test
        preds_test = model.predict(te)
        test_cv_preds.append(1 / (1 + np.exp(-preds_test)))

        fold_importances.append(model.feature_importances_)

    test_preds_all = np.stack(test_cv_preds)
    print("OOF log loss:", log_loss(np.ravel(target), np.ravel(oof_preds)))
    mod_oof_preds = (oof_preds >= 0.5).astype(int)
    print("OOF Accurcy:", accuracy_score(np.ravel(target), np.ravel(mod_oof_preds)))

    # Average the importances over all fold models instead of reporting only
    # the model from the last fold.
    print("Feature Importance")
    feat_importances = np.mean(fold_importances, axis=0)
    for i in np.argsort(feat_importances):
        print(feature[i], feat_importances[i])

    return oof_preds, test_preds_all

# Seeds to average over; each seed runs one full cross-validation.
seeds = [0]

# Accumulators for the seed-averaged out-of-fold and test predictions.
target_oof = np.zeros((len(fn_train), 1))
target_pred = np.zeros((len(fn_test), 1))

for seed_ in seeds:
    oof_preds, test_preds_all = modelling_tabnet(fn_train, fn_targets, fn_test, seed_)
    # Test predictions are first averaged over the folds (axis 0), then every
    # seed contributes an equal share to the final average.
    target_pred += test_preds_all.mean(axis=0) / len(seeds)
    target_oof += oof_preds / len(seeds)

FOLDS :  1
Device used : cpu
epoch 0  | loss: 0.79825 | val_logits_ll: 3.89767 |  0:00:00s
epoch 10 | loss: 0.57874 | val_logits_ll: 0.95585 |  0:00:01s
epoch 20 | loss: 0.5617  | val_logits_ll: 1.47337 |  0:00:02s
epoch 30 | loss: 0.53704 | val_logits_ll: 1.2653  |  0:00:04s

Early stopping occured at epoch 33 with best_epoch = 13 and best_val_logits_ll = 0.68921
Best weights from best epoch are automatically used!
FOLDS :  2
Device used : cpu
epoch 0  | loss: 0.79041 | val_logits_ll: 4.67615 |  0:00:00s
epoch 10 | loss: 0.56846 | val_logits_ll: 0.79109 |  0:00:01s
epoch 20 | loss: 0.54085 | val_logits_ll: 1.02012 |  0:00:02s
epoch 30 | loss: 0.52202 | val_logits_ll: 1.20451 |  0:00:03s

Early stopping occured at epoch 31 with best_epoch = 11 and best_val_logits_ll = 0.75348
Best weights from best epoch are automatically used!
FOLDS :  3
Device used : cpu
epoch 0  | loss: 0.78053 | val_logits_ll: 6.50363 |  0:00:00s
epoch 10 | loss: 0.55581 | val_logits_ll: 0.88196 |  0:00:01s
epoch 2

In [9]:
# `target_pred` holds averaged probabilities in a single-column array, while
# the submission column carries hard 0/1 labels: flatten it and threshold at
# 0.5 before writing the file.
sub[target] = (np.ravel(target_pred) >= 0.5).astype(int)
sub.to_csv("submission.csv", index=False)