# Tabnet: Attentive Tabular Learning

In [1]:
import pandas as pd                                        # pandas dataframes
import numpy  as np                                        # numpy arrays
import torch                                               # Pytorch: neural network backend
from pytorch_tabnet.tab_model import TabNetClassifier      # Tabnet: neural network models for tabular learning
from pytorch_tabnet.pretraining import TabNetPretrainer    # semisupervised pre-training
from sklearn.model_selection import KFold                  # k-fold cross-validation
from sklearn.metrics import accuracy_score                 # classification accuracy
from sklearn.metrics import cohen_kappa_score              # loss function appropriate for imbalanced classes

In [2]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"

# wisconsin diagnostic breast cancer data set
wdbc = pd.read_csv(url, header=None, usecols=range(1, 32)) # remove zeroth column (irrelevant index feature)
label = list(wdbc.columns)[0]                              # column name of the class label
features = list(wdbc.columns)[1:]                          # column names of the features
y = pd.factorize(wdbc[label])[0]                           # y-vector (with target encoding)
X = wdbc[features]                                         # design matrix

In [3]:
X = X.to_numpy()     # encode for PyTorch
y = torch.tensor(y)  # encode for PyTorch

In [4]:
X                    # preview

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [5]:
y[0:40] # preview

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])

In [6]:
kf = KFold(n_splits=5, random_state=42, shuffle=True)  # define k-fold cross-validation plan

log_loss_array = []  # initialize list of out-of-fold prediction log loss
kappa_array    = []  # initialize list of out-of-fold prediction kappa
accuracy_array = []  # initialize list of out-of-fold prediction accuracy

for train_index, test_index in kf.split(X):
    
    X_train, X_valid = X[train_index], X[test_index]   # X variables
    y_train, y_valid = y[train_index], y[test_index]   # y vector
    
    # feature scaling
    X_train = (X_train - np.min(X_train))/np.ptp(X_train)
    X_valid = (X_valid - np.min(X_valid))/np.ptp(X_valid)

    # semisupervised pre-training
    unsupervised_model = TabNetPretrainer(
        verbose=0, seed=42,
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2),
        mask_type='entmax' # "sparsemax"
        )

    # obtain pre-trained model weights
    unsupervised_model.fit(
        X_train=X_train,
        eval_set=[X_valid],
        pretraining_ratio=0.8,
        )
    
    # define the model
    classifier = TabNetClassifier(
                    verbose=0, seed=42,
                    optimizer_fn=torch.optim.Adam,     # Adam optimizer
                    optimizer_params=dict(lr=2e-2)     # default learning rate
                    )
    
    classifier.fit(X_train=X_train, y_train=y_train,   # in-sample data
               eval_set=[(X_valid, y_valid)],          # out-of-fold data
               patience=100,                           # stopping threshhold if no improvement
               max_epochs=2000,                        # maximum training epochs
               eval_metric=["logloss"],                # loss function to optimize
               from_unsupervised=unsupervised_model)   # load pre-trained model weights      

    log_loss_array.append(classifier.best_cost)
    kappa_array.append(cohen_kappa_score(y1=y_valid, y2=classifier.predict(X_valid)))
    accuracy_array.append(accuracy_score(y_true=y_valid, y_pred=classifier.predict(X_valid)))
    
    print("True class labels:", y_valid[0:10])
    print("Predicted class labels:", classifier.predict(X_valid[0:10]))
    print("Kappa:", cohen_kappa_score(y1=y_valid, y2=classifier.predict(X_valid)))
    print("Accuracy:", accuracy_score(y_true=y_valid, y_pred=classifier.predict(X_valid)))

    # ignore UserWarning: CUDA initialization: Found no NVIDIA driver on your system


Early stopping occured at epoch 20 with best_epoch = 10 and best_val_0_unsup_loss = 4.52838
Best weights from best epoch are automatically used!
Loading weights from unsupervised pretraining

Early stopping occured at epoch 268 with best_epoch = 168 and best_val_0_logloss = 0.09929
Best weights from best epoch are automatically used!
True class labels: tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 0])
Predicted class labels: [0 0 1 0 0 0 0 1 1 0]
Kappa: 0.9432082364662903
Accuracy: 0.9736842105263158

Early stopping occured at epoch 17 with best_epoch = 7 and best_val_0_unsup_loss = 5.16916
Best weights from best epoch are automatically used!
Loading weights from unsupervised pretraining

Early stopping occured at epoch 214 with best_epoch = 114 and best_val_0_logloss = 0.11177
Best weights from best epoch are automatically used!
True class labels: tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
Predicted class labels: [0 0 0 0 0 1 0 0 0 0]
Kappa: 0.9395546129374337
Accuracy: 0.9736842105263158

Early st

In [7]:
log_loss_array

[0.09929120780087256,
 0.11177492534523226,
 0.23848992636418156,
 0.24085603396505145,
 0.21835174915591915]

In [8]:
kappa_array

[0.9432082364662903,
 0.9395546129374337,
 0.7898089171974523,
 0.7629116117850954,
 0.8141447368421053]

In [9]:
accuracy_array

[0.9736842105263158,
 0.9736842105263158,
 0.9035087719298246,
 0.8947368421052632,
 0.911504424778761]

In [10]:
sum(kappa_array) / len(kappa_array)

0.8499256230456753

In [11]:
sum(accuracy_array) / len(accuracy_array)

0.9314236919732961