### TabNet: Attentive Interpretable Tabular Learning

https://github.com/dreamquark-ai/tabnet

This model uses sequential attention to learn which features to focus on. 

In [12]:
import pandas as pd
from pytorch_tabnet.tab_model import TabNetClassifier
from imblearn.under_sampling import InstanceHardnessThreshold
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch

In [15]:
df = pd.read_csv('data/cleaned_data_2.csv')

X = df.drop(['Show'], axis=1)
y = df['Show']

#### With IHT

In [16]:
iht = InstanceHardnessThreshold(random_state=42)
X_res, y_res = iht.fit_resample(X, y)

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)


clf = TabNetClassifier()

In [20]:
clf.fit(
    X_train=X_train.values,
    y_train=y_train.values,
    eval_set=[(X_train.values, y_train.values), (X_val.values, y_val.values)],
    eval_name=['train', 'valid'],
    eval_metric=['auc'],
    max_epochs=15,
    patience=5,
)

epoch 0  | loss: 0.5173  | train_auc: 0.63971 | valid_auc: 0.63669 |  0:00:03s
epoch 1  | loss: 0.42167 | train_auc: 0.57016 | valid_auc: 0.571   |  0:00:07s
epoch 2  | loss: 0.39116 | train_auc: 0.67736 | valid_auc: 0.67587 |  0:00:11s
epoch 3  | loss: 0.3587  | train_auc: 0.87041 | valid_auc: 0.86862 |  0:00:15s
epoch 4  | loss: 0.33307 | train_auc: 0.90827 | valid_auc: 0.90733 |  0:00:19s
epoch 5  | loss: 0.30851 | train_auc: 0.92379 | valid_auc: 0.92425 |  0:00:22s
epoch 6  | loss: 0.29686 | train_auc: 0.93167 | valid_auc: 0.9309  |  0:00:26s
epoch 7  | loss: 0.29046 | train_auc: 0.93486 | valid_auc: 0.93293 |  0:00:30s
epoch 8  | loss: 0.28829 | train_auc: 0.93652 | valid_auc: 0.93524 |  0:00:34s
epoch 9  | loss: 0.28406 | train_auc: 0.94068 | valid_auc: 0.93886 |  0:00:38s
epoch 10 | loss: 0.28286 | train_auc: 0.94194 | valid_auc: 0.94053 |  0:00:42s
epoch 11 | loss: 0.28308 | train_auc: 0.94203 | valid_auc: 0.93998 |  0:00:46s
epoch 12 | loss: 0.28442 | train_auc: 0.94285 | vali



In [21]:
y_pred = clf.predict(X_test.values)

print('Accuracy: ', accuracy_score(y_test, y_pred))

Accuracy:  0.9133771929824561


#### Without IHT

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)


clf = TabNetClassifier()



In [23]:
clf.fit(
    X_train=X_train.values,
    y_train=y_train.values,
    eval_set=[(X_train.values, y_train.values), (X_val.values, y_val.values)],
    eval_name=['train', 'valid'],
    eval_metric=['auc'],
    max_epochs=15,
    patience=5,
)

epoch 0  | loss: 0.49932 | train_auc: 0.59931 | valid_auc: 0.60038 |  0:00:09s
epoch 1  | loss: 0.47691 | train_auc: 0.60579 | valid_auc: 0.60744 |  0:00:18s
epoch 2  | loss: 0.47471 | train_auc: 0.67692 | valid_auc: 0.6693  |  0:00:27s
epoch 3  | loss: 0.47189 | train_auc: 0.68533 | valid_auc: 0.67771 |  0:00:36s
epoch 4  | loss: 0.47057 | train_auc: 0.69023 | valid_auc: 0.67912 |  0:00:45s
epoch 5  | loss: 0.46648 | train_auc: 0.69921 | valid_auc: 0.68879 |  0:00:54s
epoch 6  | loss: 0.46381 | train_auc: 0.70142 | valid_auc: 0.69421 |  0:01:03s
epoch 7  | loss: 0.46202 | train_auc: 0.70299 | valid_auc: 0.69509 |  0:01:13s
epoch 8  | loss: 0.45842 | train_auc: 0.70937 | valid_auc: 0.69884 |  0:01:22s
epoch 9  | loss: 0.4583  | train_auc: 0.70923 | valid_auc: 0.69973 |  0:01:31s
epoch 10 | loss: 0.45692 | train_auc: 0.71352 | valid_auc: 0.70347 |  0:01:40s
epoch 11 | loss: 0.45793 | train_auc: 0.71085 | valid_auc: 0.7009  |  0:01:49s
epoch 12 | loss: 0.459   | train_auc: 0.70764 | vali



In [25]:
y_pred = clf.predict(X_test.values)

print('Accuracy: ', accuracy_score(y_test, y_pred))

Accuracy:  0.7971499660710246
