# TabNet for Tabular data
TabNet is an attention-based deep learning model for tabular data, developed by Google researchers. A scikit-learn-friendly PyTorch implementation can be found here: https://github.com/dreamquark-ai/tabnet

Here is the original paper: https://arxiv.org/pdf/1908.07442.pdf

# Install TabNet and load data

In [None]:
%%capture
try:
    from pytorch_tabnet.tab_model import TabNetRegressor
except:
    !pip install ../input/officialpytorchtabnet/pytorch_tabnet-3.0.0-py3-none-any.whl

In [None]:
import pandas as pd
import numpy as np
from pytorch_tabnet.tab_model import TabNetRegressor
import torch
from torch import nn
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import KFold

# Read the competition's training set, test set and sample submission,
# in that order, from the Kaggle input directory.
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-jan-2021/train.csv",
        "../input/tabular-playground-series-jan-2021/test.csv",
        "../input/tabular-playground-series-jan-2021/sample_submission.csv",
    )
)
# Last expression of the cell: display the training frame.
train

# Drop the ID column

In [None]:
# Remove the "id" column from both frames.
train = train.drop(columns=["id"])
test = test.drop(columns="id")
# Names of the training columns whose label contains "cont".
features = [name for name in train.columns if "cont" in name]
# Last expression of the cell: display the test frame.
test

# Feature Engineering

In [None]:
# Switches for the optional feature-engineering steps in the cells below.
fe = {
    "rankgauss": False,     # GaussRankScaler features
    "stats": True,          # row-wise sum / mean / std / kurt / skew
    "gaussmix": True,       # per-column Gaussian-mixture class labels
    "pca": True,            # PCA components
    "tsne": True,           # t-SNE embeddings
    "umap": True,           # UMAP embedding
    "drop_original": True,  # drop the original columns listed in COLS
}

In [None]:
# Stack train on top of test so that every feature-engineering step below
# is applied to both sets at once.
all_data = pd.concat([train, test], axis=0, ignore_index=True)
# Only the first len(train) rows come from `train` and carry a target.
# Derive the boundary from the data instead of hard-coding the row count,
# so a dataset of a different size is not silently mis-sliced.
targets = all_data["target"].iloc[: len(train)]
all_data = all_data.drop("target", axis=1)
# Original feature columns: every remaining column whose name contains "cont".
COLS = [c for c in all_data.columns if "cont" in c]
all_data

In [None]:
import tqdm

# Row-wise summary statistics computed over the original COLS columns;
# each one is stored as a new "cont_<statistic>" column.
if fe["stats"]:
    cont_block = all_data[COLS]
    for stat_name in tqdm.tqdm(["sum", "mean", "std", "kurt", "skew"]):
        # Look up the DataFrame reduction of that name and apply it per row.
        row_reduction = getattr(cont_block, stat_name)
        all_data["cont_" + stat_name] = row_reduction(axis=1)

all_data

In [None]:
import sys
# Make the GaussRankScaler helper from the attached dataset importable.
sys.path.append("../input/rank-gauss")
from gauss_rank_scaler import GaussRankScaler

# Optionally append the GaussRankScaler output for the original columns,
# one "rankgauss_<i>" column per output column.
if fe["rankgauss"]:
    scaler = GaussRankScaler()
    rankgauss_feat = scaler.fit_transform(all_data[COLS])
    rankgauss_names = [f"rankgauss_{i}" for i in range(rankgauss_feat.shape[1])]
    rankgauss_df = pd.DataFrame(rankgauss_feat, columns=rankgauss_names)
    all_data = pd.concat([all_data, rankgauss_df], axis=1)
all_data

In [None]:
from sklearn.mixture import GaussianMixture

if fe["gaussmix"]:
    def get_gmm_class_feature(feat, n):
        """Add a ``{feat}_class`` column to ``all_data``.

        A ``GaussianMixture`` with ``n`` components is fitted on the single
        column ``feat`` of ``all_data`` (train and test rows together) and
        the predicted component label of every row is stored as the new
        column. ``all_data`` is modified in place; nothing is returned.
        """
        column = all_data[feat].values.reshape(-1, 1)
        mixture = GaussianMixture(n_components=n, random_state=42).fit(column)
        all_data[f'{feat}_class'] = mixture.predict(column)

    # Number of mixture components used for each original column.
    gmm_components = {
        'cont1': 4,
        'cont2': 10,
        'cont3': 6,
        'cont4': 4,
        'cont5': 3,
        'cont6': 2,
        'cont7': 3,
        'cont8': 4,
        'cont9': 4,
        'cont10': 8,
        'cont11': 5,
        'cont12': 4,
        'cont13': 6,
        'cont14': 6,
    }
    for feat_name, n_comp in gmm_components.items():
        get_gmm_class_feature(feat_name, n_comp)

    CLASS_COLS = [c for c in all_data.columns if "_class" in c]
    # Column positions as of this cell; they change if columns that sit
    # before the class columns are dropped later.
    CLASS_COLS_IDX = [all_data.columns.get_loc(c) for c in CLASS_COLS]
    assert len(CLASS_COLS) > 0
all_data

In [None]:
from sklearn.decomposition import PCA

if fe["pca"]:
    # A fractional n_components asks scikit-learn to keep as many components
    # as are needed to explain that share of the variance.
    pca = PCA(n_components=0.8, random_state=42)
    pca.fit(all_data[COLS])
    pca_feat = pca.transform(all_data[COLS])
    # One "pca_cont<i>" column per retained component.
    pca_names = [f"pca_cont{i}" for i in range(pca.n_components_)]
    pca_df = pd.DataFrame(pca_feat, columns=pca_names)
    all_data = pd.concat([all_data, pca_df], axis=1)
    PCA_COLS = [c for c in all_data.columns if "pca" in c]
    assert len(PCA_COLS) > 0

all_data

In [None]:
from cuml import TSNE

if fe["tsne"]:
    tsne_components = 2

    # One embedding of the original columns per perplexity value; each adds
    # "tsne_<perplexity>_<i>" columns to all_data.
    perplexity = [10, 20, 30, 40, 50]
    for per in perplexity:
        # NOTE(review): n_neighbors is passed as a float (3.01 * per);
        # confirm that cuML accepts a non-integer value here.
        tsne = TSNE(
            n_components=tsne_components,
            perplexity=per,
            n_neighbors=3.01 * per,
        )
        tsne_feat = tsne.fit_transform(all_data[COLS])
        tsne_names = [f"tsne_{per}_{i}" for i in range(tsne_components)]
        tsne_df = pd.DataFrame(tsne_feat, columns=tsne_names)
        all_data = pd.concat([all_data, tsne_df], axis=1)
    TSNE_COLS = [c for c in all_data.columns if "tsne" in c]
all_data

In [None]:
from cuml import UMAP

if fe["umap"]:
    # Embed the original columns into umap_components dimensions and append
    # the result as "umap<i>" columns.
    umap_components = 10
    umap = UMAP(n_components=umap_components)
    umap_feat = umap.fit_transform(all_data[COLS])
    umap_names = [f"umap{i}" for i in range(umap_components)]
    umap_df = pd.DataFrame(umap_feat, columns=umap_names)
    all_data = pd.concat([all_data, umap_df], axis=1)
    UMAP_COLS = [c for c in all_data.columns if "umap" in c]
    assert len(UMAP_COLS) > 0
all_data

In [None]:
# Optionally keep only the engineered features by removing the original
# columns listed in COLS.
if fe["drop_original"]:
    all_data = all_data.drop(columns=COLS)

In [None]:
# Split the stacked frame back into its train and test rows. `targets`
# holds one value per training row, so its length marks the boundary;
# this avoids hard-coding the training-set size.
train = all_data.iloc[: len(targets)]
test = all_data.iloc[len(targets):]
# Every column that is left in all_data is used as a model input.
features = list(all_data.columns)

# Train the model
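The model is trained on the features produced in the Feature Engineering section above, as selected by the `fe` flags. To test the model's power on the raw features alone, set every flag in `fe` to `False`.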

In [None]:
# Display the feature table (train and test rows) as it stands before training.
all_data

In [None]:
# Training configuration.
MAX_EPOCHS = 200
BATCH_SIZE = 512
VIRTUAL_BS = 32
SEED = 421789
N_SPLITS = 5

# Categorical inputs (the GMM class columns).
# * Positions are resolved against the final `features` list: positions
#   recorded before the original columns were dropped would be stale.
# * pytorch-tabnet documents both `cat_idxs` and `cat_dims` as mandatory for
#   embeddings, so the number of modalities is supplied as well. It is taken
#   over train and test rows together so that no unseen label can occur at
#   prediction time.
if fe["gaussmix"]:
    cat_idxs = [features.index(c) for c in CLASS_COLS]
    cat_dims = [int(all_data[c].max()) + 1 for c in CLASS_COLS]
else:
    cat_idxs = []
    cat_dims = []

tabnet_params = dict(
    n_d = 16,
    n_a = 16,
    n_steps = 3,
    gamma = 1.2,
    lambda_sparse = 1e-5,
    optimizer_fn = optim.RMSprop,
    optimizer_params = dict(lr = 2e-2, weight_decay=1e-5),
    mask_type = "entmax",
    scheduler_params = dict(
        mode = "min", patience = 5, min_lr = 1e-5, factor = 0.9),
    scheduler_fn = ReduceLROnPlateau,
    seed = SEED,
    verbose = 1,
    cat_idxs = cat_idxs,
    cat_dims = cat_dims,
)

# Materialise the arrays once. KFold yields positional indices, so indexing
# NumPy arrays avoids any dependence on the DataFrame / Series index labels.
X_all = train[features].values
y_all = targets.values.reshape(-1, 1)
# Select the same columns, in the same order, as used for training.
X_test = test[features].values

# One row of test predictions per fold; averaged after the loop.
predictions = np.zeros((N_SPLITS, len(test), 1))
# Seed the shuffle so the folds are reproducible from run to run.
splitter = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
for fold, (tr_idx, val_idx) in enumerate(splitter.split(X_all, y_all)):
    print(f"FOLD: {fold}")
    X_tr, y_tr = X_all[tr_idx], y_all[tr_idx]
    X_val, y_val = X_all[val_idx], y_all[val_idx]

    # A fresh model per fold, validated on the held-out part of the split.
    model = TabNetRegressor(**tabnet_params)
    model.fit(
        X_train = X_tr,
        y_train = y_tr,
        eval_set = [(X_val, y_val)],
        eval_metric = ["rmse"],
        max_epochs = MAX_EPOCHS,
        batch_size = BATCH_SIZE,
        virtual_batch_size = VIRTUAL_BS,
        num_workers = 1,
        drop_last=False,
        patience = 20
    )
    predictions[fold] = model.predict(X_test)
    

# Get mean of predictions and submit!

In [None]:
# Average the per-fold test predictions over the fold axis (axis 0) and use
# the result as the submission's target column.
sub["target"] = np.mean(predictions, axis=0)
sub

In [None]:
# Write the submission file without the DataFrame index column.
sub.to_csv(path_or_buf="submission.csv", index=False)