In [None]:
from lightgbm import LGBMClassifier 
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm.notebook import tqdm
plt.style.use('ggplot')

import os
# Print the full path of every file found under /kaggle/input.
kaggle_input_paths = (
    os.path.join(root, fname)
    for root, _subdirs, fnames in os.walk('/kaggle/input')
    for fname in fnames
)
for input_path in kaggle_input_paths:
    print(input_path)

In [None]:
# Load the train / test / sample-submission tables in one pass.
train_df, test_df, sub_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tabular-playground-series-nov-2021/train.csv',
        '/kaggle/input/tabular-playground-series-nov-2021/test.csv',
        '/kaggle/input/tabular-playground-series-nov-2021/sample_submission.csv',
    )
)
# Last expression of the cell: preview of the submission template.
sub_df.head()

# 1 quick view

In [None]:
# Rich display of the first rows of the training table.
display(train_df.head())
# Last expression of the cell, rendered as its output: the frame's shape and its column index.
train_df.shape, train_df.columns

In [None]:
# Pie chart of how often each `target` value occurs in the training table.
target_counts = train_df['target'].value_counts()
target_counts.plot(kind='pie')
plt.show()

# 2- feature generator

In [None]:
def get_cross_feature(train_df, test_df):
    """Add pairwise-product ("cross") feature columns to both frames.

    For every combination of a column from ('f34', 'f55', 'f43') with a column
    from ('f5', 'f36', 'f47'), a new column named '<first>_<second>' holding the
    element-wise product is written to ``train_df`` and to ``test_df``.

    Both frames are modified in place and also returned.

    Returns:
        tuple: ``(train_df, test_df)`` - the same objects that were passed in.
    """
    # Notes kept from the original author (meaning of p_ is not shown here;
    # presumably a per-feature statistic - TODO confirm):
    #   f5:  p_:0.07643
    #   f36: p_:0.08091
    #   f47: p_:0.04571
    right_cols = ['f5', 'f36', 'f47']
    left_cols = ['f34', 'f55', 'f43']
    for left in tqdm(left_cols):
        for right in right_cols:
            cross_name = f'{left}_{right}'
            # Train first, then test, so column insertion order is unchanged.
            for frame in (train_df, test_df):
                frame[cross_name] = frame[left] * frame[right]

    print('Cross feature Done')
    return train_df, test_df

# Add the cross-feature columns to both frames.
train_df, test_df = get_cross_feature(train_df, test_df)

# Column names left out of the model inputs, on top of 'id' and 'target'.
drop2 = [
    'f47_freq', 'f55_freq', 'f5_freq', 'f43_freq', 'f34_freq',
    'f43_f47', 'f34_f36', 'f34_f47', 'f43_f5', 'f43_f36',
]
excluded_cols = ['id', 'target'] + drop2

# Keep the frame's own column order for the feature list.
train_cols = [col for col in train_df.columns if col not in excluded_cols]
print('Reflashed train_cols', len(train_cols))

# 3 ResNet
- reference: https://openreview.net/forum?id=i_Q1yrOegLY
- reference: https://github.com/yandex-research/rtdl/tree/main/bin


**Revisiting Deep Learning Models for Tabular Data**  
Yury Gorishniy, Ivan Rubachev, Valentin Khrulkov, Artem Babenko  
22 May 2021 (modified: 09 Nov 2021) · NeurIPS 2021 Poster · Readers: Everyone  
Keywords: tabular data, architecture, DNN  
TL;DR: Compared many DL models for tabular data, identified a strong baseline (ResNet) and proposed a powerful Transformer-based model.
Abstract: The existing literature on deep learning for tabular data proposes a wide range of novel architectures and reports competitive results on various datasets. However, the proposed models are usually not properly compared to each other and existing works often use different benchmarks and experiment protocols. As a result, it is unclear for both researchers and practitioners what models perform best. Additionally, the field still lacks effective baselines, that is, the easy-to-use models that provide competitive performance across different problems.  

In this work, we perform an overview of the main families of DL architectures for tabular data and raise the bar of baselines in tabular DL by identifying two simple and powerful deep architectures. The first one is a ResNet-like architecture which turns out to be a strong baseline that is often missing in prior works. The second model is our simple adaptation of the Transformer architecture for tabular data, which outperforms other solutions on most tasks. Both models are compared to many existing architectures on a diverse set of tasks under the same training and tuning protocols. We also compare the best DL models with Gradient Boosted Decision Trees and conclude that there is still no universally superior solution. The source code is available at https://github.com/yandex-research/rtdl.  

Code Of Conduct: I certify that all co-authors of this work have read and commit to adhering to the NeurIPS Statement on Ethics, Fairness, Inclusivity, and Code of Conduct.  
Code: https://github.com/yandex-research/rtdl

In [None]:
import math
import typing as ty
import numpy as np
import torch as t
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

def reglu(x: Tensor) -> Tensor:
    """ReGLU: split the last dimension in two halves and gate the first with ReLU of the second."""
    value, gate = t.chunk(x, 2, dim=-1)
    return value * F.relu(gate)


def geglu(x: Tensor) -> Tensor:
    """GEGLU: split the last dimension in two halves and gate the first with GELU of the second."""
    value, gate = t.chunk(x, 2, dim=-1)
    return value * F.gelu(gate)


def get_activation_fn(name: str) -> ty.Callable[[Tensor], Tensor]:
    """Resolve an activation name to a callable.

    'reglu' and 'geglu' map to the functions defined in this file, 'sigmoid'
    to ``torch.sigmoid``; any other name is looked up on
    ``torch.nn.functional`` (an unknown name raises AttributeError).
    """
    if name == 'reglu':
        return reglu
    if name == 'geglu':
        return geglu
    if name == 'sigmoid':
        return t.sigmoid
    return getattr(F, name)


class swish(nn.Module):
    """Swish activation as a module: ``x * sigmoid(x)``, applied element-wise."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x * t.sigmoid(x)

def get_nonglu_activation_fn(name: str) -> ty.Callable[[Tensor], Tensor]:
    """Resolve an activation name to its non-gated counterpart.

    'reglu' becomes ``F.relu`` and 'geglu' becomes ``F.gelu``; every other name
    is delegated to ``get_activation_fn``.
    """
    if name == 'reglu':
        return F.relu
    if name == 'geglu':
        return F.gelu
    return get_activation_fn(name)


class ResNet(nn.Module):
    """Residual MLP over numerical features.

    Structure: input linear layer -> ``n_layers`` residual blocks
    (norm -> linear0 -> activation -> dropout -> linear1 -> dropout, added back
    to the block input) -> final norm -> activation -> linear head.

    Args (keyword-only):
        d_numerical: number of input features.
        d: width of the residual stream.
        d_hidden_factor: hidden width of each block is ``int(d * d_hidden_factor)``.
        n_layers: number of residual blocks.
        activation: 'swish', or any name accepted by ``get_activation_fn``.
            Names ending in 'glu' double the output width of ``linear0``.
        normalization: 'batchnorm' or 'layernorm'.
        hidden_dropout: dropout probability after the block activation
            (skipped when falsy).
        residual_dropout: dropout probability after ``linear1``
            (skipped when falsy).
        d_out: output width of the head; the last dimension of the output is
            squeezed, so it disappears when ``d_out == 1``.
    """

    def __init__(
        self,
        *,
        d_numerical: int,
        d: int,
        d_hidden_factor: float,
        n_layers: int,
        activation: str,
        normalization: str,
        hidden_dropout: float,
        residual_dropout: float,
        d_out: int,
    ) -> None:
        super().__init__()

        norm_classes = {'batchnorm': nn.BatchNorm1d, 'layernorm': nn.LayerNorm}

        def make_normalization():
            # Looked up lazily, so an unknown name only fails when a norm is built.
            return norm_classes[normalization](d)

        if activation != 'swish':
            self.main_activation = get_activation_fn(activation)
            self.last_activation = get_nonglu_activation_fn(activation)
        else:
            # With swish inside the blocks, sigmoid is applied before the head.
            self.main_activation = swish()
            self.last_activation = t.sigmoid

        self.residual_dropout = residual_dropout
        self.hidden_dropout = hidden_dropout

        d_hidden = int(d * d_hidden_factor)
        # Gated activations consume two halves, so linear0 must emit twice the width.
        linear0_width = d_hidden * (2 if activation.endswith('glu') else 1)

        self.first_layer = nn.Linear(d_numerical, d)
        blocks = []
        for _ in range(n_layers):
            blocks.append(
                nn.ModuleDict(
                    {
                        'norm': make_normalization(),
                        'linear0': nn.Linear(d, linear0_width),
                        'linear1': nn.Linear(d_hidden, d),
                    }
                )
            )
        self.layers = nn.ModuleList(blocks)
        self.last_normalization = make_normalization()
        self.head = nn.Linear(d, d_out)

    def forward(self, x_num: Tensor) -> Tensor:
        """Run the network on ``x_num`` and return the head output with its last dim squeezed."""
        parts = [x_num] if x_num is not None else []
        hidden = self.first_layer(t.cat(parts, dim=-1))
        for block in self.layers:
            branch = block['linear0'](block['norm'](hidden))
            branch = self.main_activation(branch)
            if self.hidden_dropout:
                branch = F.dropout(branch, self.hidden_dropout, self.training)
            branch = block['linear1'](branch)
            if self.residual_dropout:
                branch = F.dropout(branch, self.residual_dropout, self.training)
            hidden = hidden + branch
        hidden = self.last_activation(self.last_normalization(hidden))
        return self.head(hidden).squeeze(-1)

## 3.1 Train & eval & predict function 

In [None]:
from datetime import datetime
def train(model, data_loader, epochs, lr=0.001, cuda_flag=False, early_stop_acc=0.755):
    """Train ``model`` with AdamW and binary cross-entropy on logits.

    Args:
        model: module whose forward pass returns one raw logit per row.
        data_loader: iterable yielding ``(features, targets)`` batches.
        epochs: maximum number of passes over ``data_loader``.
        lr: AdamW learning rate.
        cuda_flag: when True, the model and every batch are moved to CUDA.
        early_stop_acc: training stops after the first epoch whose accuracy
            exceeds this value; pass ``None`` to always run all epochs.

    Returns:
        tuple: ``(model, acc_his)`` where ``acc_his`` holds one 0-dim tensor per
        finished epoch with that epoch's training accuracy.

    Note:
        The printed "auc" labels are kept for log compatibility, but the value
        is plain accuracy, not ROC-AUC.
    """
    model.train()
    cost_func = F.binary_cross_entropy_with_logits
    if cuda_flag:
        model = model.to(t.device('cuda'))
    opt = t.optim.AdamW(model.parameters(), lr=lr)
    total_st = datetime.now()
    acc_his = []
    for ep_i in range(epochs):
        ep_st = datetime.now()
        print('--'*20, f'[epoch {ep_i}]', '--'*20)
        print(f'Start {ep_st}: .....')
        ep_true_cnt = 0
        ep_total_cnt = 0
        for idx, (tmp_x, tmp_y) in enumerate(data_loader):
            tmp_x = tmp_x.float()
            if cuda_flag:
                tmp_x = tmp_x.to(t.device('cuda'))
                tmp_y = tmp_y.to(t.device('cuda'))
            opt.zero_grad()
            p = model(tmp_x)
            loss = cost_func(p, tmp_y)
            loss.backward()
            opt.step()

            # p holds logits: sigmoid(p) > 0.5 is equivalent to p > 0, so the
            # decision threshold on the raw output is 0, not 0.5.
            true_cnt = t.sum((p.detach() > 0) == tmp_y)
            batch_size = tmp_y.size()[0]
            # Accumulate over every batch so the epoch metric covers the whole
            # epoch; only the progress print is throttled.
            ep_true_cnt += true_cnt
            ep_total_cnt += batch_size
            if idx % 200 == 0:
                pr_loss = loss.item()
                idx_p = str(idx).zfill(3)
                auc_ = true_cnt / batch_size
                print(f'[{ep_i}- {idx_p}] loss: {pr_loss:.5f} auc:{auc_:.5f}')
        ep_cost = datetime.now() - ep_st
        auc_ = ep_true_cnt / ep_total_cnt
        acc_his.append(auc_)
        print(f'[ {ep_i} ] epoch-auc: {auc_:.5f} cost:{ep_cost}')
        if early_stop_acc is not None and auc_ > early_stop_acc:
            break

    t_cost = datetime.now() - total_st
    print(f'Done! Train cost {t_cost}')
    return model, acc_his


def validation(model, te_dataloader, cuda_flag=False):
    """Evaluate ``model`` on labelled batches and return predicted probabilities.

    Args:
        model: module whose forward pass returns one raw logit per row.
        te_dataloader: iterable yielding ``(features, targets)`` batches.
        cuda_flag: when True, every batch is moved to CUDA (the model is
            expected to be there already).

    Returns:
        list: one 1-D numpy array per batch holding ``sigmoid(logit)``, i.e. the
        predicted probability of the positive class.

    Note:
        The printed "auc" labels are kept for log compatibility, but the value
        is plain accuracy, not ROC-AUC.
    """
    model.eval()
    cost_func = F.binary_cross_entropy_with_logits
    pred_out = []
    total_st = datetime.now()
    ep_true_cnt = 0
    ep_total_cnt = 0
    # Inference only: skip building the autograd graph.
    with t.no_grad():
        for idx, (tmp_x, tmp_y) in enumerate(te_dataloader):
            tmp_x = tmp_x.float()
            if cuda_flag:
                tmp_x = tmp_x.to(t.device('cuda'))
                tmp_y = tmp_y.to(t.device('cuda'))
            p = model(tmp_x)
            # The model emits logits; convert to probabilities instead of
            # flooring the raw values.
            pred_out.append(t.sigmoid(p).cpu().flatten().numpy())
            loss = cost_func(p, tmp_y)

            # Threshold logits at 0 (equivalent to probability 0.5).
            true_cnt = t.sum((p > 0) == tmp_y)
            batch_size = tmp_y.size()[0]
            # Count every batch so the final metric covers the full set; only
            # the progress print is throttled.
            ep_true_cnt += true_cnt
            ep_total_cnt += batch_size
            if idx % 50 == 0:
                pr_loss = loss.item()
                idx_p = str(idx).zfill(3)
                auc_ = true_cnt / batch_size
                print(f'[{idx_p}] loss: {pr_loss:.5f} auc:{auc_:.5f}')
    ep_cost = datetime.now() - total_st
    auc_ = ep_true_cnt / ep_total_cnt
    print(f'val-auc: {auc_:.5f} cost:{ep_cost}')
    return pred_out

def model_predict(model, te_dataloader, cuda_flag=False):
    """Run ``model`` on unlabelled batches and return predicted probabilities.

    Args:
        model: module whose forward pass returns one raw logit per row.
        te_dataloader: iterable yielding batches whose first element is the
            feature tensor (e.g. a DataLoader over a one-tensor TensorDataset).
        cuda_flag: when True, every batch is moved to CUDA (the model is
            expected to be there already).

    Returns:
        list: one 1-D numpy array per batch holding ``sigmoid(logit)``, i.e. the
        predicted probability of the positive class.
    """
    model.eval()
    pred_out = []
    total_st = datetime.now()
    # Inference only: skip building the autograd graph.
    with t.no_grad():
        for batch in te_dataloader:
            tmp_x = batch[0].float()
            if cuda_flag:
                tmp_x = tmp_x.to(t.device('cuda'))
            p = model(tmp_x)
            # The model emits logits; flooring them yields arbitrary integers,
            # so convert to probabilities instead.
            pred_out.append(t.sigmoid(p).cpu().flatten().numpy())

    ep_cost = datetime.now() - total_st
    print(f'Done cost:{ep_cost}')
    return pred_out

## 3.2 model-train

In [None]:
## data prepare
from torch.utils.data import DataLoader, TensorDataset
# Positional 85% / 15% split of the labelled data into training and hold-out parts.
len_ = train_df.shape[0]
split_idx = int(len_ * 0.85)
# iloc slicing is end-exclusive. Label-based .loc[:split_idx] includes the
# boundary row, which would put that row in BOTH the training and hold-out sets.
tr_part = train_df.iloc[:split_idx]
te_part = train_df.iloc[split_idx:]
tr_x, tr_y = tr_part[train_cols].values, tr_part['target'].values
te_x, te_y = te_part[train_cols].values, te_part['target'].values
assert len(tr_x) + len(te_x) == len_, 'train / hold-out split must partition the data'

# Training batches are reshuffled on each pass; hold-out and test keep their order.
tr_ts = TensorDataset(t.tensor(tr_x).float(),
                      t.tensor(tr_y).float())
tr_loader = DataLoader(dataset = tr_ts, batch_size=512, shuffle=True)
te_ts = TensorDataset(t.tensor(te_x).float(),
                      t.tensor(te_y).float())
te_loader = DataLoader(te_ts, batch_size=512)


# Unlabelled competition test set, using the same feature columns.
test_ts = TensorDataset(t.tensor(test_df[train_cols].values).float())
test_loader = DataLoader(test_ts, batch_size=512)

In [None]:
# model train
device = t.device('cuda')
print(device)

# ResNet hyper-parameters; the trailing comments are the alternative values
# noted by the original author.
resnet_params = dict(
    d_numerical=len(train_cols),
    d_out=1,
    d_hidden_factor=32,  # 64
    n_layers=6,  # 4
    activation='reglu',  # 'reglu', 'swish'
    normalization='layernorm',
    d=16,  # 8
    hidden_dropout=0.2,  # 0.2
    residual_dropout=0.1,
)
model = ResNet(**resnet_params)
# print(model)
model, acc_his = train(model, tr_loader, epochs=100, lr=0.001, cuda_flag=True)

In [None]:
import matplotlib.pyplot as plt
# Plot the per-epoch values collected in acc_his (moved to CPU for plotting).
acc_curve = [epoch_acc.cpu().numpy() for epoch_acc in acc_his]
fig, ax = plt.subplots(figsize=(20, 4))
ax.plot(acc_curve)
ax.set_title('acc trend by epoches')
plt.show()

In [None]:
# Score the hold-out loader, then predict on the competition test loader.
_pred_out = validation(model=model, te_dataloader=te_loader, cuda_flag=True)
print('\n')
pred_out = model_predict(model=model, te_dataloader=test_loader, cuda_flag=True)

In [None]:
# Stitch the per-batch prediction arrays into one flat array.
pred_out_ = np.concatenate(pred_out)
# Sanity check shown as the cell output: (max, min, mean) of the predictions.
pred_out_.max(), pred_out_.min(), pred_out_.mean()

# submit

In [None]:
# Skeleton of the submission: test ids plus a 'target' column. 'f0' is only a
# same-length placeholder that is renamed here.
predict_out_df = (
    test_df.loc[:, ['id', 'f0']]
    .copy(deep=True)
    .rename(columns={'f0': 'target'})
)

In [None]:
# Replace the placeholder 'target' values with the model predictions (row order follows test_df).
predict_out_df = predict_out_df.assign(target=pred_out_)

In [None]:
# Align predictions to the id order of the sample submission, then write the CSV.
sub_df_final = pd.merge(sub_df[['id']], predict_out_df, how='left', on='id')
sub_df_final.to_csv('submission.csv', index=False)
# Show where submission.csv was written.
print(os.getcwd())

In [None]:
# SECURITY: API credentials must never be hard-coded in a notebook. The key that
# was previously embedded in this cell has been published with the notebook and
# should be revoked in the Kaggle account settings.
#
# Credentials are read from the KAGGLE_USERNAME / KAGGLE_KEY environment
# variables (set them outside the notebook, e.g. through a secrets manager) and
# written to ~/.kaggle/kaggle.json with owner-only permissions for the CLI.
import json
from pathlib import Path

_missing_vars = [name for name in ('KAGGLE_USERNAME', 'KAGGLE_KEY') if not os.environ.get(name)]
if _missing_vars:
    raise RuntimeError(
        'Missing Kaggle credentials: set ' + ', '.join(_missing_vars) + ' in the environment'
    )

_kaggle_dir = Path.home() / '.kaggle'
# exist_ok avoids the failure a plain `mkdir` produces when the cell is re-run.
_kaggle_dir.mkdir(parents=True, exist_ok=True)
_kaggle_json = _kaggle_dir / 'kaggle.json'
_kaggle_json.write_text(
    json.dumps({'username': os.environ['KAGGLE_USERNAME'], 'key': os.environ['KAGGLE_KEY']})
)
_kaggle_json.chmod(0o600)

In [None]:
# Shell out to the Kaggle CLI to submit submission.csv to the
# tabular-playground-series-nov-2021 competition with the given message.
!kaggle competitions submit -c tabular-playground-series-nov-2021 -f submission.csv -m "submit.2021-11-30-4"