In [None]:
# TabNet: offline install of pytorch-tabnet from the wheel in the attached dataset (--no-index skips PyPI)
!pip install --no-index --find-links /kaggle/input/pytorchtabnet/pytorch_tabnet-2.0.1-py3-none-any.whl pytorch-tabnet

In [None]:
# project 
import sys
sys.path.append('../input/moa-models')
sys.path.append('../input/moa-scripts')
from pytorch import *
seed_everything(seed=42)

from moa import *
from moa_pipeline import *
from metrics import logloss
from validation import mlp_oof

# np, pd
import numpy as np
import pandas as pd 
from scipy import stats 

# misc
import warnings
from tqdm.auto import tqdm
import os
import random
import warnings 
import copy
import joblib
import gc 
from functools import partial
import glob 

# sklearn
from sklearn import preprocessing
from sklearn.model_selection import KFold

# torch 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# viz 
import matplotlib.pyplot as plt
import seaborn as sns

# Ask CUDA to launch kernels synchronously so a GPU error is reported at the
# call that caused it (debugging aid; NOTE(review): this can slow training).
os.environ["CUDA_LAUNCH_BLOCKING"] = '1'
# Silence every Python warning for the rest of the notebook.
warnings.simplefilter('ignore')

# Tabnet 
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor

!ls ../input/moa-pipelines

In [None]:
## load raw data (``load`` is a project helper pulled in by the star imports)
X, y, genes, cells, classnames, features, X_test, test_control, submission = load()

## load splits
# Sorted so the split order (and therefore the split <-> seed pairing used
# later) is deterministic. Each pickle is expected to unpack into a
# (folds, pipes) pair; zip(*) regroups them into one tuple of folds and one
# tuple of pipes.
splits = sorted(glob.glob("../input/moa-pipelines/x*.pkl"))
folds, pipes = list(zip(*map(joblib.load, splits)))
print(splits)

In [None]:
def tabnet_oof(splits, seeds, pretrained_pipes, tabnet_params, train_params, stop=-1):
    """Train one TabNet per (split, fold) and return averaged OOF / test predictions.

    Reads the notebook-level globals ``X``, ``y``, ``X_test`` and
    ``test_control`` and relies on ``index2folds``, ``sigmoid`` and
    ``TabNetRegressor`` being in scope when called.

    Parameters
    ----------
    splits : iterable
        One fold assignment per split; each item is handed to ``index2folds``,
        which yields ``(train_idx, valid_idx)`` pairs.
    seeds : iterable
        One seed per split, forwarded as ``TabNetRegressor(seed=...)``.
        ``splits`` and ``seeds`` are paired with ``zip``, so the shorter one
        decides how many splits are run.
    pretrained_pipes : indexable
        ``pretrained_pipes[split][fold]`` must expose ``transform``; it is
        applied to the train, validation and test features of that fold.
    tabnet_params : dict
        Keyword arguments for the ``TabNetRegressor`` constructor.
    train_params : dict
        Extra keyword arguments for ``TabNetRegressor.fit``.
    stop : int, default -1
        Fold index after which the fold loop of every split is aborted.
        The default never matches a fold index, so all folds are trained.

    Returns
    -------
    (oof_train, oof_test) : tuple of float64 arrays
        ``oof_train`` has shape ``(len(X), y.shape[1])`` and ``oof_test`` has
        one row per non-control test sample. Both are sigmoid-transformed
        model outputs averaged over the splits that were actually run;
        ``oof_test`` is additionally averaged over the folds that were
        actually trained in each split.

    Raises
    ------
    ValueError
        If no (split, seed) pair is available (previously a silent 0/0 -> NaN).

    Side effects
    ------------
    Writes ``split{split}_seed{seed}_fold{fold}.pth`` (network state dict) to
    the working directory for every trained model.
    """
    # Loop-invariant: select the non-control test rows once instead of per fold.
    test_rows = X_test[~test_control]
    n_targets = y.shape[1]
    oof_train = np.zeros((len(X), n_targets), dtype=np.float64)
    oof_test = np.zeros((len(test_rows), n_targets), dtype=np.float64)

    n_runs = 0  # splits actually processed (zip may truncate splits/seeds)
    for split, (folds, seed) in enumerate(zip(splits, seeds)):
        folds_oof = np.zeros_like(oof_train)
        split_test = np.zeros_like(oof_test)
        folds_done = 0  # models actually trained in this split
        for fold, (t, v) in enumerate(index2folds(folds)):
            pipe = pretrained_pipes[split][fold]
            # Copies keep the shared arrays safe should a transform work in place.
            xtrain = pipe.transform(X[t].copy())
            xvalid = pipe.transform(X[v].copy())
            xtest = pipe.transform(test_rows.copy())

            model = TabNetRegressor(seed=seed, **tabnet_params)
            model.fit(
                X_train=xtrain,
                y_train=y[t].copy(),
                eval_set=[(xvalid, y[v].copy())],
                **train_params,
            )
            # Model outputs are treated as logits, hence the sigmoid.
            folds_oof[v] += sigmoid(model.predict(xvalid))
            split_test += sigmoid(model.predict(xtest))
            folds_done += 1
            # model.save_model(f"split{split}_seed{seed}_fold{fold}")
            torch.save(model.network.state_dict(), f"split{split}_seed{seed}_fold{fold}.pth")
            if fold == stop:
                break
        if folds_done:
            # Average over the models that really exist, so an early `stop`
            # no longer scales the test predictions down.
            oof_test += split_test / folds_done
        oof_train += folds_oof
        n_runs += 1

    if n_runs == 0:
        raise ValueError("tabnet_oof needs at least one (split, seed) pair")
    # Divide by the number of splits that ran, not by len(seeds).
    oof_train /= n_runs
    oof_test /= n_runs
    # joblib.dump(oof_train, "oof_train.pkl")
    return oof_train, oof_test


In [None]:
class LogitsLogLoss(Metric):
    """Validation metric "logits_ll": log loss computed on raw model outputs.

    The name set here is the one referenced by ``eval_metric`` in
    ``train_params``; ``_maximize = False`` marks it as lower-is-better.
    """

    def __init__(self):
        self._maximize = False
        self._name = "logits_ll"

    def __call__(self, y_true, y_pred):
        """Squash ``y_pred`` with ``sigmoid`` and score it with the project ``logloss``."""
        probabilities = sigmoid(y_pred)
        return logloss(y_true, probabilities)

def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``, element-wise.

    The textbook form overflows inside ``np.exp`` for large negative inputs
    (the result is still 0, but only by way of ``inf`` and a RuntimeWarning).
    This version only ever exponentiates non-positive numbers.

    Parameters
    ----------
    z : scalar or array-like of real numbers
        Logits. Non-float input (e.g. integers, lists of ints) is promoted
        to float64; float dtypes are preserved.

    Returns
    -------
    numpy scalar for scalar input, otherwise ``numpy.ndarray`` with the
    shape of ``z``.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(np.float64)
    # exp(-|z|) lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as is.
    return result[()]

# --- optimisation schedule -------------------------------------------------
MAX_EPOCH = 200
BATCH_SIZE = 1024
BATCH_SIZE_V = 32       # passed to fit() as virtual_batch_size
LR = 2e-2
PL_FACTOR = 0.9         # multiplicative factor of the plateau scheduler

# --- network architecture --------------------------------------------------
MASK_TYPE = "entmax"
N = 36                  # used for both n_d and n_a
N_STEPS = 1
N_IND = 2
N_SHARED = 2
MOMENTUM = 0.02
LAMBDA_SPARSE = 0

# Keyword arguments handed to the LR scheduler through `scheduler_params`.
rdpt_params = {
    "mode": "min",
    "patience": 5,
    "min_lr": 1e-5,
    "factor": PL_FACTOR,
}

# Constructor arguments for TabNetRegressor.
tabnet_params = {
    "n_d": N,
    "n_a": N,
    "n_steps": N_STEPS,
    "lambda_sparse": LAMBDA_SPARSE,
    "mask_type": MASK_TYPE,
    "n_independent": N_IND,
    "n_shared": N_SHARED,
    "momentum": MOMENTUM,
    "optimizer_fn": optim.Adam,
    "optimizer_params": {"lr": LR, "weight_decay": 1e-5},
    "gamma": 1.3,
    "scheduler_params": rdpt_params,
    "scheduler_fn": optim.lr_scheduler.ReduceLROnPlateau,
    "verbose": 10,
}

# Extra arguments for TabNetRegressor.fit (the data itself is supplied by
# the training loop). "logits_ll" is the name of the custom metric class.
train_params = {
    "eval_name": ["val"],
    "eval_metric": ["logits_ll"],
    "max_epochs": MAX_EPOCH,
    "batch_size": BATCH_SIZE,
    "virtual_batch_size": BATCH_SIZE_V,
    "patience": 20,
    "num_workers": 1,
    "drop_last": False,
    "loss_fn": F.binary_cross_entropy_with_logits,
    #"loss_fn": LabelSmoothingCrossEntropy(1e-6, True, 5e-5)
}

In [None]:
# Positional inputs for tabnet_oof, in order: folds / seeds / pipes.
# The seed of each split is simply its position (0, 1, 2, ...).
data = [folds, [*range(len(folds))], pipes]
oof_train, oof_test = tabnet_oof(*data, tabnet_params, train_params)

In [None]:
# Sanity check: mean OOF prediction, mean test prediction and mean target
# value printed side by side.
print(oof_train.mean(), oof_test.mean(), y.mean())
# Project helper from the star imports; presumably reports the OOF score --
# TODO confirm against the moa scripts.
oof_result(y, oof_train, oof_test)

In [None]:
# Project helpers from the star imports. Presumably join_control_group puts
# the control-group test rows (excluded from prediction via ~test_control)
# back, and submit_preds writes submission.csv -- TODO confirm against the
# moa scripts.
prediction = join_control_group(oof_test, test_control, submission, classnames)
submit_preds(prediction, submission, test_control, classnames)
# Read the file back from disk and preview its top-left 5x5 corner.
pd.read_csv("submission.csv").iloc[:5, :5]

In [None]:
!ls