Based on the ideas of these notebooks:

[OWN Jane Street with Keras NN](https://www.kaggle.com/tarlannazarov/own-jane-street-with-keras-nn)

[TabNet Starter](https://www.kaggle.com/yifor01/tabnet-starter)

...and many other kernels.


I believe this TabNet model is a good candidate for ensembling with other neural network models.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(root, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install ../input/python-datatable/datatable-0.11.0-cp37-cp37m-manylinux2010_x86_64.whl > /dev/null 2>&1
!pip install  ../input/officialpytorchtabnet/pytorch_tabnet-3.0.0-py3-none-any.whl pytorch-tabnet  > /dev/null

In [None]:
import datatable as dt
import janestreet
from tqdm.notebook import tqdm
from pytorch_tabnet.pretraining import TabNetPretrainer
from pytorch_tabnet.tab_model import TabNetClassifier
import torch
from sklearn.metrics import roc_auc_score
import pickle

In [None]:
# Load the competition training set through datatable and convert it to pandas.
train = dt.fread('../input/jane-street-market-prediction/train.csv').to_pandas()

# Every column whose name contains 'feature' is a model input.
features = [col for col in train.columns if 'feature' in col]

# Keep only rows with date > 85 and a strictly positive weight.
train = (
    train.query('date > 85')
    .query('weight > 0')
    .reset_index(drop=True)
)

# Mean-impute every feature except the first one (features[0] is left as-is).
imputed_cols = features[1:]
col_means = train[imputed_cols].mean()
train[imputed_cols] = train[imputed_cols].fillna(col_means)

# Binary target: 1 when resp is positive, 0 otherwise.
train['action'] = (train['resp'] > 0).astype('int')

X = train[features].values
y = train['action'].values

# Per-column means of the imputed features; reused to fill NaNs at inference.
f_mean = train[imputed_cols].values.mean(axis=0)

In [None]:
# Switch for the cell below: True fits a new TabNetClassifier and pickles it,
# False loads an already-pickled model from the input directory instead.
TRAINING = False

In [None]:
# Either fit a fresh TabNet classifier on (X, y) and pickle it, or load a
# previously pickled model, depending on the TRAINING flag.
if TRAINING:
    # Hyper-parameters forwarded unchanged to the TabNetClassifier constructor.
    tabnet_params = dict(n_d=36,
                    n_a=36,
                    n_steps=3,
                    gamma=1.5,
                    lambda_sparse=1e-4,
                    momentum=0.3,
                    clip_value=2.,
                    mask_type='entmax',
                    optimizer_fn=torch.optim.Adam,
                    optimizer_params=dict(lr=1e-3, weight_decay=1e-4),
                    scheduler_params=dict(mode="min",patience=3,min_lr=1e-5,factor=0.9,),
                    scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
                    epsilon=1e-15,
                    verbose=2,
                    seed=8012,
                    )

    model = TabNetClassifier(**tabnet_params)

    # NOTE(review): no eval_set is passed to fit(); confirm that the
    # ReduceLROnPlateau scheduler has a metric to monitor in that case.
    model.fit(X_train=X,
              y_train=y,
              max_epochs=50,
              batch_size=2048,
              virtual_batch_size=128,
              num_workers=0, drop_last=False,
             )

    # Use a context manager so the pickle is flushed and the handle is closed
    # deterministically instead of relying on garbage collection.
    with open('tabnet_model.pickle', 'wb') as model_file:
        pickle.dump(model, model_file)
else:
    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that come from a trusted source.
    with open('../input/jane-street-tabnet-model/tabnet_model.pickle', 'rb') as model_file:
        model = pickle.load(model_file)

In [None]:
from numba import jit
from numba import prange

@jit(nopython=True)
def fillna_npwhere(array, values):
    """Replace NaN entries of ``array`` with the matching entries of ``values``.

    The sum of ``array`` is used as a cheap NaN probe: only when that sum is
    NaN is a new array built with ``np.where``; otherwise the input is
    returned as-is.
    """
    total = array.sum()
    if np.isnan(total):
        nan_mask = np.isnan(array)
        array = np.where(nan_mask, values, array)
    return array

@jit(parallel=True,nopython=True)
def for_loop(method, matrix, values):
    """Apply ``method(row, values)`` to every row of ``matrix`` in place.

    Rows are iterated with ``prange``; each result is written back into the
    same row, and the (mutated) ``matrix`` is returned.
    """
    n_rows = matrix.shape[0]
    for row_idx in prange(n_rows):
        filled_row = method(matrix[row_idx], values)
        matrix[row_idx] = filled_row
    return matrix

In [None]:
# Create the competition environment and obtain the iterator that yields
# (test_df, pred_df) pairs consumed by the inference loop.
env = janestreet.make_env()
env_iter = env.iter_test()

In [None]:
# Score each test batch: rows with a non-positive weight get action 0 without
# running the model; all others are NaN-filled with the training means and
# passed to the classifier. Every batch is submitted through env.predict.
for test_df, pred_df in tqdm(env_iter):
    has_positive_weight = test_df['weight'].item() > 0
    if not has_positive_weight:
        pred_df.action = 0
    else:
        x_tt = test_df.loc[:, features].values
        # The first feature is left untouched; the rest are imputed with f_mean.
        x_tt[:, 1:] = for_loop(fillna_npwhere, x_tt[:, 1:], f_mean)
        pred_df.action = model.predict(x_tt)
    env.predict(pred_df)