In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import gc
import pandas as pd
import numpy as np

import fastai
from   fastai.callback import *
from   fastai.callback.all import *
from   fastai.callback.training import GradientClip
from   fastai.callback.all import SaveModelCallback, EarlyStoppingCallback, ReduceLROnPlateau 
from   fastai.tabular import *
from   fastai.tabular.data import *
from   fastai.tabular.all import *
from   fastai.tabular.all import TabularPandas, RandomSplitter, CategoryBlock, MultiCategoryBlock, range_of, accuracy, tabular_learner, TabularDataLoaders
# from fastai import datasets
# from fastai.dataset import ModelData,ArraysIndexDataset
# from fastai.dataloader import DataLoader
from   fastai.learner import Learner
from   fastai.metrics import RocAucMulti

from   sklearn.pipeline import Pipeline
from   sklearn.impute import SimpleImputer
from   sklearn.preprocessing import StandardScaler

import torch.nn as nn
from   torch.nn import CrossEntropyLoss, MSELoss
from   torch.nn.modules.loss import _WeightedLoss

from   functools import partial
import warnings
warnings.filterwarnings ("ignore")

In [None]:
# Global Vars
# Placeholders that later cells fill in; they are declared here so every cell
# can refer to them by name.
# TP: first value returned by preprocess_data () in training mode.
TP   = None
# DF: second value returned by preprocess_data () (the processed DataFrame).
DF   = None
# DLs: dataloaders built from TP with batch size BS.
DLs  = None
# PIPE: sklearn Pipeline fitted inside preprocess_data () and reused at prediction time.
PIPE = None
# BS: batch size passed to TP.dataloaders ().
BS   = 10000
# Row / column counts of features.csv; overwritten by the next cell.
N_FEATURES  = 0
N_FEAT_TAGS = 0

In [None]:
# Column dtypes for features.csv: the feature name is text, each tag_0..tag_28 flag is int8.
dtype = {'feature': 'str', **{f'tag_{i}': 'int8' for i in range (29)}}

# Only columns 1..29 (the tag columns) are loaded; the frame is needed just for its shape.
features_df = pd.read_csv ('../input/jane-street-market-prediction/features.csv', usecols=range (1,30), dtype=dtype)

# rows    -> number of features (feature_0 .. feature_129 in train.csv)
# columns -> number of tags
N_FEATURES, N_FEAT_TAGS = features_df.shape

# Free the frame straight away; only the two counts are kept.
del features_df
gc.collect ()
N_FEATURES, N_FEAT_TAGS

In [None]:
def preprocess_data (filename='../input/jane-street-market-prediction/train.csv', df=None, isTrainData=True):
    """
    Prepare model inputs for training or for inference.

    Training mode (`isTrainData=True`):
        Reads `filename`, keeps the rows with `date > 85`, derives one binary
        target per response column (`resp_* > 0`), fits the global imputation
        pipeline `PIPE` on the remaining columns and wraps the result in a
        fastai `TabularPandas` with a random 5% validation split.
        The `df` argument is ignored in this mode.

    Inference mode (`isTrainData=False`):
        Drops the non-feature columns from `df` (when present) and imputes the
        rest with the already fitted `PIPE`. The caller's frame is not modified.

    Parameters
    ----------
    filename    : path of the training CSV (training mode only).
    df          : DataFrame to transform (inference mode only).
    isTrainData : selects the mode described above.

    Returns
    -------
    (to, df) : `to` is the `TabularPandas` object in training mode and `None`
               in inference mode; `df` is the processed DataFrame.

    Raises
    ------
    ValueError   : inference mode was requested without a `df`.
    RuntimeError : inference mode was requested before `PIPE` was fitted.
    """

    global PIPE

    if not isTrainData:

        if df is None:
            raise ValueError ("preprocess_data: `df` is required when isTrainData=False")
        if PIPE is None:
            raise RuntimeError ("preprocess_data: PIPE is not fitted; run the training mode first")

        # PIPE was fitted without weight/date/ts_id, so they are removed here too.
        # errors='ignore' lets frames that already lack these columns pass through
        # instead of raising KeyError.
        df      = df.drop (columns=['weight', 'date', 'ts_id'], errors='ignore').reset_index (drop=True)
        columns = df.columns
        df      = pd.DataFrame (PIPE.transform (df), columns=columns)
        return None, df

    # Explicit dtypes keep the large training CSV compact in memory.
    dtype = {
        'date'   : 'int64',
        'weight' : 'float32',
        'resp'   : 'float32',
        'ts_id'  : 'int64',
    }
    dtype.update ({'feature_' + str (i): 'float32' for i in range (130)})

    resp_cols = ['resp_1', 'resp_2', 'resp_3','resp_4', 'resp']
    df        = pd.read_csv (filename, dtype=dtype).query ('date > 85').reset_index (drop=True)

    # One 0/1 target per response column, arranged as (n_rows, n_targets).
    y         = np.stack ([(df[c] > 0).astype ('int') for c in resp_cols]).T
    df        = df.drop (columns=['weight', 'date', 'ts_id'] + resp_cols)
    f_columns = [c for c in df.columns if "feature" in c]

    # Mean imputation only; the fitted pipeline is kept globally for inference.
    PIPE      = Pipeline ([
                    ("imputer", SimpleImputer (missing_values=np.nan, strategy='mean')),
    ])
    columns   = list (df.columns) + resp_cols
    X         = PIPE.fit_transform (df)
    df        = pd.DataFrame (np.hstack ((X, y)), columns=columns)
    del X, y

    splits    = RandomSplitter (valid_pct=0.05) (range_of (df))
    to        = TabularPandas (df, cont_names=f_columns, cat_names=None, y_names=resp_cols,
                               y_block=MultiCategoryBlock (encoded=True, vocab=resp_cols), splits=splits)
    return to, df

In [None]:
# Run the training-mode preprocessing (reads train.csv and fits PIPE),
# then display the first two rows of the model inputs.
TP, DF = preprocess_data ()
TP.xs.iloc[:2]

In [None]:
# First two rows of the targets held by TP.
TP.ys.iloc[:2]

In [None]:
# Shapes of the input and target tables.
TP.xs.shape, TP.ys.shape

In [None]:
# Build the dataloaders with the global batch size BS and preview one batch.
DLs = TP.dataloaders (bs=BS)
DLs.show_batch ()

In [None]:
# Shape of the third element of a batch (the next cell unpacks a batch as
# x_cat, x_cont, y, so this is presumably the target tensor).
DLs.one_batch ()[2].shape

In [None]:
# Keep one training batch around; later cells feed x_cat / x_cont to the model.
x_cat, x_cont, y = DLs.train.one_batch ()
x_cat.shape, x_cont.shape, y.shape

# Custom Model

In [None]:
class SmoothBCEwLogits(_WeightedLoss):
    """
    Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5 before the loss is computed:
    `t * (1 - smoothing) + 0.5 * smoothing`.

    Parameters
    ----------
    weight    : optional rescaling weight forwarded to
                `F.binary_cross_entropy_with_logits`.
    reduction : 'mean' (default), 'sum' or 'none'.
    smoothing : smoothing strength, must satisfy 0 <= smoothing < 1.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The base class already stores `weight` (as a buffer) and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        """Return the smoothed targets; `n_labels` is accepted but not used."""
        assert 0 <= smoothing < 1
        # The smoothed targets are constants, so no gradient is tracked for them.
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE between logits `inputs` and 0/1 `targets`."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        # Pass the reduction through explicitly. The functional defaults to
        # 'mean', which previously collapsed the loss to a scalar before the
        # 'sum' / 'none' settings could take effect.
        return F.binary_cross_entropy_with_logits(inputs, targets,
            weight=self.weight, reduction=self.reduction)

In [None]:
class Resnet (nn.Module):
    """
    Stack of eleven Linear -> BatchNorm1d -> LeakyReLU -> Dropout stages.

    The raw input is batch-normalised first. From the fourth stage onwards the
    input of stage k is the sum of the outputs of stages k-1 and k-3.

    Layers are registered as `dense1..dense11` / `batch_norm0..batch_norm11`,
    in that order, so parameter names match previously saved checkpoints.
    """

    N_STAGES = 11

    def __init__(self, inputSize, aLayerCount, drop_prob):

        super ().__init__()
        # A single dropout and a single activation module are shared by all stages.
        self.dropout     = nn.Dropout (drop_prob)
        self.nonlin      = nn.LeakyReLU (negative_slope=0.01, inplace=True)

        self.batch_norm0 = nn.BatchNorm1d (inputSize)

        width_in = inputSize
        for stage in range (1, self.N_STAGES + 1):
            setattr (self, f'dense{stage}',      nn.Linear (width_in, aLayerCount))
            setattr (self, f'batch_norm{stage}', nn.BatchNorm1d (aLayerCount))
            width_in = aLayerCount   # every stage after the first is square

    def forward (self, X):

        # outputs[k] holds the output of stage k; outputs[0] is the normalised input.
        outputs = [self.batch_norm0 (X)]
        for stage in range (1, self.N_STAGES + 1):
            stage_in = outputs[stage - 1]
            if stage >= 4:
                # skip connection from three stages back
                stage_in = stage_in + outputs[stage - 3]
            linear = getattr (self, f'dense{stage}')
            norm   = getattr (self, f'batch_norm{stage}')
            outputs.append (self.dropout (self.nonlin (norm (linear (stage_in)))))
        return outputs[-1]

In [None]:
class Emb_Resnet_Model (nn.Module):
    """
    Feature-tag embedding model followed by a residual MLP.

    Every feature gets an embedding equal to the average of the embeddings of
    the tags it carries (taken from features.csv). The continuous inputs are
    projected through these feature embeddings, the projection is concatenated
    to the raw inputs, and the result goes through `Resnet` and a final linear
    layer that emits 5 logits.

    Parameters
    ----------
    embed_dim : size of each tag embedding. The default is the value of the
                global N_FEAT_TAGS at the time this class is defined.
    csv_file  : path of features.csv (feature x tag table).

    Notes
    -----
    * Layer names and shapes are unchanged, so existing checkpoints still load.
    * The tag matrix is a non-persistent buffer: it follows the model across
      devices but is not written to / expected in the state dict.
    * Sizes are kept on the instance; the module-level N_FEAT_TAGS / N_FEATURES
      globals are neither read at call time nor modified.
    """

    N_CSV_TAGS = 29   # tag_0 .. tag_28 columns read from features.csv

    def __init__(self, embed_dim=N_FEAT_TAGS, csv_file='../input/jane-street-market-prediction/features.csv'):

        super ().__init__()

        # Feature -> tag table: row i holds the 0/1 tag flags of feature_i.
        dtype = {'tag_' + str (i): 'int8' for i in range (self.N_CSV_TAGS)}
        t_df  = pd.read_csv (csv_file, usecols=range (1, self.N_CSV_TAGS + 1), dtype=dtype)
        # Extra synthetic tag, set only on the first row (feature_0).
        t_df['tag_29'] = np.array ([1] + ([0] * (t_df.shape[0]-1)) ).astype ('int8')

        tag_matrix = torch.tensor (t_df.to_numpy ())
        self.register_buffer ('features_tag_matrix', tag_matrix, persistent=False)
        self.n_features, self.n_tags = tag_matrix.shape          # (130, 30) for the competition file

        # Tag embeddings; one spare row is kept beyond the tags actually used.
        self.embed_dim     = embed_dim
        self.tag_embedding = nn.Embedding (self.n_tags + 1, embed_dim)
        self.tag_weights   = nn.Linear (self.n_tags, 1)   # not used in forward; kept so checkpoints match

        drop_prob          = 0.45
        self.ffn           = Resnet (self.n_features + embed_dim, 350, drop_prob)
        self.outDense      = nn.Linear (350, 5)

    def features2emb (self):
        """
        Return the (n_features, embed_dim) matrix of feature embeddings, each
        row being the mean of the embeddings of the tags set for that feature.
        """

        tag_mask   = self.features_tag_matrix.to (self.tag_embedding.weight.dtype)   # (n_features, n_tags)
        tag_idxs   = torch.arange (self.n_tags, device=tag_mask.device)              # (n_tags,)
        tag_embs   = self.tag_embedding (tag_idxs)                                   # (n_tags, embed_dim)

        # Sum of the embeddings of the present tags, divided by how many are present.
        # clamp avoids a division by zero for a feature without any tag.
        tag_counts = tag_mask.sum (dim=1, keepdim=True).clamp (min=1)               # (n_features, 1)
        return torch.matmul (tag_mask, tag_embs) / tag_counts

    def forward (self, cat_featrs, features):
        """
        Compute the 5 output logits for a batch.

        `cat_featrs` is accepted only because the tabular learner passes
        (categorical, continuous) inputs; it is ignored.
        """

        features   = features.view (-1, self.n_features)       # (batch, n_features)
        f_emb      = self.features2emb ()                       # (n_features, embed_dim)
        features_2 = torch.matmul (features, f_emb)             # (batch, embed_dim)

        # Concatenate the raw features with their embedding projection.
        features   = torch.hstack ((features, features_2))      # (batch, n_features + embed_dim)

        x          = self.ffn (features)
        # Raw logits are returned; apply a sigmoid downstream to get probabilities.
        return self.outDense (x)

In [None]:
@delegates (torch.optim.AdamW.__init__)
def pytorch_AdamW (param_groups, **kwargs):
    """
    Optimizer factory for the learner: builds a torch AdamW whose parameter
    groups all receive the same hyper-parameters (`kwargs`) and wraps it in
    fastai's OptimWrapper.
    """
    group_specs = []
    for group_params in param_groups:
        group_specs.append ({'params': group_params, **kwargs})
    optimizer = torch.optim.AdamW (group_specs)
    return OptimWrapper (optimizer)

In [None]:
# Build the learner around the custom embedding + ResNet model and restore the
# pretrained weights shipped as a Kaggle dataset.
path  = "../input/jane-embedding-resnet/Jane_Embedding_Resnet"
learn = TabularLearner (DLs, model=Emb_Resnet_Model (), model_dir='/kaggle/working/',
                        loss_func=SmoothBCEwLogits (smoothing=0.015), metrics=RocAucMulti (),
                        opt_func=partial (pytorch_AdamW, lr=0.007, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01)
                       )
learn = learn.load (path)
# Learner.save takes a file name (written under model_dir with a ".pth" suffix).
# Passing the directory itself produced a file literally named ".pth", so the
# weights are now saved under the model's name instead.
learn.save ('Jane_Embedding_Resnet')
learn.summary ()

In [None]:
# Sanity check: run the model on the batch captured earlier and show the raw logits.
logits = learn.model (x_cat, x_cont)
logits

In [None]:
# Loss of the loaded model on one batch, as a reference value before any further training.
x_cat, x_cont, y = learn.dls.one_batch ()
init_loss = learn.loss_func (learn.model (x_cat, x_cont), y)
init_loss

In [None]:
# lr_min, lr_steep = learn.lr_find (start_lr=1e-3, end_lr=5e-2, num_it=100)
# lr_min, lr_steep

In [None]:
# Training configuration. The fit call at the bottom is commented out, so the
# callbacks defined here are only used if it is re-enabled.
modelfile = 'Jane_Embedding_Resnet'
callbacks = [
    # All three monitoring callbacks watch the 'roc_auc_score' metric.
    EarlyStoppingCallback (monitor='roc_auc_score', min_delta=0.0001, patience=12),
    SaveModelCallback     (monitor='roc_auc_score', fname=modelfile),
    # NOTE(review): presumably fastai divides the learning rate by `factor` on a plateau - confirm.
    ReduceLROnPlateau     (monitor='roc_auc_score', min_delta=0.0001, factor=2.0, min_lr=1e-8, patience=1),
    GradientClip (0.1)
]

epochs = 80
# lr     = lr_min
# learn.fit_one_cycle (epochs, lr, wd=1e-2, cbs=callbacks)

In [None]:
from fastai.imports import *
from fastai.torch_core import *
from fastai.learner import *
    
@patch
@delegates(subplots)
def plot_metrics(self: Recorder, nrows=None, ncols=None, figsize=None, **kwargs):
    """
    Plot the recorded training history: the first panel shows both loss curves
    ('train' and 'valid'), every following panel shows one recorded metric.

    `nrows` / `ncols` fix the grid; whichever is missing is derived from the
    number of panels. `figsize` defaults to 6x4 per panel. Extra keyword
    arguments are forwarded to `subplots`.
    """
    history = np.stack(self.values)
    series = self.metric_names[1:-1]
    # The two loss series share a panel, hence one panel fewer than series.
    n_panels = len(series) - 1

    if nrows is None:
        if ncols is None:
            nrows = int(math.sqrt(n_panels))
            ncols = int(np.ceil(n_panels / nrows))
        else:
            nrows = int(np.ceil(n_panels / ncols))
    elif ncols is None:
        ncols = int(np.ceil(n_panels / nrows))

    if not figsize:
        figsize = (ncols * 6, nrows * 4)
    fig, axs = subplots(nrows, ncols, figsize=figsize, **kwargs)

    # Keep the first n_panels axes and blank out the unused grid cells.
    panels = []
    for position, axis in enumerate(axs.flatten()):
        if position < n_panels:
            panels.append(axis)
        else:
            axis.set_axis_off()

    # The first axis is listed twice so that series 0 and 1 land on the same panel.
    targets = [panels[0]] + panels
    for col, (series_name, axis) in enumerate(zip(series, targets)):
        is_train = (col == 0)
        axis.plot(history[:, col],
                  color='#1f77b4' if is_train else '#ff7f0e',
                  label='train' if is_train else 'valid')
        axis.set_title('losses' if col <= 1 else series_name)
        axis.legend(loc='best')
    plt.show()

In [None]:
# learn.recorder.plot_loss (skip_start=0, with_valid=True)

In [None]:
# learn.recorder.plot_metrics ()

In [None]:
# learn.recorder.plot_lr ()

In [None]:
# _, logits, _ = learn.predict (DF.iloc[0])
# logits

# Prediction

In [None]:
# Put the underlying PyTorch model in eval mode and keep a direct handle to it
# for the plain-PyTorch prediction path below.
MODEL = learn.model.eval ()
MODEL

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils
from   torch.autograd import Variable
from   torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

# Use the first GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device ("cuda:0" if torch.cuda.is_available () else "cpu")

# For direct submission, without using Fastai since it's too slow
Use Fastai for training models only, not for prediction.

In [None]:
def predict_torch (test_df):
    """
    Predict the 0/1 action for every row of `test_df` with plain PyTorch.

    The frame is stripped of its `weight` / `date` columns (when present),
    imputed with the global fitted `PIPE`, and sent through the global `MODEL`
    in a single forward pass. The action of a row is 1 when the median of its
    sigmoid outputs is >= 0.5.

    Parameters
    ----------
    test_df : DataFrame holding the feature columns; it is not modified.

    Returns
    -------
    numpy int array of shape (n_rows,).
    """

    features = test_df.drop (columns=['weight', 'date'], errors='ignore').reset_index (drop=True)
    n_rows   = len (features)
    if n_rows == 0:
        return np.zeros (0, dtype=int)

    inputs = torch.as_tensor (PIPE.transform (features), dtype=torch.float32).reshape (n_rows, -1)

    # Run on whatever device the model lives on.
    first_param = next (MODEL.parameters (), None)
    if first_param is not None:
        inputs = inputs.to (first_param.device)

    # One forward pass covers all rows. The former per-row loop re-ran the whole
    # batch on every iteration and returned n_rows * n_rows predictions.
    with torch.no_grad ():
        probs = torch.sigmoid (MODEL (None, inputs)).cpu ().numpy ().reshape ((n_rows, -1))

    predictions = np.median (probs, axis=1)
    return (predictions >= 0.5).astype (int)

# For prediction using Fastai
Don't use this, it's too slow and times out. Use PyTorch for prediction.
Use Fastai for training the PyTorch models only.

In [None]:
import torch.nn.functional as F

def predict (df, threshold=0.50):
    """
    Predict 0/1 actions for `df` through the fastai learner.

    The learner's raw outputs are passed through a sigmoid; a row is labelled 1
    when the median of its probabilities is at least `threshold`.
    Returns a numpy int array with one entry per row.
    """

    test_dl     = learn.dls.test_dl (df)
    raw_outputs = learn.get_preds (dl=test_dl)[0]
    row_medians = np.median (torch.sigmoid (raw_outputs).detach ().numpy (), axis=1)
    return (row_medians >= threshold).astype (int)

# Smoke-test the fastai prediction path on the training features.
resp_cols = ['resp_1', 'resp_2', 'resp_3','resp_4', 'resp']

# DF is the frame returned by the training-mode preprocess_data () call: it is
# already imputed and no longer has the weight / date columns. Running it through
# the inference branch again would fail on the missing columns, and that call
# returns a (None, df) tuple rather than a frame. Only the targets are removed here.
test_df   = DF.drop (columns=resp_cols)
predict (test_df)

# Test

In [None]:
import janestreet
# Set up the competition environment and the iterator that feeds the test rows to the submission loop.
env      = janestreet.make_env ()  # initialize the environment
env_iter = env.iter_test ()        # an iterator which loops over the test set

In [None]:
# Walk through the test set: rows with a positive weight get a model prediction,
# all other rows get action 0. Every prediction is submitted through the environment.
for test_df, pred_df in env_iter:
    has_weight     = test_df["weight"].item () > 0
    pred_df.action = predict_torch (test_df) if has_weight else 0
    env.predict (pred_df)

In [None]:
# Marks the end of a complete notebook run.
print ('Done !')