# Kaggle Competition : Jane Street Market Prediction

 $\rule{20cm}{0.1pt}$

## Notebook details

cf. https://www.kaggle.com/lunatik62/pytorch-tagmodels-stacking-1-2

**Load Libraries**

In [1]:
import os
import sys
import json
import time
import random
from datetime import datetime

import numpy as np
import pandas as pd
import datatable as dt
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score
from scipy.special import expit

import torch
import torch.nn as nn

# Report the installed PyTorch build, then require a CUDA device:
# the rest of the notebook sends the model and the batches to the GPU.
print(f"Pytorch version : {torch.__version__}")

if not torch.cuda.is_available():
    print("Error : GPU not available")
    sys.exit(1)

print(f"GPU : {torch.cuda.get_device_name()} available")

Pytorch version : 1.7.0
GPU : Tesla P100-PCIE-16GB available


**Functions**

In [2]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, sets ``PYTHONHASHSEED``, seeds NumPy and
    PyTorch (CPU and all CUDA devices), and configures cuDNN to pick
    deterministic algorithms.

    Args:
        seed (int): value used for every generator.

    Returns:
        None
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select a different kernel from run to run.
    torch.backends.cudnn.benchmark = False
    return None

def utility(dates, weights, true_resp, actions, use_mult=False):
    """Jane Street-style utility score.

    ``Pi`` is the per-row return ``weight * resp * action``. The score is
    ``clip(t, 0, 6) * sum(Pi)`` with ``t = sum(Pi) / sqrt(sum(Pi ** 2)) * mult``.

    NOTE(review): ``Pi`` is kept per row here, as in the original notebook.
    The official competition metric is believed to aggregate it per date
    first - confirm which of the two is wanted.

    Args:
        dates: integer day identifier of each row (only read when ``use_mult``).
        weights: weight of each row.
        true_resp: realised response of each row.
        actions: 0/1 decision of each row.
        use_mult (bool): when True, scale ``t`` by ``sqrt(250 / number of days)``.

    Returns:
        float: the utility; ``0.0`` when no row contributes (all ``Pi`` are zero
        or the input is empty) instead of the NaN produced by ``0 / 0``.
    """
    Pi = np.asarray(weights * true_resp * actions, dtype=float)
    sum_Pi = Pi.sum()
    sum_pi_squared = np.sqrt((Pi ** 2).sum())

    # Nothing traded: the ratio is undefined and the utility is zero by definition.
    if sum_pi_squared == 0:
        return 0.0

    if use_mult:
        # Count the days actually present; np.bincount(dates).shape[0] would be
        # max(date) + 1 and therefore also count days that have no rows.
        n_days = np.unique(dates).shape[0]
        mult = np.sqrt(250 / n_days)
    else:
        mult = 1

    t = (sum_Pi / sum_pi_squared) * mult
    u = min(max(t, 0), 6) * sum_Pi
    return u

def compute_utility_many(predictions, dates, weights, true_resp, interval=np.linspace(0, 1, 101)):
    """Score the predicted probabilities at every decision threshold of ``interval``.

    For each threshold ``v`` the action is 1 where ``predictions > v`` and 0
    elsewhere; the resulting utility is computed with ``utility``.

    Returns:
        list of ``(threshold, utility)`` tuples, in the order of ``interval``.
    """
    return [
        (
            cutoff,
            utility(
                dates=dates,
                weights=weights,
                true_resp=true_resp,
                actions=(predictions > cutoff).astype(int),
            ),
        )
        for cutoff in interval
    ]

def build_dic_records(datasets=[], records=[]):
    """Create the empty container that collects the training history.

    Returns:
        dict: ``{dataset: {record: []}}`` with one independent empty list per
        (dataset, record) pair.
    """
    return {
        dataset_name: {record_name: [] for record_name in records}
        for dataset_name in datasets
    }

**Settings**

In [3]:
# --- Reproducibility / run identification ---
SEED = 2021
# Time stamp used in the name of newly saved weights. No ":" separators, so it
# is safe in file names and has the same layout as the stamps of the stored
# weights referenced below (e.g. "2021-02-16_154406").
DATE_NOW = datetime.now().strftime("%Y-%m-%d_%H%M%S")
DATE_TRAINING_TAGMODEL = "2021-02-16_154406"
DEVICE = torch.device("cuda")

# --- Paths ---
PATH_ROOT = "/kaggle/input/jane-street-market-prediction"
PATH_WEIGHT_TAGMODEL = "../input/weights-tagmodels/"
PATH_WEIGHT_STACKMODEL = "../input/weightsstackedmodel/StackedModel_2021-02-17_135144.pt"

PATH_DATA = os.path.join(PATH_ROOT, "train.csv")
PATH_FEATURES = os.path.join(PATH_ROOT, "features.csv")

# --- Columns ---
LST_FEATURES = [f"feature_{n_feat}" for n_feat in range(130)]  # feature_0 .. feature_129
LST_TARGETS = ["resp", "resp_1", "resp_2", "resp_3", "resp_4"]

# --- Switches ---
TRAIN_PREVIOUS_LAYERS = False  # passed to StackedModel(train_previous_layers=...)
THRESHOLD = .5                 # probability cut-off used to turn a prediction into an action
SIZE_TRAIN = .85               # fraction of the rows used for training
TRAINING = False               # run the training cell
SAVE_DICT = False              # dump the training records to JSON

**Preprocessing**

In [4]:
%%time
seed_everything(seed=SEED)

data = dt.fread(PATH_DATA).to_pandas()  # fast loading
data = data[data.date > 85]  # keep only the rows with date > 85
data = data.sample(frac=1)  # shuffle
data.reset_index(drop=True, inplace=True)
print(f"shape : {data.shape}")

# Map each tag (one column of features.csv) to the numbers of the features carrying it.
df_tags = pd.read_csv(PATH_FEATURES, index_col="feature")
dic_tags = {}
for n, tag in enumerate(df_tags.columns):
    lst_features = df_tags[tag][df_tags[tag] == True].index.tolist()
    # "feature_12" -> 12. str.strip("feature_") removes a *set of characters*,
    # not a prefix, and only worked because digits are not in that set.
    lst_num_features = [int(e.split("_")[-1]) for e in lst_features]
    dic_tags[str(n)] = lst_num_features

# add feature_0 in all tags
for e in dic_tags.keys():
    dic_tags[e].append(0)

# Per-feature fill values: the mean for feature_1..feature_129 and 1 for feature_0.
f_mean = data[LST_FEATURES[1:]].mean()
f_mean["feature_0"] = 1
# Put the values in LST_FEATURES order so that f_mean.values lines up with the
# columns of data[LST_FEATURES] (feature_0 was appended last, and the former
# f_mean.sort_index() call discarded its result).
f_mean = f_mean.reindex(LST_FEATURES)
data = data.loc[data.weight > 0].reset_index(drop = True)  # delete 0 weight data
data[LST_FEATURES[1:]] = data[LST_FEATURES[1:]].fillna(f_mean)  # filling NaN by mean of each feature

shape : (1862597, 138)
CPU times: user 28.4 s, sys: 9.79 s, total: 38.2 s
Wall time: 1min 7s


**Pytorch utils**

In [5]:
class EarlyStopping:
    """Stop training when a monitored metric has stopped improving.

    Call the instance once per epoch with the metric and the model. When the
    metric improves, the model ``state_dict`` is saved to ``path``; otherwise a
    counter is increased and ``early_stop`` becomes True once it reaches
    ``patience``.

    Args:
        patience (int): number of consecutive non-improving epochs tolerated.
        mode (str): ``"min"`` if a lower metric is better, ``"max"`` otherwise.
        delta (float): margin added to the best score that a new score must reach.
        verbose (bool): report every checkpoint through ``trace_func``.
        trace_func (callable): function used to emit messages.
        path (str): file the best ``state_dict`` is written to.

    Raises:
        ValueError: if ``mode`` is neither ``"min"`` nor ``"max"``.
    """

    def __init__(self, patience=7, mode="max", delta=0.0, verbose=False, trace_func=print, path="checkpoint.pt"):

        # Fail at construction instead of with an UnboundLocalError on the first call.
        if mode not in ("min", "max"):
            raise ValueError(f"mode must be 'min' or 'max', got {mode!r}")

        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        self.verbose = verbose
        self.trace_func = trace_func
        self.path = path

        # Last checkpointed metric, initialised to the worst possible value.
        self.val_score = np.inf if self.mode == "min" else -np.inf

    def __call__(self, epoch_score, model):
        """Register the metric of one epoch and checkpoint ``model`` if it improved."""

        # Internally a higher score is always better.
        score = -1.0 * epoch_score if self.mode == "min" else epoch_score

        # NaN / inf never count as an improvement: a NaN best score would make
        # every later comparison False and silently disable early stopping.
        improved = bool(np.isfinite(score)) and (
            self.best_score is None or score >= self.best_score + self.delta
        )

        if improved:
            self.best_score = score
            self.save_checkpoint(epoch_score, model)
            self.counter = 0
        else:
            self.counter += 1
            self.trace_func(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, epoch_score, model):
        """
        Save the model weights for the new best metric (skipped when the metric is not finite).
        """
        if self.verbose:
            self.trace_func(f'Validation metric moving : ({self.val_score:.6f} --> {epoch_score:.6f}).  Saving model ...')

        # np.isfinite also rejects computed NaNs, which a membership test against
        # [np.nan] misses because NaN != NaN.
        if np.isfinite(epoch_score):
            torch.save(model.state_dict(), self.path)

        self.val_score = epoch_score

class BuildDataset:
    """Map-style dataset built from a DataFrame.

    Holds, as NumPy arrays: the feature matrix (``col_x`` columns), the binary
    labels (1 where a ``target`` column is > 0), the row weights, the ``resp``
    values and the ideal actions (1 where ``resp`` > 0).
    """

    def __init__(self, df, col_x, target):
        # Copies, so later changes to the caller's lists do not leak in.
        self.col_x = col_x.copy()
        self.target = target.copy()

        is_profitable = df.resp > 0
        targets_positive = df[self.target] > 0

        self.X = df[self.col_x].values
        self.y = targets_positive.astype('int').values
        self.weights = df.weight.values
        self.resps = df.resp.values
        self.actions = is_profitable.astype('int').values

    def __len__(self):
        """Number of rows."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return row ``idx`` as a dict of float tensors."""
        columns = (
            ('features', self.X),
            ('label', self.y),
            ('weights', self.weights),
            ('resps', self.resps),
            ('actions', self.actions),
        )
        return {
            key: torch.tensor(values[idx], dtype=torch.float)
            for key, values in columns
        }
    
class P1UtilityLoss(torch.nn.Module):
    """
    Loss derived from the first part of the utility metric:
    ``1 - sum(w * r * pred) / sum(w * r * true_actions)``.
    """

    def __init__(self, threshold=.5):
        super().__init__()
        # Stored but not read by forward().
        self.threshold = torch.tensor(threshold)

    def forward(self, true_actions, pred, weights, resps):
        """Return one minus the ratio of the predicted to the reference weighted return."""
        weighted_resp = weights * resps
        reference_return = (weighted_resp * true_actions).sum()
        predicted_return = (weighted_resp * pred).sum()
        return 1 - predicted_return / reference_return

class TagModel(torch.nn.Module):
    """
    MLP applied to the features of one tag:
    batchnorm0 > dropout > dense0 > relu > batchnorm1 > dropout > dense1 > sigmoid

    The input columns are selected with ``dic_tags[str(tag_number)]``; the hidden
    layer is twice as wide as that selection.
    """

    def __init__(self, dic_tags, tag_number, input_size, output_size, threshold=.5, rate_dropout=.1):

        super().__init__()
        self.dic_tags = dic_tags
        self.tag_number = str(tag_number)
        self.input_size = input_size
        self.output_size = output_size
        self.rate_dropout = rate_dropout

        n_selected = len(self.dic_tags[self.tag_number])
        n_hidden = 2 * n_selected

        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(self.rate_dropout)
        self.batch_norm0 = nn.BatchNorm1d(n_selected)
        self.batch_norm1 = nn.BatchNorm1d(n_hidden)
        self.dense0 = nn.Linear(n_selected, n_hidden)
        self.dense1 = nn.Linear(n_hidden, self.output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the sigmoid output for the tag's columns of ``x``."""
        selected = x[:, self.dic_tags[self.tag_number]]

        hidden = self.dense0(self.dropout(self.batch_norm0(selected)))
        hidden = self.relu(hidden)

        logits = self.dense1(self.dropout(self.batch_norm1(hidden)))
        return self.sigmoid(logits)
    
class StackedModel(torch.nn.Module):
    """Stacking head on top of one pre-trained ``TagModel`` per tag.

    Input of the head: ``feature_0`` followed by the output of every TagModel.
    Head: dense0 > relu > batchnorm0 > dropout > dense1 > batchnorm1 > dropout > dense2 > sigmoid

    The TagModels are registered as ``model0``, ``model1``, ... so the keys of
    the ``state_dict`` match previously saved StackedModel weights.

    Args:
        dic_tags (dict): tag number (as str, "0" .. "n-1") -> list of feature numbers.
            One TagModel is built per entry (29 with the competition's features.csv,
            which gives the 30 -> 60 -> 5 -> 1 head).
        path_weights (str): directory containing ``TagModel{n}_{date_training_tag}.pt``.
        date_training_tag (str): time stamp in the TagModel weight file names.
        rate_dropout (float): dropout rate of the head.
        train_previous_layers (bool): when False, the TagModels are frozen: their
            parameters receive no gradient and they stay in eval mode.
        device: device the TagModels are created on and their weights are mapped to.
    """

    def __init__(self, dic_tags, path_weights, date_training_tag, rate_dropout=.1, train_previous_layers=False, device=DEVICE):
        super(StackedModel, self).__init__()

        self.dic_tags = dic_tags
        self.path_weights = path_weights
        self.rate_dropout = rate_dropout
        self.date_training_tag = date_training_tag
        self.train_previous_layers = train_previous_layers
        self.device = device  # honour the argument instead of the global DEVICE
        self.n_tag_models = len(self.dic_tags)

        # build every TagModel first, then load its pre-trained weights
        for n in range(self.n_tag_models):
            tag_model = TagModel(dic_tags=self.dic_tags, tag_number=n, input_size=130, output_size=1)
            setattr(self, f"model{n}", tag_model.to(self.device))

        for n, tag_model in enumerate(self._tag_models()):
            path_tag_weights = os.path.join(self.path_weights, f"TagModel{n}_{self.date_training_tag}.pt")
            # map_location lets weights saved on another device be loaded on this one
            tag_model.load_state_dict(torch.load(path_tag_weights, map_location=self.device))

        # fix weights of TagModels only: eval() alone changes the BatchNorm/Dropout
        # mode but does not stop the optimizer from updating the parameters
        if not self.train_previous_layers:
            for tag_model in self._tag_models():
                tag_model.eval()
                for parameter in tag_model.parameters():
                    parameter.requires_grad_(False)

        n_inputs = self.n_tag_models + 1  # TagModel outputs + feature_0
        n_hidden = 2 * n_inputs

        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        self.dropout = nn.Dropout(self.rate_dropout)
        self.batch_norm0 = nn.BatchNorm1d(n_hidden)
        self.batch_norm1 = nn.BatchNorm1d(5)

        self.dense0 = torch.nn.Linear(n_inputs, n_hidden)
        self.dense1 = torch.nn.Linear(n_hidden, 5)
        self.dense2 = torch.nn.Linear(5, 1)

    def _tag_models(self):
        """Yield the TagModels in tag order (model0, model1, ...)."""
        for n in range(self.n_tag_models):
            yield getattr(self, f"model{n}")

    def train(self, mode=True):
        """Set the training mode, keeping frozen TagModels in eval mode.

        Without this, ``model.train()`` would switch the BatchNorm and Dropout
        layers of the TagModels back to training behaviour.
        """
        super().train(mode)
        if not self.train_previous_layers:
            for tag_model in self._tag_models():
                tag_model.eval()
        return self

    def forward(self, x):
        """Return the stacked prediction for the feature batch ``x``."""
        feature_0 = torch.reshape(x[:, 0], (-1, 1))

        # one column per TagModel, in tag order, after feature_0
        tag_outputs = [tag_model(x) for tag_model in self._tag_models()]
        x = torch.cat([feature_0, *tag_outputs], 1)

        x = self.dense0(x)
        x = self.relu(x)

        x = self.batch_norm0(x)
        x = self.dropout(x)
        x = self.dense1(x)

        x = self.batch_norm1(x)
        x = self.dropout(x)
        x = self.dense2(x)

        x = self.sigmoid(x)
        return x

**Training the StackedModel**

In [6]:
# Model : architecture (rows per batch, number of input features, number of outputs)
batch_size, input_size, output_size = 16384, 130, 1

# Model : training
num_epochs, patience = 100, 3   # maximum number of epochs / early-stopping patience
learning_rate = 1e-3
threshold = 0.5                 # probability cut-off used to derive actions
es_mode = "min"                 # early stopping monitors a loss: lower is better

# One empty history list per (dataset, record) pair
dic_records = build_dic_records(
    datasets=["train", "val"],
    records=[
        "lst_loss_epoch",
        "lst_loss_batch",
        "lst_utility",
        "lst_accuracy",
        "lst_precision",
        "lst_recall",
    ],
)

if TRAINING:

    # sample data
    data = data.sample(frac=1)
    data.reset_index(drop=True, inplace=True)

    # Disjoint split on the (reset) positional index. Validation starts right
    # after the last training row: starting at max(train_index) would put that
    # row in both sets.
    n_train = int(data.shape[0] * SIZE_TRAIN)
    train_index = list(range(n_train))
    val_index = list(range(n_train, data.shape[0]))

    # build sets for training
    train = data.loc[train_index]
    val = data.loc[val_index]
    train_set = BuildDataset(df=train, col_x=LST_FEATURES, target=LST_TARGETS)
    val_set = BuildDataset(df=val, col_x=LST_FEATURES, target=LST_TARGETS)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size)

    # reference actions (trade whenever resp > 0), reused by every metric below
    train_true_actions = (train.resp > 0).astype(int)
    val_true_actions = (val.resp > 0).astype(int)

    # compute and save perfect utility for each set
    perfect_train_utility = utility(dates=train.date.values, weights=train.weight.values, true_resp=train.resp.values, actions=train_true_actions)
    perfect_val_utility = utility(dates=val.date.values, weights=val.weight.values, true_resp=val.resp.values, actions=val_true_actions)
    dic_records["train"]["perfect_utility"] = perfect_train_utility
    dic_records["val"]["perfect_utility"] = perfect_val_utility
    dic_records["train"]["lst_index"] = train_index
    dic_records["val"]["lst_index"] = val_index

    # define utils for model
    torch.cuda.empty_cache()
    model = StackedModel(
        dic_tags=dic_tags, path_weights=PATH_WEIGHT_TAGMODEL, date_training_tag=DATE_TRAINING_TAGMODEL,train_previous_layers=TRAIN_PREVIOUS_LAYERS, device=DEVICE
    ).to(DEVICE)
    p1_utility_loss = P1UtilityLoss(threshold)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    path_model = f"./StackedModel_{DATE_NOW}.pt"
    early_stopping = EarlyStopping(mode=es_mode, patience=patience, verbose=True, path=path_model)

    pbar_epoch = tqdm(total=num_epochs, position=1)

    for epoch in range(num_epochs):

        # batch losses and predictions are collected per epoch
        dic_records["train"]["lst_loss_batch"] = []
        dic_records["val"]["lst_loss_batch"] = []
        train_outputs = np.empty(shape=(len(train)))
        val_outputs = np.empty(shape=(len(val)))

        pbar_batch = tqdm(total=len(train_loader), position=2)

        # Allow training <=> moving weights
        model.train()

        for i, train_batch in enumerate(train_loader):

            # Remember index on train data (the loader is not shuffled, so
            # batch i holds rows [i * batch_size, (i + 1) * batch_size) )
            start_ind = i * batch_size
            end_ind = start_ind + batch_size

            # Send to GPU
            X = train_batch["features"].to(DEVICE)
            w = train_batch["weights"].to(DEVICE)
            r = train_batch["resps"].to(DEVICE)
            a = train_batch["actions"].to(DEVICE)

            # Forward pass
            outputs = model(X.float())
            loss = p1_utility_loss(a, outputs[:,0], w, r)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Save
            dic_records["train"]["lst_loss_batch"].append(loss.item())
            train_outputs[start_ind:end_ind] = outputs[:,0].cpu().detach().numpy()
            pbar_batch.update(1)

        pbar_batch.close()

        # compute loss on validation
        model.eval()
        with torch.no_grad():
            for i_, val_batch in enumerate(val_loader):

                # Remember index on validation data
                start_ind_ = i_ * batch_size
                end_ind_ = start_ind_ + batch_size

                # Send to GPU
                X_ = val_batch["features"].to(DEVICE)
                w_ = val_batch["weights"].to(DEVICE)
                r_ = val_batch["resps"].to(DEVICE)
                a_ = val_batch["actions"].to(DEVICE)

                # Compute
                outputs_ = model(X_.float())
                loss_ = p1_utility_loss(a_, outputs_[:,0], w_, r_)

                # Save
                dic_records["val"]["lst_loss_batch"].append(loss_.item())
                val_outputs[start_ind_:end_ind_] = outputs_[:,0].cpu().numpy()

        dic_records["train"]["lst_loss_epoch"].append(np.mean(dic_records["train"]["lst_loss_batch"]))
        dic_records["val"]["lst_loss_epoch"].append(np.mean(dic_records["val"]["lst_loss_batch"]))

        # Epoch metrics on the thresholded predictions: train
        train_actions = (train_outputs > threshold).astype(int)
        train_utility = utility(dates=train.date.values, weights=train.weight.values, true_resp=train.resp.values, actions=train_actions)
        dic_records["train"]["lst_utility"].append(train_utility)
        dic_records["train"]["lst_accuracy"].append(accuracy_score(train_true_actions, train_actions))
        dic_records["train"]["lst_precision"].append(precision_score(train_true_actions, train_actions))
        dic_records["train"]["lst_recall"].append(recall_score(train_true_actions, train_actions))

        # Epoch metrics on the thresholded predictions: validation
        val_actions = (val_outputs > threshold).astype(int)
        val_utility = utility(dates=val.date.values, weights=val.weight.values, true_resp=val.resp.values, actions=val_actions)
        dic_records["val"]["lst_utility"].append(val_utility)
        dic_records["val"]["lst_accuracy"].append(accuracy_score(val_true_actions, val_actions))
        dic_records["val"]["lst_precision"].append(precision_score(val_true_actions, val_actions))
        dic_records["val"]["lst_recall"].append(recall_score(val_true_actions, val_actions))

        msg1 = '~~~ Epoch [{}/{}], Loss train: {:.4f}, Loss val {:.4f}'.format(
                    epoch + 1,
                    num_epochs,
                    dic_records["train"]["lst_loss_epoch"][-1],
                    dic_records["val"]["lst_loss_epoch"][-1]
        )
        msg2 = '~~~ train utility: {:.4f}, val utility {:.4f}'.format(
                    dic_records["train"]["lst_utility"][-1],
                    dic_records["val"]["lst_utility"][-1]
        )
        pbar_epoch.update(1)
        pbar_epoch.write(msg1)
        pbar_epoch.write(msg2)

        # Early stopping on the validation loss (also checkpoints the best model)
        early_stopping(dic_records["val"]["lst_loss_epoch"][-1], model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

**Save dict**

In [7]:
# Persist the training history as JSON next to the notebook (only when enabled).
if SAVE_DICT:
    with open("./dic_records_training_StackedModel.json", "w") as records_file:
        json.dump(dic_records, records_file)

**Load StackedModel**

In [8]:
# Rebuild the stacked architecture (this loads the TagModel weights), then
# restore the trained StackedModel weights and switch to inference mode.
torch.cuda.empty_cache()
model = StackedModel(
    dic_tags=dic_tags,
    path_weights=PATH_WEIGHT_TAGMODEL,
    date_training_tag=DATE_TRAINING_TAGMODEL,
    train_previous_layers=TRAIN_PREVIOUS_LAYERS,
    device=DEVICE,
)
model = model.to(DEVICE)
stacked_state_dict = torch.load(PATH_WEIGHT_STACKMODEL)
model.load_state_dict(stacked_state_dict)
model.eval()

StackedModel(
  (model0): TagModel(
    (sigmoid): Sigmoid()
    (dropout): Dropout(p=0.1, inplace=False)
    (batch_norm0): BatchNorm1d(18, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (batch_norm1): BatchNorm1d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dense0): Linear(in_features=18, out_features=36, bias=True)
    (dense1): Linear(in_features=36, out_features=1, bias=True)
    (relu): ReLU()
  )
  (model1): TagModel(
    (sigmoid): Sigmoid()
    (dropout): Dropout(p=0.1, inplace=False)
    (batch_norm0): BatchNorm1d(18, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (batch_norm1): BatchNorm1d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dense0): Linear(in_features=18, out_features=36, bias=True)
    (dense1): Linear(in_features=36, out_features=1, bias=True)
    (relu): ReLU()
  )
  (model2): TagModel(
    (sigmoid): Sigmoid()
    (dropout): Dropout(p=0.1, inplace=False)
    (batch

**Submission**

In [9]:
# janestreet is only importable inside the competition environment, hence the local import.
import janestreet
env = janestreet.make_env()
env_iter = env.iter_test()

# Fill value of each feature, explicitly ordered like the columns of x_tt.
# f_mean.values alone follows the Series' own index order, which is not
# guaranteed to be LST_FEATURES order.
fill_values = f_mean.reindex(LST_FEATURES).values.reshape(1, -1)

for (test_df, pred_df) in tqdm(env_iter):

    # rows with a zero weight do not contribute to the score: never trade them
    if test_df['weight'].values[0] > 0:
        x_tt = test_df.loc[:, LST_FEATURES].values

        # replace missing features by their fill value
        nan_mask = np.isnan(x_tt)
        if nan_mask.any():
            x_tt = np.where(nan_mask, fill_values, x_tt)

        # inference only: no autograd graph needed
        with torch.no_grad():
            pred = model(torch.tensor(x_tt, dtype=torch.float).to(DEVICE)).cpu().numpy()

        # pred has shape (1, 1): take the scalar before thresholding
        int_pred = int(pred[0, 0] >= THRESHOLD)

        pred_df["action"] = int_pred

    else:
        pred_df["action"] = 0

    env.predict(pred_df)

|          | 0/? [00:00<?, ?it/s]