# Jane Street: Neural Network Starter

This notebook implements a simple TensorFlow Keras neural network. Training was done in Version 17.

**Caution:** The GroupCV method applied in this notebook may cause a time-leakage problem. Please use [Purged Time-Series CV][1] instead.

[1]: https://www.kaggle.com/marketneutral/purged-time-series-cv-xgboost-optuna

In [1]:
# Master switch for the notebook: when False, every `if TRAINING:` cell is
# skipped and each loaded Keras model is converted to a TFLite-backed LiteModel.
TRAINING = False

In [2]:
class LiteModel:
    """Thin wrapper around a ``tf.lite.Interpreter`` exposing predict helpers.

    The wrapper caches the index, shape and dtype of the interpreter's first
    input and first output tensor, so each prediction is just
    ``set_tensor`` -> ``invoke`` -> ``get_tensor``.
    """

    @classmethod
    def from_file(cls, model_path):
        """Create a wrapper from a serialized TFLite model stored at *model_path*."""
        # Use ``cls`` (not the hard-coded class name) so subclasses are honoured.
        return cls(tf.lite.Interpreter(model_path=model_path))

    @classmethod
    def from_keras_model(cls, kmodel):
        """Convert the Keras model *kmodel* to TFLite in memory and wrap it."""
        converter = tf.lite.TFLiteConverter.from_keras_model(kmodel)
        tflite_model = converter.convert()
        return cls(tf.lite.Interpreter(model_content=tflite_model))

    def __init__(self, interpreter):
        """Allocate tensors on *interpreter* and cache its first input/output details."""
        self.interpreter = interpreter
        self.interpreter.allocate_tensors()
        input_det = self.interpreter.get_input_details()[0]
        output_det = self.interpreter.get_output_details()[0]
        self.input_index = input_det["index"]
        self.output_index = output_det["index"]
        self.input_shape = input_det["shape"]
        self.output_shape = output_det["shape"]
        self.input_dtype = input_det["dtype"]
        self.output_dtype = output_det["dtype"]

    def _invoke(self, batch):
        """Feed *batch* to the interpreter, run it and return the output tensor."""
        self.interpreter.set_tensor(self.input_index, batch)
        self.interpreter.invoke()
        return self.interpreter.get_tensor(self.output_index)

    def predict(self, inp):
        """Predict for a 2-D batch, running the interpreter one row at a time.

        Args:
            inp: array-like of shape (count, n_features).

        Returns:
            Array of shape (count, output_shape[1]) in the output dtype.
        """
        inp = np.asarray(inp, dtype=self.input_dtype)
        count = inp.shape[0]
        out = np.zeros((count, self.output_shape[1]), dtype=self.output_dtype)
        for i in range(count):
            # Slice (not index) so the row keeps its leading batch axis.
            out[i] = self._invoke(inp[i:i + 1])[0]
        return out

    def predict_single(self, inp):
        """ Like predict(), but only for a single record. The input data can be a Python list. """
        inp = np.array([inp], dtype=self.input_dtype)
        return self._invoke(inp)[0]

    def predict_single2(self, inp):
        """Run one already-batched input and return the raw output tensor.

        Unlike predict_single(), *inp* must already carry its batch axis and
        the result keeps that axis. The input is coerced to the interpreter's
        input dtype so a dtype mismatch does not reach ``set_tensor``.
        """
        inp = np.asarray(inp, dtype=self.input_dtype)
        return self._invoke(inp)

    def predict_lstm(self, inp):
        """Run one already-batched input and return ``output[0, -1]``.

        The input is coerced to the interpreter's input dtype first.
        """
        inp = np.asarray(inp, dtype=self.input_dtype)
        result = self._invoke(inp)
        return result[0, -1]

In [3]:
import warnings
warnings.filterwarnings('ignore')

#if TRAINING:
#    import cudf
#    import cupy as cp

import os, gc
import pandas as pd
import numpy as np
import xgboost as xgb
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import GroupKFold
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm, trange
from joblib import dump, load

import tensorflow as tf
tf.random.set_seed(42)
import tensorflow.keras.backend as K
import tensorflow.keras.layers as layers
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers.experimental.preprocessing import Normalization


import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args


import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args
import kerastuner as kt

In [4]:
# Global configuration: label column, feature column names
# ('feature_0' ... 'feature_129'), random seed and number of CV folds.
SEED = 42
FOLDS = 5
TARGET = 'action'
FEATS = [f'feature_{idx}' for idx in range(130)]

In [5]:
def create_autoencoder(input_dim,output_dim,noise=0.1):
    """Build the encoder network whose weights are restored from disk.

    The graph is: BatchNormalization -> GaussianNoise(noise) -> Dense(96, elu)
    -> Dense(64, linear).

    Args:
        input_dim: number of input features (previously hard-coded to 130).
        output_dim: unused; kept so existing callers keep working.
        noise: standard deviation handed to the GaussianNoise layer.

    Returns:
        A Keras ``Model`` mapping ``(batch, input_dim)`` to ``(batch, 64)``.
    """
    # The original also created a second, unused `mask` Input; it never was
    # part of the returned model, so it is dropped here.
    i = Input(shape=(input_dim,))
    encoded = BatchNormalization()(i)
    encoded = GaussianNoise(noise)(encoded)

    encoded = Dense(96, activation='elu')(encoded)
    encoded = Dense(64, activation='linear')(encoded)
    encoder = Model(inputs=i, outputs=encoded)

    return encoder

In [6]:
# Rebuild the encoder, restore its saved weights and freeze it; further down
# it is only called to produce extra input features (see `encoder(X_test)`).
encoder = create_autoencoder(130, 5, noise=0.1)
encoder.load_weights('../input/js-cv-split2/encoder.hdf5')
encoder.trainable = False

In [7]:
def create_model1(input_dim,output_dim):
    """Build an uncompiled MLP with hidden widths 512, 512, 300 and 64.

    The input is batch-normalised, then every hidden stage applies
    Dense -> BatchNormalization -> swish -> Dropout(0). The head is a sigmoid
    Dense layer named 'label_output' with `output_dim` units.
    """
    swish = tf.keras.activations.swish

    inputs = Input(input_dim)
    hidden = BatchNormalization()(inputs)

    for units in (512, 512, 300, 64):
        hidden = Dense(units)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Lambda(swish)(hidden)
        hidden = Dropout(0)(hidden)

    outputs = Dense(output_dim, activation='sigmoid', name='label_output')(hidden)
    return Model(inputs=inputs, outputs=outputs)

In [8]:


def create_model2(input_dim,output_dim):
    """Build and compile an MLP with hidden widths 438, 420 and 64.

    The input is batch-normalised, then every hidden stage applies
    Dense -> BatchNormalization -> swish -> Dropout(0). The sigmoid head has
    `output_dim` units. The model is compiled with Adam, label-smoothed binary
    cross-entropy and an AUC metric named 'auc'.
    """
    swish = tf.keras.activations.swish

    inputs = Input(input_dim)
    hidden = BatchNormalization()(inputs)

    for units in (438, 420, 64):
        hidden = Dense(units)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Lambda(swish)(hidden)
        hidden = Dropout(0)(hidden)

    outputs = Dense(output_dim, activation='sigmoid')(hidden)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=Adam(0.0072342),
        loss=BinaryCrossentropy(label_smoothing=0.090004),
        metrics=[tf.keras.metrics.AUC(name='auc')],
    )
    return model



In [9]:
# Ensemble container. The append order matters: later cells pick members by
# position (e.g. Y_hat[0], Y_hat[3], ...).
models = []

# models[0]: create_model2 architecture, checkpoint model2_3.
model = create_model2(130 + 64 + 88, 5)
model.load_weights(f'../input/js-non-of-model-w-sae/model2_3.hdf5')
if not TRAINING:
    # Inference mode: swap the Keras model for its TFLite wrapper.
    model = LiteModel.from_keras_model(model)
models.append(model)

# models[1]: create_model1 architecture, checkpoint model_4.
model = create_model1(130 + 64 + 88, 5)
model.load_weights(f'../input/js-non-of-model-w-sae/model_4.hdf5')
if not TRAINING:
    model = LiteModel.from_keras_model(model)
models.append(model)

# models[2]: create_model1 architecture, checkpoint model_5.
model = create_model1(130 + 64 + 88, 5)
model.load_weights(f'../input/js-non-of-model-w-sae/model_5.hdf5')
if not TRAINING:
    model = LiteModel.from_keras_model(model)
models.append(model)

In [10]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, BatchNormalization, Input, Multiply, Add, Concatenate
from tensorflow.keras.activations import sigmoid, relu
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import AUC
from tensorflow.keras.losses import binary_crossentropy
import tensorflow_addons as tfa
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.activations import softmax


def build_tabnet_model(input_dim, transform_dim, N_a, N_d, num_decision_step, gamma, output_dim, B_v, m_d, Lambda, multiplier):
    """Build an uncompiled TabNet-style Keras model.

    Structure, as written below:
      * The input is batch-normalised, then passed through four gated
        feature-transform blocks (Dense(2 * transform_dim, no bias) ->
        Dropout(0) -> BatchNormalization -> first half * sigmoid(second half)).
        Blocks 2-4 add the previous block's output and scale by sqrt(0.5).
      * For each decision step a feature mask is computed from the last `N_a`
        columns of the current transform output, applied to the raw inputs,
        and the four feature-transform blocks are run again on the result.
        relu of the first `N_d` columns is accumulated into `d_out`.
      * A sigmoid Dense head named 'label_out' maps `d_out` to `output_dim`.

    Args:
        input_dim: number of input features; also the width of each mask.
        transform_dim: width of each feature-transform block output.
        N_a: number of trailing transform columns fed to the mask Dense layer.
        N_d: number of leading transform columns accumulated into the output.
        num_decision_step: number of mask/transform iterations.
        gamma: constant used in the prior update `gamma - mask_value`.
        output_dim: number of sigmoid outputs.
        B_v: `virtual_batch_size` passed to every BatchNormalization layer.
        m_d: `momentum` passed to every BatchNormalization layer.
        Lambda: not used anywhere in the body. NOTE(review): inside this
            function the name shadows the Keras `Lambda` layer imported at
            file level.
        multiplier: scale applied to the mask logits before the softmax.

    Returns:
        A Keras `Model` from `inputs` to the sigmoid head.
    """
    # Initialization
    inputs = Input(input_dim)
    #x2 = encoder(x1)
    #x = Concatenate()([x1, x2])
    #P = tf.ones([tf.shape(x)[0], tf.shape(x)[1]]) #
    #d_out = tf.zeros([tf.shape(x)[0], N_d])
    #entropy = 0
    # pre-encoding

    x = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(inputs)
    # Block 1: gated unit without a residual connection.
    feat_trans1 = Dense(transform_dim * 2, use_bias= False)(x)
    feat_trans1 = Dropout(0)(feat_trans1)
    feat_trans1 = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(feat_trans1)
    feat_trans1 = Multiply()([feat_trans1[:,:transform_dim], sigmoid(feat_trans1[:,transform_dim:])])

    # Blocks 2-4: gated unit + residual add, rescaled by sqrt(0.5).
    feat_trans2 = Dense(transform_dim * 2, use_bias = False)(feat_trans1)
    feat_trans2 = Dropout(0)(feat_trans2)
    feat_trans2 = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(feat_trans2)
    feat_trans2 = Multiply()([feat_trans2[:,:transform_dim], sigmoid(feat_trans2[:,transform_dim:])])
    feat_trans2 = Add()([feat_trans2, feat_trans1])
    feat_trans2 *= tf.math.sqrt(0.5)

    feat_trans3 = Dense(transform_dim * 2, use_bias = False)(feat_trans2)
    feat_trans3 = Dropout(0)(feat_trans3)
    feat_trans3 = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(feat_trans3)
    feat_trans3 = Multiply()([feat_trans3[:,:transform_dim], sigmoid(feat_trans3[:,transform_dim:])])
    feat_trans3 = Add()([feat_trans3, feat_trans2])
    feat_trans3 *= tf.math.sqrt(0.5)

    feat_trans4 = Dense(transform_dim * 2, use_bias = False)(feat_trans3)
    feat_trans4 = Dropout(0)(feat_trans4)
    feat_trans4 = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(feat_trans4)
    feat_trans4 = Multiply()([feat_trans4[:,:transform_dim], sigmoid(feat_trans4[:,transform_dim:])])
    feat_trans4 = Add()([feat_trans4, feat_trans3])
    feat_trans4 *= tf.math.sqrt(0.5)

    for step in range(num_decision_step):
        #Attentive transform

        # Mask logits come from the last N_a columns of the previous output.
        mask_value = Dense(input_dim, use_bias= False)(feat_trans4[:,-N_a:])
        mask_value = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(mask_value)
        if step == 0:
            mask_value = softmax(multiplier * mask_value)
            # P starts as (gamma - mask) and is multiplied on later steps.
            P = (gamma - mask_value)
        else:
            # Later steps scale the logits by the running prior P first.
            mask_value *= P
            mask_value = softmax(multiplier * mask_value)
            P *= (gamma - mask_value)

        # The mask is always applied to the raw `inputs`, not to `x`.
        masked_feature = Multiply()([mask_value, inputs])


        # Entropy is used to penalize the amount of sparsity in feature
        # selection.
        #entropy += tf.reduce_mean(tf.reduce_sum(-mask_value * tf.math.log(mask_value + 0.01), axis=1)) / (num_decision_step)


        # feature_transform
        # Fresh layers are created on every step (no weight sharing across steps).
        feat_trans1 = Dense(transform_dim * 2, use_bias= False)(masked_feature)
        feat_trans1 = Dropout(0)(feat_trans1)
        feat_trans1 = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(feat_trans1)
        feat_trans1 = Multiply()([feat_trans1[:,:transform_dim], sigmoid(feat_trans1[:,transform_dim:])])

        feat_trans2 = Dense(transform_dim * 2, use_bias = False)(feat_trans1)
        feat_trans2 = Dropout(0)(feat_trans2)
        feat_trans2 = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(feat_trans2)
        feat_trans2 = Multiply()([feat_trans2[:,:transform_dim], sigmoid(feat_trans2[:,transform_dim:])])
        feat_trans2 = Add()([feat_trans2, feat_trans1])
        feat_trans2 *= tf.math.sqrt(0.5)

        feat_trans3 = Dense(transform_dim * 2, use_bias = False)(feat_trans2)
        feat_trans3 = Dropout(0)(feat_trans3)
        feat_trans3 = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(feat_trans3)
        feat_trans3 = Multiply()([feat_trans3[:,:transform_dim], sigmoid(feat_trans3[:,transform_dim:])])
        feat_trans3 = Add()([feat_trans3, feat_trans2])
        feat_trans3 *= tf.math.sqrt(0.5)

        feat_trans4 = Dense(transform_dim * 2, use_bias = False)(feat_trans3)
        feat_trans4 = Dropout(0)(feat_trans4)
        feat_trans4 = BatchNormalization(virtual_batch_size=B_v, momentum = m_d)(feat_trans4)
        feat_trans4 = Multiply()([feat_trans4[:,:transform_dim], sigmoid(feat_trans4[:,transform_dim:])])
        feat_trans4 = Add()([feat_trans4, feat_trans3])
        feat_trans4 *= tf.math.sqrt(0.5)

        # output
        # Sum relu of the first N_d columns over all decision steps.
        if step == 0:
            d_out = relu(feat_trans4[:,:N_d])
        else:
            d_out += relu(feat_trans4[:,:N_d])

    y1 = Dense(output_dim, activation = 'sigmoid', name = 'label_out')(d_out)

    model = Model(inputs = inputs, outputs = y1)
    return model

In [11]:
# Three TabNet-style members are appended next (positions 3, 4 and 5 of
# `models` when the preceding cells ran in order). They differ in the
# `multiplier` argument (6, 4, 4) and in the checkpoint file.
model = build_tabnet_model(130 + 64 + 88, 32, 16, 16, 3, 1.2, 5, None, 0.8, 0, 6)
model.load_weights(f'../input/js-non-of-model-w-sae/tabnet_model_42_3_multiplier_6_version2.hdf5')
if not TRAINING:
    # Inference mode: swap the Keras model for its TFLite wrapper.
    model = LiteModel.from_keras_model(model)
models.append(model)

model = build_tabnet_model(130 + 64 + 88, 32, 16, 16, 3, 1.2, 5, None, 0.8, 0, 4)
model.load_weights(f'../input/js-non-of-model-w-sae/tabnet_model_42_4_multiplier_4_version4.hdf5')
if not TRAINING:
    model = LiteModel.from_keras_model(model)
models.append(model)

model = build_tabnet_model(130 + 64 + 88, 32, 16, 16, 3, 1.2, 5, None, 0.8, 0, 4)
model.load_weights(f'../input/js-non-of-model-w-sae/tabnet_model_42_5_multiplier_4_version8.hdf5')
if not TRAINING:
    model = LiteModel.from_keras_model(model)
models.append(model)

In [12]:
from tensorflow.keras.layers import Conv1D, AveragePooling1D, MaxPooling1D, Flatten, Multiply
import tensorflow as tf
from tensorflow.keras.layers import Dense, BatchNormalization, Input, Multiply, Add, Concatenate, Softmax
from tensorflow.keras.activations import sigmoid, relu
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import AUC
from tensorflow.keras.losses import binary_crossentropy
import tensorflow_addons as tfa
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.activations import softmax

In [13]:


def scaled_dot_product_attention(q, k, v, mask=None):
    """Calculate scaled dot-product attention.

    q, k, v must have matching leading dimensions.
    k, v must have matching penultimate dimension, i.e.: seq_len_k = seq_len_v.
    The mask has different shapes depending on its type (padding or look ahead)
    but it must be broadcastable for addition.

    Args:
      q: query shape == (..., seq_len_q, depth)
      k: key shape == (..., seq_len_k, depth)
      v: value shape == (..., seq_len_v, depth_v)
      mask: Float tensor with shape broadcastable
            to (..., seq_len_q, seq_len_k). Defaults to None, in which case
            no masking is applied.

    Returns:
      output, attention_weights
    """

    matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)

    # Scale the logits by sqrt(depth) of the keys.
    dk = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

    # Masked positions (mask == 1) get a large negative logit so the softmax
    # assigns them ~0 weight.
    if mask is not None:
        scaled_attention_logits += (mask * -1e9)

    # softmax is normalized on the last axis (seq_len_k) so that the scores
    # add up to 1.
    attention_weights = Softmax()(scaled_attention_logits)  # (..., seq_len_q, seq_len_k)

    output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)

    return output, attention_weights



In [14]:
def transformer_model(input_dim1, window_size, d_model, rate, num_layers, dff, num_heads, output_dim):
    """Build an uncompiled transformer-style Keras model over a flat input.

    The flat input is batch-normalised, projected to `window_size * d_model`
    values and reshaped to (batch, window_size, d_model). Each of the
    `num_layers` blocks then applies a Dense/Dropout/residual/BatchNorm stage,
    a two-layer feed-forward stage with residual + BatchNorm, and a pooling
    layer of size 2 (average pooling for the first two blocks, max pooling
    afterwards). The head is Dense(64) -> Flatten -> Dropout -> sigmoid Dense
    named 'label_out'.

    NOTE(review): the multi-head attention result is computed but discarded -
    see the comment inside the loop. The saved checkpoints were presumably
    produced with this exact graph, so verify weight compatibility before
    changing it.

    Args:
        input_dim1: number of input features.
        window_size: length of the pseudo-sequence the input is reshaped to.
        d_model: feature width of each sequence position.
        rate: dropout rate.
        num_layers: number of blocks.
        dff: hidden width of the feed-forward stage.
        num_heads: number of attention heads; `d_model` is split with `//`.
        output_dim: number of sigmoid outputs.

    Returns:
        A Keras `Model`.
    """
    inputs = Input(input_dim1)
    batch_size = tf.shape(inputs)[0]
    #mask = create_look_ahead_mask(seq)
    x = BatchNormalization()(inputs)
    x = Dense(d_model * window_size, use_bias = False)(x)
    x = tf.reshape(x, (batch_size, window_size, d_model))

    #x *= tf.math.sqrt(tf.cast(d_model, tf.float32))
    #x += positional_encoding(seq, d_model)
    #x = Dropout(rate)(x)

    for i in range(num_layers):

        #multihead_attention
        q = Dense(d_model)(x)
        k = Dense(d_model)(x)
        v = Dense(d_model)(x)

        # Split the last axis into heads: (batch, heads, seq, d_model // heads).
        q = tf.reshape(q, (batch_size, -1, num_heads, d_model//num_heads))
        q = tf.transpose(q, perm=[0, 2, 1, 3])

        k = tf.reshape(k, (batch_size, -1, num_heads, d_model//num_heads))
        k = tf.transpose(k, perm=[0, 2, 1, 3])

        v = tf.reshape(v, (batch_size, -1, num_heads, d_model//num_heads))
        v = tf.transpose(v, perm=[0, 2, 1, 3])

        attn_out, _ = scaled_dot_product_attention(q, k, v, None)
        attn_out = tf.transpose(attn_out, perm=[0, 2, 1, 3])
        # NOTE(review): this reshapes `x`, not `attn_out`, so the attention
        # output computed above is overwritten and never reaches the model
        # output. Left unchanged on purpose: fixing it would alter the graph
        # the checkpoints were trained with.
        attn_out = tf.reshape(x, (batch_size, -1, d_model))
        attn_out = Dense(d_model)(attn_out)
        attn_out = Dropout(rate)(attn_out)
        attn_out = BatchNormalization()(attn_out + x)

        fnn_out = Dense(dff, activation= 'relu')(attn_out)  # (batch_size, seq_len, dff)
        fnn_out = Dense(d_model)(fnn_out)
        fnn_out = Dropout(rate)(fnn_out)
        x = BatchNormalization()(fnn_out + attn_out)
        # Halve the sequence length: average pooling in the first two blocks,
        # max pooling in any later block.
        if i <= 1:
            x = AveragePooling1D(2)(x)
        else:
            x = MaxPooling1D(2)(x)

    x = Dense(64)(x)
    x = Flatten()(x)
    #x = BatchNormalization()(x)
    x = Dropout(rate)(x)
    x = Dense(output_dim, activation = 'sigmoid', name = 'label_out')(x)
    model = Model(inputs = inputs, outputs = x)
    return model

In [15]:
def transformer_model2(input_dim1, window_size, d_model, rate, num_layers, dff, num_heads, output_dim):
    """Build an uncompiled transformer-style Keras model using max pooling only.

    The flat input is batch-normalised, projected to `window_size * d_model`
    values and reshaped to (batch, window_size, d_model). Each of the
    `num_layers` blocks applies a Dense/Dropout/residual/BatchNorm stage, a
    two-layer feed-forward stage with residual + BatchNorm, and
    MaxPooling1D(2). The head is Dense(64) -> Flatten -> Dropout -> sigmoid
    Dense named 'label_out'.

    NOTE(review): the multi-head attention result is computed but discarded -
    see the comment inside the loop. The saved checkpoints were presumably
    produced with this exact graph, so verify weight compatibility before
    changing it.

    Args:
        input_dim1: number of input features.
        window_size: length of the pseudo-sequence the input is reshaped to.
        d_model: feature width of each sequence position.
        rate: dropout rate.
        num_layers: number of blocks.
        dff: hidden width of the feed-forward stage.
        num_heads: number of attention heads; `d_model` is split with `//`.
        output_dim: number of sigmoid outputs.

    Returns:
        A Keras `Model`.
    """
    inputs = Input(input_dim1)
    batch_size = tf.shape(inputs)[0]
    #mask = create_look_ahead_mask(seq)
    x = BatchNormalization()(inputs)
    x = Dense(d_model * window_size, use_bias = False)(x)
    x = tf.reshape(x, (batch_size, window_size, d_model))

    #x *= tf.math.sqrt(tf.cast(d_model, tf.float32))
    #x += positional_encoding(seq, d_model)
    #x = Dropout(rate)(x)

    for i in range(num_layers):

        #multihead_attention
        q = Dense(d_model)(x)
        k = Dense(d_model)(x)
        v = Dense(d_model)(x)

        # Split the last axis into heads: (batch, heads, seq, d_model // heads).
        q = tf.reshape(q, (batch_size, -1, num_heads, d_model//num_heads))
        q = tf.transpose(q, perm=[0, 2, 1, 3])

        k = tf.reshape(k, (batch_size, -1, num_heads, d_model//num_heads))
        k = tf.transpose(k, perm=[0, 2, 1, 3])

        v = tf.reshape(v, (batch_size, -1, num_heads, d_model//num_heads))
        v = tf.transpose(v, perm=[0, 2, 1, 3])

        attn_out, _ = scaled_dot_product_attention(q, k, v, None)
        attn_out = tf.transpose(attn_out, perm=[0, 2, 1, 3])
        # NOTE(review): this reshapes `x`, not `attn_out`, so the attention
        # output computed above is overwritten and never reaches the model
        # output. Left unchanged on purpose: fixing it would alter the graph
        # the checkpoints were trained with.
        attn_out = tf.reshape(x, (batch_size, -1, d_model))
        attn_out = Dense(d_model)(attn_out)
        attn_out = Dropout(rate)(attn_out)
        attn_out = BatchNormalization()(attn_out + x)

        fnn_out = Dense(dff, activation= 'relu')(attn_out)  # (batch_size, seq_len, dff)
        fnn_out = Dense(d_model)(fnn_out)
        fnn_out = Dropout(rate)(fnn_out)
        x = BatchNormalization()(fnn_out + attn_out)
        # NOTE(review): both branches build the same MaxPooling1D(2) layer, so
        # the condition has no effect.
        if i <= 1:
            x = MaxPooling1D(2)(x)
        else:
            x = MaxPooling1D(2)(x)

    x = Dense(64)(x)
    x = Flatten()(x)
    #x = BatchNormalization()(x)
    x = Dropout(rate)(x)
    x = Dense(output_dim, activation = 'sigmoid', name = 'label_out')(x)

    model = Model(inputs = inputs, outputs = x)
    return model

In [16]:
# Three transformer-style members are appended next (positions 6, 7 and 8 of
# `models` when the preceding cells ran in order). The first uses
# transformer_model, the other two use transformer_model2.
model = transformer_model(130 + 64 +88, 16, 128, 0, 3, 256, 4, 5)
model.load_weights(f'../input/js-non-of-model-w-sae/transformer_no_imputer_34_version2.hdf5')
if not TRAINING:
    # Inference mode: swap the Keras model for its TFLite wrapper.
    model = LiteModel.from_keras_model(model)
models.append(model)

model = transformer_model2(130 + 64 + 88, 16, 128, 0, 3, 256, 4, 5)
model.load_weights(f'../input/js-non-of-model-w-sae/transformer_no_imputer_45_version5.hdf5')
if not TRAINING:
    model = LiteModel.from_keras_model(model)
models.append(model)

model = transformer_model2(130 + 64 +88, 16, 128, 0, 3, 256, 4, 5)
model.load_weights(f'../input/js-non-of-model-w-sae/transformer_no_imputer_55_version4.hdf5')
if not TRAINING:
    model = LiteModel.from_keras_model(model)
models.append(model)

In [17]:
from tensorflow.keras.layers import Conv1D, AveragePooling1D, MaxPooling1D, Flatten, Multiply, Add

# Module-level hyper-parameters passed to conv_model() when the saved
# convolutional checkpoints are rebuilt below.
input_dim = 130 + 64 + 88  # presumably raw features + encoder codes + NaN-mask columns - TODO confirm
output_dim = 5
window_size = 16
cha1 = 128
cha2 = 256
rate = 0.0


def conv_model(input_dim, output_dim, window_size, cha1, cha2, rate):
    """Build an uncompiled 1-D convolutional Keras model over a flat input.

    The flat input is batch-normalised, projected to `window_size * cha1`
    values and reshaped to (-1, window_size, cha1). Three residual stages
    follow; each runs causal Conv1D layers with `cha1`, `cha2`, `cha1`
    filters (kernel 3, swish), adds the first convolution's output back in,
    batch-normalises and max-pools by 2. The head is
    Dense(64) -> Flatten -> BatchNorm -> Dropout -> sigmoid Dense named
    'label_out'.
    """
    swish = tf.keras.activations.swish

    inputs = Input(input_dim)
    seq = BatchNormalization()(inputs)
    seq = Dense(window_size * cha1, use_bias=False)(seq)
    seq = tf.reshape(seq, (-1, window_size, cha1))
    seq = BatchNormalization()(seq)

    for _ in range(3):
        # Entry convolution; its output is the residual shortcut.
        seq = Dropout(rate)(seq)
        seq = Conv1D(cha1, 3, activation=swish, padding='causal')(seq)
        shortcut = seq

        # Widen to cha2 channels, then narrow back to cha1.
        for channels in (cha2, cha1):
            seq = BatchNormalization()(seq)
            seq = Dropout(rate)(seq)
            seq = Conv1D(channels, 3, activation=swish, padding='causal')(seq)

        seq = Add()([shortcut, seq])
        seq = BatchNormalization()(seq)
        seq = MaxPooling1D(2)(seq)

    seq = Dense(64)(seq)
    seq = Flatten()(seq)
    seq = BatchNormalization()(seq)
    seq = Dropout(rate)(seq)
    outputs = Dense(output_dim, activation='sigmoid', name='label_out')(seq)

    return Model(inputs=inputs, outputs=outputs)

In [18]:
# Three convolutional members are appended next (positions 9, 10 and 11 of
# `models` when the preceding cells ran in order). All share the same
# architecture and differ only in the checkpoint file.
model = conv_model(input_dim, output_dim, window_size, cha1, cha2, rate)
model.load_weights(f'../input/js-non-of-model-w-sae/conv_model3_42_3.hdf5')
if not TRAINING:
    # Inference mode: swap the Keras model for its TFLite wrapper.
    model = LiteModel.from_keras_model(model)
models.append(model)

model = conv_model(input_dim, output_dim, window_size, cha1, cha2, rate)
model.load_weights(f'../input/js-non-of-model-w-sae/conv_model5_42_4.hdf5')
if not TRAINING:
    model = LiteModel.from_keras_model(model)
models.append(model)

model = conv_model(input_dim, output_dim, window_size, cha1, cha2, rate)
model.load_weights(f'../input/js-non-of-model-w-sae/conv_model4_42_5.hdf5')
if not TRAINING:
    model = LiteModel.from_keras_model(model)
models.append(model)

In [19]:
def create_model(input_dim,output_dim):
    """Build an uncompiled MLP whose stages see the two preceding stage outputs.

    The input goes through BatchNormalization and Dropout(0.2). Four stages of
    Dense(256) -> BatchNormalization -> swish -> Dropout(0.2) follow; from the
    second stage on, each stage's input is the concatenation of the previous
    stage's output with the one before it. The sigmoid head named
    'label_output' reads the concatenation of the last two stage outputs.
    """
    def dense_stage(tensor):
        # One hidden stage: Dense(256) -> BN -> swish -> Dropout(0.2).
        tensor = Dense(256)(tensor)
        tensor = BatchNormalization()(tensor)
        tensor = Lambda(tf.keras.activations.swish)(tensor)
        return Dropout(0.2)(tensor)

    inputs = Input(input_dim)
    base = BatchNormalization()(inputs)
    base = Dropout(0.2)(base)

    # `older` / `newer` hold the two most recent stage outputs.
    older, newer = base, dense_stage(base)
    for _ in range(3):
        joined = Concatenate()([newer, older])
        older, newer = newer, dense_stage(joined)

    joined = Concatenate()([newer, older])
    outputs = Dense(output_dim, activation='sigmoid', name='label_output')(joined)

    return Model(inputs=inputs, outputs=outputs)

In [20]:
# Append three create_model() members, restoring checkpoints resnet_model_3,
# resnet_model_4 and resnet_model_5 in that order.
for f in range(3,6):
    model = create_model(130 + 64 + 88, 5)
    model.load_weights(f'../input/js-non-of-model-w-sae/resnet_model_{f}.hdf5')
    if not TRAINING:
        # Inference mode: swap the Keras model for its TFLite wrapper.
        model = LiteModel.from_keras_model(model)
    models.append(model)

In [21]:
# Load the training CSV and derive features, targets and per-row weights.
# Only runs when TRAINING is True.
if TRAINING:
    print('Loading...')
    train = pd.read_csv('/kaggle/input/jane-street-market-prediction/train.csv')
    TARGET = 'action'
    FEATS = ['feature_{}'.format(int(i)) for i in range(130)]

    print('Filling...')
    # Keep only rows with a strictly positive weight.
    train = train.query('weight > 0').reset_index(drop = True)
    resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']
    # One binary target per resp column: 1 where the value is > 0.
    # After the transpose, y has one row per sample and five columns.
    y = np.stack([(train[c] > 0.00000).astype('int') for c in resp_cols]).T
    X = train[FEATS].to_numpy()
    # Per-row weight * resp, later summed per date by metrics().
    wr = train.weight*train['resp'].to_numpy()
    date = train['date'].values
    print('Finish.')

In [22]:
# Column selector used below as `X[:, nan_feat_bool]` to pick the features
# whose NaN indicators become extra inputs (presumably a boolean mask over the
# 130 features - TODO confirm).
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
nan_feat_bool = pd.read_pickle('../input/nn-model-wpt/nfb.pkl')

In [23]:
# Build the NaN-indicator inputs, impute missing features and load the CV
# splits. Only runs when TRAINING is True.
if TRAINING:
    with open('../input/js-cv-split2/f_mean.npy', 'rb') as f:
        f_mean = np.load(f)
    # 0/1 indicators of missing values, taken BEFORE imputation, for the
    # columns selected by nan_feat_bool.
    mask2 = np.isnan(X[:,nan_feat_bool]).astype(int)
    # Replace each NaN by the matching entry of f_mean: nan_to_num zeroes the
    # NaNs, and the isnan mask adds f_mean only at those positions.
    X = np.nan_to_num(X) + np.isnan(X).astype(int) * f_mean
    del(train)
    _= gc.collect()
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    splits = pd.read_pickle('../input/js-cv-split2/cross_validation.pkl')

In [24]:
# Assemble the hold-out set from the second element of splits[3].
# Only runs when TRAINING is True.
if TRAINING:
    X_test = X[splits[3][1]]
    y_test = y[splits[3][1]]
    mask_test = mask2[splits[3][1]]
    # Encoder outputs for the hold-out rows, as a NumPy array.
    encoded_X_test = encoder(X_test).numpy()
    # Model input = imputed features + encoder outputs + NaN indicators.
    X_test = np.concatenate((X_test, encoded_X_test, mask_test), axis = -1)
    date_test = date[splits[3][1]]
    wr_test = wr[splits[3][1]]
    del(mask_test)
    _= gc.collect()

In [25]:
def metrics(y_true, y_pred):
    """Return the utility score of per-row values grouped by an integer key.

    The values in `y_pred` are summed per distinct key in `y_true` (giving
    ``Pi``), then::

        t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / len(Pi))
        u = min(max(t, 0), 6) * sum(Pi)

    Args:
        y_true: 1-D array of non-negative integer group keys (the notebook
            passes the `date` column).
        y_pred: 1-D array-like of per-row values, same length as `y_true`
            (the notebook passes weight * resp * action).

    Returns:
        The score ``u``. Returns 0.0 for empty input or when every group sum
        is zero; the original produced a ZeroDivisionError and NaN
        respectively in those cases.
    """
    y_true = np.asarray(y_true)
    if y_true.size == 0:
        return 0.0

    # bincount with weights sums y_pred per key; keep only keys that occur.
    Pi = np.bincount(y_true, y_pred)
    Pi = Pi[np.unique(y_true)]

    total = np.sum(Pi)
    norm = np.sqrt(np.sum(Pi ** 2))
    if norm == 0:
        # All group sums are zero: t would be 0/0, so the score is zero.
        return 0.0

    t = total / norm * np.sqrt(250 / len(Pi))
    u = min(max(t, 0), 6) * total
    return u

In [26]:
# Predictions of every ensemble member on the current hold-out set, in the
# same order as `models`. With TRAINING True the members are still Keras
# models, so `predict` accepts `batch_size`.
if TRAINING:
    Y_hat = [model.predict(X_test, batch_size = 10000) for model in models]

In [27]:
# Random search (10000 draws) over blend weights for ensemble members
# 0, 3, 6, 9 and 12 of Y_hat, with decision threshold 0.5.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat[0] + x2 * Y_hat[3] + x3*Y_hat[6] + x4 * Y_hat[9] + x5 * Y_hat[12]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.5, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [28]:
# Random search (10000 draws) over blend weights for ensemble members
# 0, 3, 6, 9 and 12 of Y_hat, with decision threshold 0.501.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat[0] + x2 * Y_hat[3] + x3*Y_hat[6] + x4 * Y_hat[9] + x5 * Y_hat[12]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.501, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [29]:
# Random search (10000 draws) over blend weights for ensemble members
# 0, 3, 6, 9 and 12 of Y_hat, with decision threshold 0.502.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat[0] + x2 * Y_hat[3] + x3*Y_hat[6] + x4 * Y_hat[9] + x5 * Y_hat[12]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.502, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [30]:
# Random search (10000 draws) over blend weights for ensemble members
# 0, 3, 6, 9 and 12 of Y_hat, with decision threshold 0.503.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat[0] + x2 * Y_hat[3] + x3*Y_hat[6] + x4 * Y_hat[9] + x5 * Y_hat[12]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.503, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [31]:
# Random search (10000 draws) over blend weights for ensemble members
# 0, 3, 6, 9 and 12 of Y_hat, with decision threshold 0.504.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat[0] + x2 * Y_hat[3] + x3*Y_hat[6] + x4 * Y_hat[9] + x5 * Y_hat[12]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.504, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [32]:
# Random search (10000 draws) over blend weights for ensemble members
# 0, 3, 6, 9 and 12 of Y_hat, with decision threshold 0.505.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat[0] + x2 * Y_hat[3] + x3*Y_hat[6] + x4 * Y_hat[9] + x5 * Y_hat[12]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.505, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [33]:
# Random search (10000 draws) over blend weights for ensemble members
# 0, 3, 6, 9 and 12 of Y_hat, with decision threshold 0.506.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat[0] + x2 * Y_hat[3] + x3*Y_hat[6] + x4 * Y_hat[9] + x5 * Y_hat[12]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.506, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [34]:
# Random search (10000 draws) over blend weights for ensemble members
# 0, 3, 6, 9 and 12 of Y_hat, with decision threshold 0.507.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat[0] + x2 * Y_hat[3] + x3*Y_hat[6] + x4 * Y_hat[9] + x5 * Y_hat[12]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.507, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [35]:
# Random search (10000 draws) over blend weights for ensemble members
# 0, 3, 6, 9 and 12 of Y_hat, with decision threshold 0.508.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat[0] + x2 * Y_hat[3] + x3*Y_hat[6] + x4 * Y_hat[9] + x5 * Y_hat[12]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.508, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [36]:
# Re-assemble the hold-out set, this time from the second element of
# splits[4]; this overwrites X_test, y_test, date_test and wr_test.
# Only runs when TRAINING is True.
if TRAINING:
    X_test = X[splits[4][1]]
    y_test = y[splits[4][1]]
    mask_test = mask2[splits[4][1]]
    # Encoder outputs for the hold-out rows, as a NumPy array.
    encoded_X_test = encoder(X_test).numpy()
    # Model input = imputed features + encoder outputs + NaN indicators.
    X_test = np.concatenate((X_test, encoded_X_test, mask_test), axis = -1)
    date_test = date[splits[4][1]]
    wr_test = wr[splits[4][1]]
    del(mask_test)
    _= gc.collect()

In [37]:
# Predictions of every ensemble member on the splits[4] hold-out set, in the
# same order as `models`.
if TRAINING:
    Y_hat1 = [model.predict(X_test, batch_size = 10000) for model in models]

In [38]:
# Random search (10000 draws) over blend weights for ensemble members
# 1, 4, 7, 10 and 13 of Y_hat1, with decision threshold 0.5.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat1[1] + x2 * Y_hat1[4] + x3*Y_hat1[7] + x4 * Y_hat1[10] + x5 * Y_hat1[13]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.5, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [39]:
# Random search (10000 draws) over blend weights for ensemble members
# 1, 4, 7, 10 and 13 of Y_hat1, with decision threshold 0.501.
if TRAINING:
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x
        blend = x1 * Y_hat1[1] + x2 * Y_hat1[4] + x3*Y_hat1[7] + x4 * Y_hat1[10] + x5 * Y_hat1[13]
        # Act (1) when the median over axis 1 exceeds the threshold, else 0.
        action = np.where(np.median(blend, axis=1) > 0.501, 1, 0)
        score3 = metrics(date_test, wr_test * action)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [40]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat1[1], [4], [7], [10] and [13].
    # Rows whose blended median along axis 1 is above 0.502 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat1[1]
            + x2 * Y_hat1[4]
            + x3 * Y_hat1[7]
            + x4 * Y_hat1[10]
            + x5 * Y_hat1[13]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.502, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [41]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat1[1], [4], [7], [10] and [13].
    # Rows whose blended median along axis 1 is above 0.503 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat1[1]
            + x2 * Y_hat1[4]
            + x3 * Y_hat1[7]
            + x4 * Y_hat1[10]
            + x5 * Y_hat1[13]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.503, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [42]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat1[1], [4], [7], [10] and [13].
    # Rows whose blended median along axis 1 is above 0.504 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat1[1]
            + x2 * Y_hat1[4]
            + x3 * Y_hat1[7]
            + x4 * Y_hat1[10]
            + x5 * Y_hat1[13]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.504, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [43]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat1[1], [4], [7], [10] and [13].
    # Rows whose blended median along axis 1 is above 0.505 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat1[1]
            + x2 * Y_hat1[4]
            + x3 * Y_hat1[7]
            + x4 * Y_hat1[10]
            + x5 * Y_hat1[13]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.505, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [44]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat1[1], [4], [7], [10] and [13].
    # Rows whose blended median along axis 1 is above 0.506 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat1[1]
            + x2 * Y_hat1[4]
            + x3 * Y_hat1[7]
            + x4 * Y_hat1[10]
            + x5 * Y_hat1[13]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.506, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [45]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat1[1], [4], [7], [10] and [13].
    # Rows whose blended median along axis 1 is above 0.507 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat1[1]
            + x2 * Y_hat1[4]
            + x3 * Y_hat1[7]
            + x4 * Y_hat1[10]
            + x5 * Y_hat1[13]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.507, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [46]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat1[1], [4], [7], [10] and [13].
    # Rows whose blended median along axis 1 is above 0.508 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat1[1]
            + x2 * Y_hat1[4]
            + x3 * Y_hat1[7]
            + x4 * Y_hat1[10]
            + x5 * Y_hat1[13]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.508, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [47]:
if TRAINING:
    # Rebuild the hold-out arrays from the index set stored at splits[5][1].
    fold_idx = splits[5][1]
    X_test = X[fold_idx]
    y_test = y[fold_idx]
    date_test = date[fold_idx]
    wr_test = wr[fold_idx]

    # Model input = raw features, then the encoder output, then the mask columns.
    mask_test = mask2[fold_idx]
    encoded_X_test = encoder(X_test).numpy()
    X_test = np.concatenate([X_test, encoded_X_test, mask_test], axis=-1)

    # The mask now lives inside X_test; drop the separate copy and collect.
    del mask_test
    _ = gc.collect()

In [48]:
if TRAINING:
    # One prediction array per entry of `models`, all computed on the current X_test.
    Y_hat2 = list(map(lambda net: net.predict(X_test, batch_size=10000), models))

In [49]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat2[2], [5], [8], [11] and [14].
    # Rows whose blended median along axis 1 is above 0.5 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat2[2]
            + x2 * Y_hat2[5]
            + x3 * Y_hat2[8]
            + x4 * Y_hat2[11]
            + x5 * Y_hat2[14]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.5, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [50]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat2[2], [5], [8], [11] and [14].
    # Rows whose blended median along axis 1 is above 0.501 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat2[2]
            + x2 * Y_hat2[5]
            + x3 * Y_hat2[8]
            + x4 * Y_hat2[11]
            + x5 * Y_hat2[14]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.501, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [51]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat2[2], [5], [8], [11] and [14].
    # Rows whose blended median along axis 1 is above 0.502 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat2[2]
            + x2 * Y_hat2[5]
            + x3 * Y_hat2[8]
            + x4 * Y_hat2[11]
            + x5 * Y_hat2[14]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.502, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [52]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat2[2], [5], [8], [11] and [14].
    # Rows whose blended median along axis 1 is above 0.503 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat2[2]
            + x2 * Y_hat2[5]
            + x3 * Y_hat2[8]
            + x4 * Y_hat2[11]
            + x5 * Y_hat2[14]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.503, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [53]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat2[2], [5], [8], [11] and [14].
    # Rows whose blended median along axis 1 is above 0.504 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat2[2]
            + x2 * Y_hat2[5]
            + x3 * Y_hat2[8]
            + x4 * Y_hat2[11]
            + x5 * Y_hat2[14]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.504, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [54]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat2[2], [5], [8], [11] and [14].
    # Rows whose blended median along axis 1 is above 0.505 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat2[2]
            + x2 * Y_hat2[5]
            + x3 * Y_hat2[8]
            + x4 * Y_hat2[11]
            + x5 * Y_hat2[14]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.505, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [55]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat2[2], [5], [8], [11] and [14].
    # Rows whose blended median along axis 1 is above 0.506 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat2[2]
            + x2 * Y_hat2[5]
            + x3 * Y_hat2[8]
            + x4 * Y_hat2[11]
            + x5 * Y_hat2[14]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.506, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [56]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat2[2], [5], [8], [11] and [14].
    # Rows whose blended median along axis 1 is above 0.507 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat2[2]
            + x2 * Y_hat2[5]
            + x3 * Y_hat2[8]
            + x4 * Y_hat2[11]
            + x5 * Y_hat2[14]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.507, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [57]:
if TRAINING:
    # Random search (10000 draws) for blend weights over Y_hat2[2], [5], [8], [11] and [14].
    # Rows whose blended median along axis 1 is above 0.508 keep their wr_test value; the
    # rest are zeroed before scoring with metrics(). Every new best is printed.
    best3 = 0
    for i in range(10000):
        # Five independent uniform draws, rescaled so the weights sum to 1.
        x1, x2, x3, x4, x5 = (np.random.uniform() for _ in range(5))
        x = x1 + x2 + x3 + x4 + x5
        x1, x2, x3, x4, x5 = x1 / x, x2 / x, x3 / x, x4 / x, x5 / x

        blended = (
            x1 * Y_hat2[2]
            + x2 * Y_hat2[5]
            + x3 * Y_hat2[8]
            + x4 * Y_hat2[11]
            + x5 * Y_hat2[14]
        )
        keep_row = np.where(np.median(blended, axis=1) > 0.508, 1, 0)
        score3 = metrics(date_test, wr_test * keep_row)
        if score3 > best3:
            best3 = score3
            print(x1, x2, x3, x4, x5, best3)

In [58]:
# Load the saved f_mean array; the inference cell adds it in wherever a feature is NaN.
f_mean = np.load('../input/js-cv-split2/f_mean.npy')

In [59]:
# Swap the Keras encoder for a TFLite-backed LiteModel built from it. This runs
# whether or not TRAINING is set, so it has to come after every cell that still
# calls the Keras model directly (e.g. `encoder(X_test)`). Re-running this cell
# would hand a LiteModel, not a Keras model, to the TFLite converter.
encoder = LiteModel.from_keras_model(encoder)

In [60]:
if not TRAINING:
    import janestreet

    env = janestreet.make_env()

    # Decision threshold compared against the median of the blended prediction.
    th = 0.505

    # Blend weights passed to np.average, one per entry of `models` (15 values).
    # This is the set labelled 0.505 in the "15 models" tuning notes below. It is
    # built once here instead of on every test row.
    blend_weights = [0.30643427242264,0.209491777667095,0.137817458159022,0.406721405697375,0.286242301004577,0.471708329957434,0.0803022608268741,0.190720619919767,0.157722275768748,0.169601431361634,0.185827274674314,0.21853027424372,0.0369406296914762,0.127718026734245,0.0142216618710743]

    # ------------------------------------------------------------------
    # Tuning notes: alternative weight sets, kept for reference only.
    #
    # `w_th` sets with zeroed entries (15 slots). They were meant to be used with
    #   w_th_boolen = (np.array(w_th) > 0).tolist()
    #   w_th = [i for (i, v) in zip(w_th, w_th_boolen) if v]
    #   models = [i for (i, v) in zip(models, w_th_boolen) if v]
    # followed by np.average(..., weights = w_th) in the loop ("501 skipped").
    #   501: [0.282586165396963,0,0,0.409453626674629,0.230074696487854,0.601781674485216,0,0,0.0116348575038513,0.243247292748396,0.439550808680961,0,0.0647129151800104,0.330374494831183,0.386583468010932]
    #   502: [0.350562125298993,0,0,0.448576828126836,0.134586453380421,0.65264356621618,0,0,0.0261123909009704,0.134511467677107,0.685961342574953,0,0.0663495788970628,0.179452204044625,0.321244042882849]
    #   503: [0.360800665430468,0,0,0.429149764044411,0.114599310364374,0.69178947541075,0,0,0.0682843624963087,0.120513849928649,0.830006826443837,0,0.0895357205964704,0.0553938631917881,0.23992616209294]
    #   504: [0.440453769182776,0,0,0.386035526242392,0.0947024403089371,0.761301979742291,0,0,0.0825679577912019,0.159692569362111,0.869504966116743,0,0.0138181352127204,0.0357925935743198,0.156130062466506]
    #   505: [0.0160982113917282,0,0,0.408031763680652,0.103234899296302,0.770207288773767,0,0,0.129126332662569,0.556117716944014,0.864208099260292,0,0.0197523079836051,0.0325570014434044,0.100666378563663]
    #   506: [0.0107694490518132,0,0,0.418049733705931,0.168603910678253,0.737750099082452,0,0,0.244939753415722,0.417596620720282,0.759687963502009,0,0.153584196521972,0.0717081258197375,0.0173101475018251]
    #   507: [0.00869530976805593,0,0,0.43934692714842,0.0647559713228543,0.544669476464809,0,0,0.262487538642279,0.344704322163479,0.718182476512787,0,0.207253440920044,0.217061552164358,0.192842984892911]
    #   508: [0.0432387797581322,0,0,0.346699406169769,0.116159401563698,0.0913701656245924,0,0,0.35951130675128,0.476263938176473,0.774953192987766,0,0.133797875895625,0.108887405448535,0.549118527624126]
    #
    # 12-weight sets (threshold: weights):
    #   0.5:   [0.269289583638586,0.140273060522882,0.00431820575329026,0.410287705610988,0.248819727362426,0.930825875759512,0.0745530246350113,0.28684450294532,0.0398643375719953,0.245869686115413,0.32406270916937,0.0249915809152021]
    #   0.501: [0.0763251457522784,0.0788625175166904,0.189492504605759,0.341901274112498,0.247725213614468,0.655959549587888,0.0187377150341519,0.240186716116674,0.143356259279585,0.56303586510107,0.433225552752167,0.0111916865267664]
    #   0.502: [0.0576583473691916,0.12255652240959,0.175323289611383,0.449173119701892,0.110027096104041,0.76522980391606,0.26055588039915,0.00275766092980543,0.0589500766202904,0.232612652529765,0.764658720556562,0.000496829852265645]
    #   0.503: [0.00870004211860127,0.0978252692956878,0.298014718855447,0.383586287383453,0.100620814834676,0.620239671826976,0.0867980001869826,0.0179772830228647,0.071241936891379,0.520915670310963,0.783576632846771,0.0105036724261971]
    #   0.504: [0.0900695070920216,0.0400101879000951,0.363508444081315,0.404165368235183,0.101581732824844,0.52280927277972,0.0766822336375494,0.0223235719877681,0.0813574399335528,0.429082891035245,0.836084507287292,0.0323248432054112]
    #   0.505: [0.14585293948777,0.0321340599888603,0.168930356391904,0.45014880680078,0.0928682653923039,0.483209333358805,0.206996385495624,0.0014347064763478,0.195922871036136,0.197001868215824,0.873562968142487,0.151937439213153]
    #   0.506: [0.123190243224635,0.0333809356163824,0.223927230959407,0.4083891129262,0.214877194263553,0.488523934898997,0.115077478026766,0.274629118750788,0.230760689860318,0.353343165822397,0.477112751369275,0.0567881442812759]
    #   0.507: [0.0792840481940689,0.0486631725897758,0.088028800771282,0.413007787518532,0.180887357608131,0.59477504438469,0.143226402099234,0.307389347262816,0.315100337014842,0.364481762188163,0.463060122539277,0.00209581782918519]
    #   0.508: [0.104034154023355,0.00943763699276361,0.0345839554383715,0.436273484206495,0.186711622897726,0.420741558536897,0.142822556506646,0.365700918525257,0.0200891010652324,0.316869805263502,0.438149821584252,0.524585384959498]
    #   0.509: [0.120926552448402,0.0428807234778378,0.316305249359201,0.32172273862087,0.250142935175495,0.241626069167982,0.132838781409683,0.365697181864785,0.397642473806075,0.424511927521044,0.341279159481881,0.0444262076667411]
    #   0.51:  [0.26694818048737,0.00541709707387954,0.283017229351308,0.378816459431733,0.266689820694482,0.269084127637271,0.0883274830502573,0.716043337455058,0.432078909025282,0.265907877030638,0.0118497447765794,0.0158197339861377]
    #
    # 15 models (threshold: weights); 0.505 is the active `blend_weights` above:
    #   0.5:   [0.00660530146858248,0.0195375928683106,0.182872045891882,0.260118264684016,0.259382986465131,0.553270253699695,0.0166969213150559,0.0154832176142514,0.0198471979821222,0.47617786468772,0.299282816507157,0.00360026847919924,0.240401647844624,0.406313386545149,0.2404102339471]
    #   0.501: [0.336580956009717,0.085371165319604,0.00218059361783603,0.373457256322469,0.162088571157434,0.525977250550515,0.018854173902929,0.023271705478133,0.000476500288030846,0.122090501937807,0.225064973272897,0.0293346148533625,0.149017111827077,0.50420358477193,0.442031040690255]
    #   0.502: [0.267630393363895,0.0121074000206929,0.077325505585401,0.459718152232472,0.23373410136094,0.511652365670948,0.00478935684023983,0.109150725044656,0.0252773717038874,0.140126052877474,0.28370351248071,0.0120077947696216,0.127736044685916,0.361304261092999,0.373736962270141]
    #   0.503: [0.372400399851216,0.0842449434986123,0.00582403783490785,0.356247458832398,0.127812090578148,0.384425132524302,0.00461311509458511,0.0452318385342215,0.00376762080572573,0.12848525414531,0.680780700868442,0.22207416523498,0.138253772076489,0.0619304265205752,0.383909043600083]
    #   0.504: [0.0314205579910957,0.0179271771151849,0.0567105407619044,0.232853546367987,0.1566876188777,0.201552569532505,0.204970454836834,0.0574374672227673,0.05168700098273,0.49925730930842,0.661029894181915,0.0616861543090878,0.0314981314956622,0.106917842602431,0.628363734413771]
    #   0.506: [0.000488861162908858,0.0313966987398687,0.0656565676638678,0.347998116798586,0.228480791106072,0.341137346186518,0.157814581352585,0.29812125283193,0.0493706636969972,0.394416638844336,0.437189210728981,0.0695625253674544,0.0992818018415826,0.00481204659314628,0.474272897085161]
    #   0.507: [0.0203769850068616,0.114490385313229,0.0707570701624141,0.385520860294775,0.0759857589733933,0.348705206822549,0.0612651093612269,0.0345871704494746,0.0853170582856602,0.403623242658935,0.622493080241552,0.110674275844068,0.1292138026782,0.152443605022349,0.384546388885307]
    #   0.508: [0.0439711805548865,0.0188827744980941,0.121000832077844,0.358214855147226,0.164080208761975,0.213987041979311,0.0588312916190743,0.332702641908083,0.0410097981416294,0.392269323193152,0.460821755585347,0.286801020156277,0.146713349485659,0.0235126192464997,0.337201307644937]
    #
    # Other variants that were tried: equal weights ([1]*15), and a
    # pt.transform(...) of test_df[FIX_FEAT] before building the features.
    # ------------------------------------------------------------------

    for test_df, pred_df in tqdm(env.iter_test()):
        # Default action is 0; it is only reconsidered when the row's weight is positive.
        decision = 0
        if test_df['weight'].item() > 0:
            x_tt = test_df.loc[:, FEATS].values

            # NaN indicator for the columns selected by nan_feat_bool, taken
            # before the NaNs are filled in below.
            mask = np.isnan(x_tt[:, nan_feat_bool]).astype(np.int8)
            if np.isnan(x_tt.sum()):
                # nan_to_num zeroes the NaNs; the second term adds f_mean at
                # exactly those positions.
                x_tt = np.nan_to_num(x_tt) + np.isnan(x_tt).astype(np.int8) * f_mean
            x_tt = x_tt.astype(np.float32)

            # Model input = features, then the encoder output, then the NaN mask.
            encoded_x = encoder.predict_single2(x_tt)
            x_tt = tf.concat([x_tt, encoded_x, mask], axis=-1)

            # Weighted average across models, then the median of what remains.
            member_preds = [model.predict_single2(x_tt) for model in models]
            pred = np.median(np.average(member_preds, axis=0, weights=blend_weights))
            decision = np.where(pred >= th, 1, 0).astype(int)

        pred_df.action = decision
        env.predict(pred_df)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


