# Description

### An upgrade of "Finding Best Threshold", based on this notebook: https://www.kaggle.com/a763337092/searching-best-threshold

* **You can find the best threshold and the best ensemble ratio.**  
* **FYI: I used only TensorFlow, for faster inference time.**  
* **Additionally, the models are converted to TensorFlow Lite.**

In [None]:
import os
import gc
import glob
import time
import pickle
import random
from random import choices
import numpy as np
import pandas as pd
from tqdm import tqdm
from collections import namedtuple
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler

from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import tensorflow_addons as tfa
import keras.backend as K

import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Let pandas display up to 100 columns and 100 rows.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, 100)

# Directory holding the competition files (train.csv is read from here).
DATA_PATH = '../input/jane-street-market-prediction/'

def save_pickle(dic, save_path):
    """Serialize ``dic`` with pickle and write it to ``save_path``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten.
    """
    with open(save_path, 'wb') as out_file:
        pickle.dump(dic, out_file)
        
def load_pickle(load_path):
    """Read the file at ``load_path`` and return the unpickled object.

    NOTE: unpickling can execute arbitrary code, so only load files from a
    trusted source.
    """
    with open(load_path, 'rb') as in_file:
        return pickle.load(in_file)

In [None]:
# Per-model flags, in the order [Model1, Model2]. Not read anywhere in the
# visible part of this notebook (both models are loaded from saved weights).
Train_list = [False, False] # Model1, Model2
# Passed as `scale=` to feature_engineering(): standardize the source features
# before building the cross features.
scale_feature = True

def seed_everything(seed=1111):
    """Seed every random number generator used in this notebook.

    Sets ``PYTHONHASHSEED`` in the environment and seeds Python's ``random``
    module, NumPy's global generator and TensorFlow's global generator with
    the same value.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
seed_everything(seed=1111)

# Load the training data and keep only rows with date > 85.
train = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
train = train.query('date > 85').reset_index(drop = True)

feat_cols = [f'feature_{i}' for i in range(130)]
target_cols = ['action', 'action_1', 'action_2', 'action_3', 'action_4']

# Drop rows with non-positive weight.
train = train.query('weight > 0').reset_index(drop = True)

# Binary targets: 1 where the matching response column is positive, else 0.
# target_cols and the resp columns are paired position by position.
for _target_col, _resp_col in zip(target_cols, ['resp', 'resp_1', 'resp_2', 'resp_3', 'resp_4']):
    train[_target_col] = (train[_resp_col] > 0).astype('int')

# Back-fill missing values in every feature except feature_0.
# DataFrame.bfill() replaces fillna(method='bfill'), which is deprecated in
# recent pandas releases; the result is the same.
train[feat_cols[1:]] = train[feat_cols[1:]].bfill()

In [None]:
def feature_engineering(train, scale=True):
    '''
        Build the two cross features used by the first model.

        Args:
            train: DataFrame with columns feature_1, feature_2, feature_41,
                feature_42 and feature_43.
            scale: if True, each source column is standardized with a
                StandardScaler (fitted on that column of ``train``) before
                being combined; if False the raw values are used.

        Returns:
            (cross_41_42_43, cross_1_2), two 1-D arrays with one entry per row:
            the sum of features 41, 42 and 43, and
            feature_1 / (feature_2 / 1e-5).
    '''
    if scale:
        scaler = StandardScaler()

        def column(name):
            # Re-fit the scaler on this column alone and flatten back to 1-D.
            return scaler.fit_transform(train[name].values.reshape(-1, 1)).reshape(-1)
    else:
        def column(name):
            return train[name].values

    cross_41_42_43 = column('feature_41') + column('feature_42') + column('feature_43')

    # NOTE(review): the denominator is divided by 1e-5 rather than offset by
    # it, so this does not guard against feature_2 == 0. Kept as is because
    # the saved model weights presumably expect this exact feature - confirm
    # before changing.
    cross_1_2 = column('feature_1') / (column('feature_2') / 1e-5)

    return cross_41_42_43, cross_1_2

In [None]:
# The 130 raw feature columns, kept as a separate list for the second model.
feat_cols_130 = [f'feature_{i}' for i in range(130)]
# feat_cols additionally includes the two engineered columns.
feat_cols += ['cross_41_42_43', 'cross_1_2']

# Feature Engineering : compute the cross features and store them on `train`.
_cross_sum, _cross_ratio = feature_engineering(train, scale=scale_feature)
train['cross_41_42_43'] = _cross_sum
train['cross_1_2'] = _cross_ratio

# Convert Models to TensorFlow Lite

In [None]:
class LiteModel:
    """Thin wrapper around a TensorFlow Lite interpreter with a predict() API.

    Only the first input tensor and the first output tensor of the
    interpreter are used.
    """

    @classmethod
    def from_file(cls, model_path):
        """Build a wrapper from a .tflite file on disk."""
        # Use `cls` so that subclasses get an instance of their own type.
        return cls(tf.lite.Interpreter(model_path=model_path))

    @classmethod
    def from_keras_model(cls, kmodel):
        """Convert a Keras model to TFLite in memory and wrap the result."""
        converter = tf.lite.TFLiteConverter.from_keras_model(kmodel)
        tflite_model = converter.convert()
        return cls(tf.lite.Interpreter(model_content=tflite_model))

    def __init__(self, interpreter):
        """Allocate the interpreter's tensors and cache its I/O metadata.

        Args:
            interpreter: object exposing the tf.lite.Interpreter methods used
                here (allocate_tensors, get_input_details, get_output_details,
                set_tensor, invoke, get_tensor).
        """
        self.interpreter = interpreter
        self.interpreter.allocate_tensors()
        input_det = self.interpreter.get_input_details()[0]
        output_det = self.interpreter.get_output_details()[0]
        self.input_index = input_det["index"]
        self.output_index = output_det["index"]
        self.input_shape = input_det["shape"]
        self.output_shape = output_det["shape"]
        self.input_dtype = input_det["dtype"]
        self.output_dtype = output_det["dtype"]

    def predict(self, inp):
        """Run the model on a batch, one record at a time.

        Args:
            inp: 2-D array-like (ndarray, nested list, ...) with one record
                per row. It is converted to the interpreter's input dtype.

        Returns:
            ndarray of shape (number of records, output width) in the
            interpreter's output dtype.
        """
        # Accept any array-like (as predict_single already does) and make
        # sure every row slice handed to the interpreter is contiguous.
        inp = np.ascontiguousarray(inp, dtype=self.input_dtype)
        count = inp.shape[0]
        out = np.zeros((count, self.output_shape[1]), dtype=self.output_dtype)
        for i in range(count):
            # inp[i:i+1] keeps the leading batch axis of size 1.
            self.interpreter.set_tensor(self.input_index, inp[i:i+1])
            self.interpreter.invoke()
            out[i] = self.interpreter.get_tensor(self.output_index)[0]
        return out

    def predict_single(self, inp):
        """ Like predict(), but only for a single record. The input data can be a Python list. """
        inp = np.array([inp], dtype=self.input_dtype)
        self.interpreter.set_tensor(self.input_index, inp)
        self.interpreter.invoke()
        out = self.interpreter.get_tensor(self.output_index)
        return out[0]

# Model 1 : ResNet Architecture

In [None]:
class Model(keras.Model):
    """Four-block dense network with concatenation skips and sigmoid outputs.

    The input is batch-normalized and passed through dropout. Each of the
    four blocks is Dense -> BatchNormalization -> LeakyReLU -> Dropout. The
    input of a block is the concatenation of the previous block's input-side
    activation and its output (see ``call``). A final Dense layer followed by
    a sigmoid yields ``num_classes`` values per record.

    NOTE: this class rebinds the name ``Model`` that is also imported from
    ``tensorflow.keras.models`` at the top of the file.

    Args:
        drop_prob: dropout rate shared by every Dropout layer.
        hidden_units: width of each of the four hidden Dense layers.
        num_classes: number of output units.
    """

    def __init__(self, drop_prob, hidden_units, num_classes=5):
        super().__init__()
        # Attribute names and creation order are kept exactly as they were so
        # that previously saved weights keep loading into this class.

        # Input stage.
        self.bnorm0 = BatchNormalization()
        self.dropout0 = Dropout(drop_prob)

        # Hidden block 1.
        self.bnorm1 = BatchNormalization()
        self.dropout1 = Dropout(drop_prob)
        self.dense1 = Dense(hidden_units)

        # Hidden block 2.
        self.bnorm2 = BatchNormalization()
        self.dropout2 = Dropout(drop_prob)
        self.dense2 = Dense(hidden_units)

        # Hidden block 3.
        self.bnorm3 = BatchNormalization()
        self.dropout3 = Dropout(drop_prob)
        self.dense3 = Dense(hidden_units)

        # Hidden block 4.
        self.bnorm4 = BatchNormalization()
        self.dropout4 = Dropout(drop_prob)
        self.dense4 = Dense(hidden_units)

        # Output head and the activation shared by all hidden blocks.
        self.dense5 = Dense(num_classes)
        self.LeakyReLU = tf.keras.layers.LeakyReLU(alpha=0.01)

    def call(self, x, training=False):
        """Forward pass; ``training`` toggles batch-norm and dropout behavior."""
        skip = self.dropout0(self.bnorm0(x, training=training), training=training)
        features = skip

        blocks = (
            (self.dense1, self.bnorm1, self.dropout1),
            (self.dense2, self.bnorm2, self.dropout2),
            (self.dense3, self.bnorm3, self.dropout3),
            (self.dense4, self.bnorm4, self.dropout4),
        )
        for dense, bnorm, dropout in blocks:
            hidden = dense(features)
            hidden = bnorm(hidden, training=training)
            hidden = self.LeakyReLU(hidden)
            hidden = dropout(hidden, training=training)
            # The next layer sees [previous activation, this block's output];
            # for the first block the previous activation is the normalized
            # input.
            features = tf.keras.layers.concatenate([skip, hidden], axis=-1)
            skip = hidden

        # Output head.
        return tf.keras.activations.sigmoid(self.dense5(features))

In [None]:
# Settings for model 1. Only drop_prob, hidden_units and num_classes are used
# to build the network in this cell.
label_smoothing, learning_rate = 5e-3, 1e-3
drop_prob, hidden_units, num_classes = 0.2, 256, 5
NFOLDS = 5

# Random record with 132 inputs (the 130 raw features plus the 2 engineered ones).
sample_input = np.random.randn(1, 132)

# One TFLite-converted model per fold.
model_list1 = []
for _fold in range(NFOLDS):
    tf.keras.backend.clear_session()

    model1 = Model(
        drop_prob=drop_prob,
        hidden_units=hidden_units,
        num_classes=num_classes,
    )
    model1.load_weights(f'../input/groupkmodel/model_1/JSModel_{_fold}.tf')
    # Run one forward pass so the input shape is fixed before conversion.
    model1.predict(sample_input.astype(np.float32))
    tflite_model1 = LiteModel.from_keras_model(model1)
    model_list1.append(tflite_model1)

In [None]:
# Sanity check: the TFLite model of the last fold should reproduce the output
# of the Keras model it was converted from (model1 holds the last fold).
# Index -1 instead of a hard-coded 4 keeps the pair aligned if NFOLDS changes.
_check_input1 = sample_input.astype(np.float32)
print(model_list1[-1].predict(_check_input1))
print(model1.predict(_check_input1))

# Model 2 : Simple MLP

In [None]:
# fit
def create_mlp(
    num_columns, num_labels, hidden_units, dropout_rates, label_smoothing, learning_rate
):
    """Build and compile a plain MLP with sigmoid outputs.

    Architecture: Input -> BatchNormalization -> Dropout, then for every entry
    of ``hidden_units`` a Dense -> BatchNormalization -> swish -> Dropout
    block, and finally Dense(num_labels) -> sigmoid.

    Args:
        num_columns: number of input features.
        num_labels: number of output units.
        hidden_units: sequence with the width of each hidden Dense layer.
        dropout_rates: sequence of dropout rates; entry 0 is applied to the
            input, entry ``i + 1`` after hidden layer ``i`` (so it needs
            ``len(hidden_units) + 1`` entries).
        label_smoothing: label smoothing of the binary cross-entropy loss.
        learning_rate: learning rate of the RectifiedAdam optimizer.

    Returns:
        The compiled ``tf.keras`` model (metrics: AUC and Precision).
    """
    layers = tf.keras.layers

    inputs = layers.Input(shape=(num_columns,))
    hidden = layers.BatchNormalization()(inputs)
    hidden = layers.Dropout(dropout_rates[0])(hidden)

    for depth, width in enumerate(hidden_units):
        hidden = layers.Dense(width)(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Activation(tf.keras.activations.swish)(hidden)
        hidden = layers.Dropout(dropout_rates[depth + 1])(hidden)

    logits = layers.Dense(num_labels)(hidden)
    outputs = layers.Activation("sigmoid")(logits)

    mlp = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    mlp.compile(
        optimizer=tfa.optimizers.RectifiedAdam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
        metrics=[
            tf.keras.metrics.AUC(name="AUC"),
            tf.keras.metrics.Precision(name="Precision"),
        ],
    )

    return mlp

In [None]:
# Settings for model 2 (the MLP): three hidden layers of 160 units, with one
# dropout rate for the input plus one per hidden layer.
label_smoothing, learning_rate = 5e-3, 1e-3
hidden_units = [160, 160, 160]
dropout_rates = [0.2, 0.2, 0.2, 0.2]
tf.keras.backend.clear_session()

# Random record with 130 inputs (model 2 uses the raw features only).
sample_input = np.random.randn(1, 130)

# One TFLite-converted model per fold.
model_list2 = []
for _fold in range(NFOLDS):
    tf.keras.backend.clear_session()

    model2 = create_mlp(
        len(feat_cols_130), 5, hidden_units, dropout_rates, label_smoothing, learning_rate
    )
    model2.load_weights(f'../input/groupkmodel/model_2/JSModel_{_fold}.tf')
    # One forward pass before conversion, as done for model 1.
    model2.predict(sample_input.astype(np.float32))
    tflite_model2 = LiteModel.from_keras_model(model2)
    model_list2.append(tflite_model2)

In [None]:
# Sanity check: the TFLite model of the last fold should reproduce the output
# of the Keras model it was converted from (model2 holds the last fold).
# Index -1 instead of a hard-coded 4 keeps the pair aligned if NFOLDS changes.
_check_input2 = sample_input.astype(np.float32)
print(model_list2[-1].predict(_check_input2))
print(model2.predict(_check_input2))

# Validation Dataset

In [None]:
# Hold out the rows with 450 <= date < 500 as the validation set.
_in_valid_window = (train['date'] >= 450) & (train['date'] < 500)
valid = train.loc[_in_valid_window].reset_index(drop=True)
# del train

In [None]:
def utility_score_bincount(date, weight, resp, action):
    """Compute the utility score of a set of trading decisions.

    With ``Pi`` the per-date sum of ``weight * resp * action``:

        t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / number of distinct dates)
        u = clip(t, 0, 6) * sum(Pi)

    Args:
        date: 1-D array of non-negative integer dates (used as bin indices).
        weight: 1-D array of row weights.
        resp: 1-D array of row responses.
        action: 1-D array of decisions (0 or 1 in this notebook).

    Returns:
        The utility ``u``. Returns 0.0 when every per-date sum is zero (for
        example when no action is taken), where the formula would otherwise
        evaluate 0 / 0 and yield NaN.
    """
    count_i = len(np.unique(date))
    Pi = np.bincount(date, weight * resp * action)
    total = np.sum(Pi)
    norm = np.sqrt(np.sum(Pi ** 2))
    if norm == 0:
        # Nothing was earned or lost on any date: the score is zero, not NaN.
        return 0.0
    t = total / norm * np.sqrt(250 / count_i)
    u = np.clip(t, 0, 6) * total
    return u

In [None]:
_n_targets = len(target_cols)

# Model 1: average the fold predictions, then reduce the per-target outputs to
# one score per row with the median.
_model1_inputs = valid[feat_cols].values
valid_pred1 = np.zeros((len(valid), _n_targets))
for model in model_list1:
    valid_pred1 += model.predict(_model1_inputs) / len(model_list1)
valid_pred1 = np.median(valid_pred1, axis=1)

# Model 2: same procedure on the 130 raw features.
_model2_inputs = valid[feat_cols_130].values
valid_pred2 = np.zeros((len(valid), _n_targets))
for model in model_list2:
    valid_pred2 += model.predict(_model2_inputs) / len(model_list2)
valid_pred2 = np.median(valid_pred2, axis=1)

In [None]:
# Grid search: for each blend weight alpha in 0.40 .. 0.59 (weight of model 1),
# scan thresholds 0.4500 .. 0.5499 and report the best-scoring threshold.
for a in range(40, 60):
    best_threshold, best_u_score = 0.5, 0
    alpha = float(a) / 100
    print(f'alpha : {alpha} 1-alpha: {1-alpha}')

    # The blend depends only on alpha, so compute it once per alpha rather
    # than once per threshold.
    valid_pred = valid_pred1*alpha + valid_pred2*(1-alpha)
    for i in range(4500, 5500):
        thres = float(i) / 10000

        # action = 1 wherever the blended prediction reaches the threshold.
        slice_valid_pred = (valid_pred >= thres).astype(int)
        valid_u_score = utility_score_bincount(date=valid.date.values, weight=valid.weight.values,
                                               resp=valid.resp.values, action=slice_valid_pred)

        # `>=` resolves ties in favour of the larger threshold.
        if valid_u_score >= best_u_score:
            best_u_score = valid_u_score
            best_threshold = thres
    print(f'Best thresold={best_threshold:.4f}, best valid u score={best_u_score:.4f}')

In [None]:
# Grid search: for each blend weight alpha in 0.60 .. 0.69 (weight of model 1),
# scan thresholds 0.4500 .. 0.5499 and report the best-scoring threshold.
for a in range(60, 70):
    best_threshold, best_u_score = 0.5, 0
    alpha = float(a) / 100
    print(f'alpha : {alpha} 1-alpha: {1-alpha}')

    # The blend depends only on alpha, so compute it once per alpha rather
    # than once per threshold.
    valid_pred = valid_pred1*alpha + valid_pred2*(1-alpha)
    for i in range(4500, 5500):
        thres = float(i) / 10000

        # action = 1 wherever the blended prediction reaches the threshold.
        slice_valid_pred = (valid_pred >= thres).astype(int)
        valid_u_score = utility_score_bincount(date=valid.date.values, weight=valid.weight.values,
                                               resp=valid.resp.values, action=slice_valid_pred)

        # `>=` resolves ties in favour of the larger threshold.
        if valid_u_score >= best_u_score:
            best_u_score = valid_u_score
            best_threshold = thres
    print(f'Best thresold={best_threshold:.4f}, best valid u score={best_u_score:.4f}')

In [None]:
# Grid search: for each blend weight alpha in 0.70 .. 0.89 (weight of model 1),
# scan thresholds 0.4500 .. 0.5499 and report the best-scoring threshold.
for a in range(70, 90):
    best_threshold, best_u_score = 0.5, 0
    alpha = float(a) / 100
    print(f'alpha : {alpha} 1-alpha: {1-alpha}')

    # The blend depends only on alpha, so compute it once per alpha rather
    # than once per threshold.
    valid_pred = valid_pred1*alpha + valid_pred2*(1-alpha)
    for i in range(4500, 5500):
        thres = float(i) / 10000

        # action = 1 wherever the blended prediction reaches the threshold.
        slice_valid_pred = (valid_pred >= thres).astype(int)
        valid_u_score = utility_score_bincount(date=valid.date.values, weight=valid.weight.values,
                                               resp=valid.resp.values, action=slice_valid_pred)

        # `>=` resolves ties in favour of the larger threshold.
        if valid_u_score >= best_u_score:
            best_u_score = valid_u_score
            best_threshold = thres
    print(f'Best thresold={best_threshold:.4f}, best valid u score={best_u_score:.4f}')

In [None]:
# Best Setting
# alpha : 0.81 1-alpha: 0.18999999999999995
# Best thresold=0.4893, best valid u score=3291.1995