In [None]:
import os
import time
import gc
import random
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import pathlib
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa 
from tensorflow.keras import layers

In [None]:
# Turn on memory growth for every GPU that TensorFlow reports, so that memory
# is requested as needed rather than configured up front.
# If no GPU is listed the loop body simply never runs.
GPUs = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in GPUs:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Record the TensorFlow version in the notebook output for reproducibility.
print('tensorflow_version_is',tf.__version__)

In [None]:
SEED = 42


def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Exports ``seed`` as the ``PYTHONHASHSEED`` environment variable and seeds
    Python's ``random`` module, NumPy's global generator and TensorFlow.

    Parameters
    ----------
    seed : int
        Value handed to each seeding function.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for set_seed in (random.seed, np.random.seed, tf.random.set_seed):
        set_seed(seed)


seed_everything(seed=SEED)

In [None]:
%%time
# Load the training table from the pre-converted feather file into `train`.
# The two prints bracket the read so the cell output shows when it finished.
print('Loading data...')
train = pd.read_feather('../input/janestreet-save-as-feather/train.feather')
print('Done!')

In [None]:
from tensorflow.keras import backend as K

class Mish(tf.keras.layers.Layer):
    """Keras layer applying the Mish activation: ``x * tanh(softplus(x))``.

    The layer is element-wise, has no weights and no hyper-parameters of its
    own, and leaves the input shape unchanged.
    """

    def __init__(self, **kwargs):
        super(Mish, self).__init__(**kwargs)
        # Element-wise op: any incoming mask can be passed through untouched.
        self.supports_masking = True

    def call(self, inputs):
        """Return ``inputs * tanh(softplus(inputs))``."""
        return inputs * K.tanh(K.softplus(inputs))

    def get_config(self):
        """Return the layer configuration used for serialisation.

        The layer adds no arguments beyond those of the base ``Layer``, so the
        base configuration is returned as-is. (The previous version merged it
        with an undefined ``config`` variable and raised ``NameError``.)
        """
        return dict(super(Mish, self).get_config())

    def compute_output_shape(self, input_shape):
        """Output shape equals input shape."""
        return input_shape

def mish(x):
    """Apply ``x * tanh(softplus(x))`` to ``x`` through a Keras ``Lambda`` layer."""
    mish_lambda = tf.keras.layers.Lambda(lambda t: t*K.tanh(K.softplus(t)))
    return mish_lambda(x)


# Register the activation under the string name 'mish' so that
# `layers.Activation('mish')` can be resolved when models are built.
tf.keras.utils.get_custom_objects().update(
    {'mish': tf.keras.layers.Activation(mish)}
)

Data preparation for the NN-MLP & RestNet models


In [None]:
# Keep only rows with date > 85 and a positive weight, then renumber the index.
train = (
    train
    .query('date > 85')
    .query('weight > 0')
    .reset_index(drop=True)
)
# Replace every remaining NaN with the mean of its column.
train.fillna(train.mean(), inplace=True)

base_features = [col for col in train.columns if "feature" in col]
# Per-feature means reused at inference time to fill NaNs. The first base
# feature (feature_0) is skipped: the original author notes that it should not
# be filled with a float.
f_mean = train[base_features[1:]].values.mean(axis=0)

# Binary targets: 1 where the corresponding response column is positive.
train['action'] = (train['resp'] > 0).astype('int')
for horizon in (1, 2, 3, 4):
    train[f'action_{horizon}'] = (train[f'resp_{horizon}'] > 0).astype('int')

def add_features(train_df):
    """Append the two hand-crafted cross features to ``train_df`` in place.

    Adds ``feature_cross_41_42_43`` (sum of features 41, 42 and 43) and
    ``feature_cross_1_2`` (feature_1 divided by feature_2 + 2e-5, the offset
    keeping the denominator away from an exact zero), in that column order.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame containing the ``feature_1``, ``feature_2``, ``feature_41``,
        ``feature_42`` and ``feature_43`` columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``train_df`` object, with the two new columns.
    """
    sum_41_42_43 = train_df['feature_41'] + train_df['feature_42'] + train_df['feature_43']
    ratio_1_2 = train_df['feature_1'] / (train_df['feature_2'] + 2e-5)

    train_df['feature_cross_41_42_43'] = sum_41_42_43
    train_df['feature_cross_1_2'] = ratio_1_2
    return train_df

# Add the cross features, then collect every column whose name contains
# "feature" (base features first, the two cross features last).
train = add_features(train)
features = [col for col in train.columns if "feature" in col]
target_cols = ['action', 'action_1', 'action_2', 'action_3', 'action_4']

# Model inputs and the five binary targets, one row per training sample.
X = train[features].values
y = np.stack([(train[col] > 0).astype('int') for col in target_cols], axis=0).T
X.shape, y.shape

RestNet model

In [None]:
def RestNet_(num_columns, 
             num_labels, 
             hidden_size, 
             dropout_rate, 
             label_smoothing, 
             learning_rate):
    """Build and compile the densely connected MLP used for every fold.

    A stem (BatchNorm -> Dense(132) -> LeakyReLU -> Dropout(0.315)) is followed
    by five Dense/BatchNorm/activation/Dropout blocks whose widths are
    ``hidden_size`` scaled by 1.2, 1.1, 1.0, 0.9 and 0.8. Each block receives
    the concatenation of earlier block outputs, and the sigmoid head reads the
    concatenation of blocks 1, 3 and 5.

    The 'mish' activation string must already be registered in the Keras
    custom objects before this function is called.

    Parameters
    ----------
    num_columns : int
        Number of input features.
    num_labels : int
        Number of sigmoid outputs.
    hidden_size : int
        Base width of the hidden blocks.
    dropout_rate : float
        Dropout rate of the five hidden blocks (the stem uses a fixed 0.315).
    label_smoothing : float
        Passed to ``BinaryCrossentropy``.
    learning_rate : float
        Passed to ``RectifiedAdam``.

    Returns
    -------
    tf.keras.Model
        The compiled model.
    """
    # Dense expects an integer unit count; truncate the scaled widths
    # explicitly instead of handing Keras a float.
    width_1 = int(hidden_size * 1.2)
    width_2 = int(hidden_size * 1.1)
    width_3 = int(hidden_size * 1.0)
    width_4 = int(hidden_size * 0.9)
    width_5 = int(hidden_size * 0.8)

    inp = layers.Input(shape=(num_columns,))
    x = layers.BatchNormalization()(inp)
    x = layers.Dense(132)(x)
    x = layers.LeakyReLU()(x)
    x = layers.Dropout(0.315)(x)

    # Block 1 reads the stem output.
    x1 = layers.Dense(width_1)(x)
    x1 = layers.BatchNormalization()(x1)
    x1 = layers.Activation('mish')(x1)
    x1 = layers.Dropout(dropout_rate)(x1)

    # Block 2 reads stem + block 1.
    x = layers.concatenate([x, x1], axis=1)

    x2 = layers.Dense(width_2)(x)
    x2 = layers.BatchNormalization(axis=1)(x2)
    x2 = layers.Activation('mish')(x2)
    x2 = layers.Dropout(dropout_rate)(x2)

    # Block 3 reads blocks 1 + 2.
    x = layers.concatenate([x1, x2], axis=1)

    x3 = layers.Dense(width_3)(x)
    x3 = layers.BatchNormalization(axis=1)(x3)
    x3 = layers.Activation('mish')(x3)
    x3 = layers.Dropout(dropout_rate)(x3)

    # Block 4 reads blocks 2 + 3.
    x = layers.concatenate([x2, x3], axis=1)

    x4 = layers.Dense(width_4)(x)
    x4 = layers.BatchNormalization(axis=1)(x4)
    x4 = layers.Activation('mish')(x4)
    x4 = layers.Dropout(dropout_rate)(x4)

    # Block 5 reads blocks 3 + 4 and uses LeakyReLU instead of mish.
    x = layers.concatenate([x3, x4], axis=1)

    x5 = layers.Dense(width_5)(x)
    x5 = layers.BatchNormalization(axis=1)(x5)
    x5 = layers.LeakyReLU()(x5)
    x5 = layers.Dropout(dropout_rate)(x5)

    # Output head reads blocks 1, 3 and 5.
    x = layers.concatenate([x1, x3, x5], axis=1)
    x = layers.Dense(num_labels)(x)

    out = layers.Activation("sigmoid")(x)

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(optimizer=tfa.optimizers.RectifiedAdam(learning_rate=learning_rate,weight_decay=1e-5),
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
                  metrics=[tf.keras.metrics.AUC(name="AUC")]
                 )

    return model

In [None]:
# Default hyper-parameters passed to every RestNet_ call in this notebook.
hidden_units, dropout_rates = 256, 0.3
label_smoothing, learning_rate = 5e-3, 1e-3

# Build one model from the defaults and print its layer summary.
model = RestNet_(
    num_columns=X.shape[1],
    num_labels=y.shape[1],
    hidden_size=hidden_units,
    dropout_rate=dropout_rates,
    label_smoothing=label_smoothing,
    learning_rate=learning_rate,
)
model.summary()

In [None]:
# Drop the reference to the model built for the summary and run a garbage
# collection pass before further models are created.
del model
gc.collect()

In [None]:
class PurgedGroupTimeSeriesSplit(_BaseKFold):
    """Time-series cross-validator over groups with a gap before each test fold.

    Groups are ordered by first appearance in ``groups``. Each of the
    ``n_splits`` test folds is a run of consecutive groups taken from the end
    of that ordering. The matching training fold is built from groups that come
    before the test fold, leaving ``group_gap`` groups unused in between.

    Parameters
    ----------
    n_splits : int, default=5
        Number of splits. Must be at least 2.
    max_train_group_size : int, default=Inf
        Maximum number of groups in a single training set.
    max_test_group_size : int, default=Inf
        Maximum number of groups in a single test set.
    group_gap : int, default=None
        Number of groups skipped between the end of the training set and the
        start of the test set. ``None`` is treated as 0.
    verbose : bool or int, default=False
        Stored on the instance; ``split`` does not currently use it.
    """
    @_deprecate_positional_args
    def __init__(self,
                 n_splits=5,
                 *,
                 max_train_group_size=np.inf,
                 max_test_group_size=np.inf,
                 group_gap=None,
                 verbose=False
                 ):
        super().__init__(n_splits, shuffle=False, random_state=None)
        self.max_train_group_size = max_train_group_size
        self.group_gap = group_gap
        self.max_test_group_size = max_test_group_size
        self.verbose = verbose

    def split(self, X, y=None, groups=None):
        """Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like of shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : list of int
            Sorted sample indices of the training fold. Empty when the gap
            leaves no group before the test fold.
        test : list of int
            Sorted sample indices of the test fold.

        Raises
        ------
        ValueError
            If ``groups`` is None, or if ``n_splits + 1`` exceeds the number
            of distinct groups.
        """
        if groups is None:
            raise ValueError(
                "The 'groups' parameter should not be None")
        X, y, groups = indexable(X, y, groups)
        n_samples = _num_samples(X)
        n_splits = self.n_splits
        # The constructor default is None; treat it as "no gap" instead of
        # failing on the integer arithmetic below.
        group_gap = 0 if self.group_gap is None else self.group_gap
        max_test_group_size = self.max_test_group_size
        max_train_group_size = self.max_train_group_size
        n_folds = n_splits + 1

        # Distinct groups in order of first appearance.
        u, ind = np.unique(groups, return_index=True)
        unique_groups = u[np.argsort(ind)]
        n_groups = _num_samples(unique_groups)

        # Map each group label to the (ascending) list of its sample indices.
        group_dict = {}
        for idx in range(n_samples):
            group_dict.setdefault(groups[idx], []).append(idx)

        if n_folds > n_groups:
            raise ValueError(
                ("Cannot have number of folds={0} greater than"
                 " the number of groups={1}").format(n_folds,
                                                     n_groups))

        group_test_size = min(n_groups // n_folds, max_test_group_size)
        group_test_starts = range(n_groups - n_splits * group_test_size,
                                  n_groups, group_test_size)
        for group_test_start in group_test_starts:
            # Clamp at 0: a negative stop would be read by the slice as
            # "count from the end" and pull test/future groups into training.
            train_stop = max(0, group_test_start - group_gap)
            train_start = max(0, train_stop - max_train_group_size)
            train_groups = unique_groups[train_start:train_stop]
            test_groups = unique_groups[group_test_start:
                                        group_test_start + group_test_size]

            # Gather all indices once and sort once, rather than re-sorting
            # the accumulated array for every group.
            train_array = sorted(
                i for grp in train_groups for i in group_dict[grp])
            test_array = sorted(
                i for grp in test_groups for i in group_dict[grp])

            # As in the original implementation, the first `group_gap`
            # *samples* of the test fold are dropped as well.
            test_array = test_array[group_gap:]

            yield train_array, test_array

In [None]:
NUM_FOLDS = 7
EPOCHS = 500
BATCH_SIZE = 6500
TRAINING = False   # True: fit models here; False: load pre-trained weights
CV = True          # only used when TRAINING: fold-wise CV vs. one fit on all data


def build_restnet():
    """Return a freshly compiled RestNet_ using the notebook-level defaults."""
    return RestNet_(X.shape[1],
                    y.shape[1],
                    hidden_units,
                    dropout_rates,
                    label_smoothing,
                    learning_rate)


def load_fold_models(weights_dir, seed, num_folds=NUM_FOLDS):
    """Rebuild one model per fold and load its saved weights.

    Reads ``{weights_dir}/model_{seed}_{fold}.h5`` for ``fold`` in
    ``range(num_folds)`` and returns the models in fold order.
    """
    fold_models = []
    for fold in range(num_folds):
        tf.keras.backend.clear_session()
        fold_model = build_restnet()
        fold_model.load_weights(pathlib.Path(f'{weights_dir}/model_{seed}_{fold}.h5'))
        fold_models.append(fold_model)
    return fold_models


if TRAINING:
    # Both branches write weights under logs/; make sure it exists.
    os.makedirs('logs', exist_ok=True)
    if CV:
        gkf = PurgedGroupTimeSeriesSplit(n_splits = NUM_FOLDS, group_gap=15)
        splits = list(gkf.split(y, groups=train['date'].values))    
        for fold, (train_indices, test_indices) in enumerate(splits):
            keras.backend.clear_session()

            reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                          verbose=1,
                                                          factor=0.2,
                                                          patience=12, mode='min')
            early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=25)
            # Keeps the weights of the epoch with the best validation AUC.
            checkpoint_callback = keras.callbacks.ModelCheckpoint(
                filepath='logs/BestVal_model_{}.h5'.format(fold),
                save_weights_only=True,
                monitor='val_AUC',
                mode='max',
                verbose=1,
                save_best_only=True)

            model = build_restnet()

            X_train, X_test = X[train_indices], X[test_indices]
            y_train, y_test = y[train_indices], y[test_indices]

            model.fit(X_train,
                      y_train,
                      validation_data=(X_test,y_test),
                      epochs=EPOCHS,
                      batch_size=BATCH_SIZE,
                      callbacks=[reduce_lr,
                                 early_stop,
                                 checkpoint_callback])

            # Final-epoch weights, separate from the best-validation checkpoint.
            model.save_weights(f'logs/model_{SEED}_{fold}.h5')

            del model
            gc.collect()
    else:
        keras.backend.clear_session()
        # Single fit on all data. The previous call passed undefined
        # `features1`/`features2` and one argument too many.
        model = build_restnet()

        model.fit(X,
                  y,
                  epochs=EPOCHS,
                  batch_size=BATCH_SIZE)

        # Was '.logs/...': write next to the per-fold weights instead.
        model.save_weights(f'logs/model_{SEED}_NONE.h5')

else:
    # Load the per-fold final weights trained with three different seeds.
    SEED1 = 42
    Full_models_42 = load_fold_models('../input/drop0315', SEED1)

    SEED2 = 73
    Full_models_73 = load_fold_models('../input/73cv7', SEED2)

    SEED3 = 2021
    Full_models_2021 = load_fold_models('../input/2021cv7', SEED3)

    # The best-validation checkpoints ('../input/drop0315/BestVal_model_{fold}.h5')
    # were previously loaded into a `Val_models` list the same way; that
    # variant is currently disabled.

In [None]:
#Val_models = Val_models[1:]
# Keep the last NUM_FOLDS - 1 models of each seed, i.e. drop the fold-0 model.
# Slicing from the end (instead of `[1:]`) makes the cell safe to re-run:
# running it twice no longer removes a second model, which would leave fewer
# models than there are entries in `weight_model`.
n_models_kept = NUM_FOLDS - 1
Full_models_42 = Full_models_42[-n_models_kept:]
Full_models_73 = Full_models_73[-n_models_kept:]
Full_models_2021 = Full_models_2021[-n_models_kept:]
print(len(Full_models_42) + len(Full_models_73) + len(Full_models_2021))

In [None]:
%%time
class LiteModel:
    """Thin wrapper around a TFLite interpreter with a Keras-like ``predict``.

    The wrapper reads the first input and first output tensor of the
    interpreter and feeds records through it one at a time.
    """

    @classmethod
    def from_file(cls, model_path):
        """Create an instance from a ``.tflite`` file on disk."""
        # `cls(...)` rather than `LiteModel(...)` so subclasses get their own type.
        return cls(tf.lite.Interpreter(model_path=model_path))

    @classmethod
    def from_keras_model(cls, kmodel):
        """Convert a Keras model to TFLite in memory and wrap it."""
        converter = tf.lite.TFLiteConverter.from_keras_model(kmodel)
        tflite_model = converter.convert()
        return cls(tf.lite.Interpreter(model_content=tflite_model))

    def __init__(self, interpreter):
        """Allocate tensors and cache the input/output tensor metadata.

        Parameters
        ----------
        interpreter : tf.lite.Interpreter
            Interpreter to wrap; ``allocate_tensors()`` is called on it here.
        """
        self.interpreter = interpreter
        self.interpreter.allocate_tensors()
        input_det = self.interpreter.get_input_details()[0]
        output_det = self.interpreter.get_output_details()[0]
        self.input_index = input_det["index"]
        self.output_index = output_det["index"]
        self.input_shape = input_det["shape"]
        self.output_shape = output_det["shape"]
        self.input_dtype = input_det["dtype"]
        self.output_dtype = output_det["dtype"]

    def predict(self, inp):
        """Run the model on every row of ``inp``.

        Parameters
        ----------
        inp : numpy.ndarray
            2-D array with one record per row; it is cast to the
            interpreter's input dtype.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(len(inp), output_shape[1])`` in the
            interpreter's output dtype.
        """
        inp = inp.astype(self.input_dtype)
        count = inp.shape[0]
        out = np.zeros((count, self.output_shape[1]), dtype=self.output_dtype)
        # Rows are fed one by one as single-row batches.
        for i in range(count):
            self.interpreter.set_tensor(self.input_index, inp[i:i+1])
            self.interpreter.invoke()
            out[i] = self.interpreter.get_tensor(self.output_index)[0]
        return out

    def predict_single(self, inp):
        """ Like predict(), but only for a single record. The input data can be a Python list. """
        inp = np.array([inp], dtype=self.input_dtype)
        self.interpreter.set_tensor(self.input_index, inp)
        self.interpreter.invoke()
        out = self.interpreter.get_tensor(self.output_index)
        return out[0]

# tflite_models_val=[]

# for i in range(len(Val_models)):
#     tflite_model_ = LiteModel.from_keras_model(Val_models[i])
#     tflite_models_val.append(tflite_model_)
    
    
# Convert every loaded Keras model into a TFLite-backed LiteModel,
# keeping one list per training seed in the same order as the Keras lists.
tflite_models_42 = [LiteModel.from_keras_model(kmodel) for kmodel in Full_models_42]

tflite_models_73 = [LiteModel.from_keras_model(kmodel) for kmodel in Full_models_73]

tflite_models_2021 = [LiteModel.from_keras_model(kmodel) for kmodel in Full_models_2021]

# Total number of converted models across the three seeds.
print(len(tflite_models_42) + len(tflite_models_73) + len(tflite_models_2021))

In [None]:
# Reducer applied to each seed ensemble's averaged prediction array in the
# inference loop, collapsing it to a single value.
f = np.median
# Decision threshold: a blended prediction >= th is submitted as action 1.
th = 0.502
# Per-model weights handed to np.average over each seed's model list
# (one entry per model in the list).
weight_model = [1,1,1,2,2,2]

In [None]:
import janestreet
env = janestreet.make_env()

# Loop-invariant: the same per-model weights are used for every seed ensemble.
model_weights = np.array(weight_model)

for (test_df, pred_df) in tqdm(env.iter_test()):
    if test_df['weight'].item() > 0:
        test_df_ = test_df.loc[:, base_features].values
        # Fill NaNs in every base feature except the first one with the
        # training means stored in f_mean.
        if np.isnan(test_df_[:, 1:].sum()):
            test_df_[:, 1:] = np.nan_to_num(test_df_[:, 1:]) + np.isnan(test_df_[:, 1:]) * f_mean

        # Same cross features, in the same order, as add_features() builds
        # for the training matrix.
        cross_41_42_43 = test_df_[:, 41] + test_df_[:, 42] + test_df_[:, 43]
        cross_1_2 = test_df_[:, 1] / (test_df_[:, 2] + 2e-5)
        x_tt = np.concatenate((
            test_df_,
            cross_41_42_43.reshape(-1, 1),
            cross_1_2.reshape(-1, 1),
        ), axis=1)

        # Weighted average over the models of each seed, then reduce with `f`.
        pred_42 = f(np.average([clf.predict(x_tt) for clf in tflite_models_42],
                               axis=0, weights=model_weights))
        # Previously this result was assigned to pred_42, overwriting the
        # seed-42 ensemble and leaving pred_73 unreduced.
        pred_73 = f(np.average([clf.predict(x_tt) for clf in tflite_models_73],
                               axis=0, weights=model_weights))
        pred_2021 = f(np.average([clf.predict(x_tt) for clf in tflite_models_2021],
                                 axis=0, weights=model_weights))

        # Blend the three seed ensembles; the 0.4 weight belongs to seed 2021
        # (it was applied to pred_73 a second time, so pred_2021 was unused).
        pred = pred_42*0.3 + pred_73*0.3 + pred_2021*0.4
        pred_df.action = int(pred >= th)
    else:
        # Rows with non-positive weight are always submitted as action 0.
        pred_df.action = 0

    env.predict(pred_df)