In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import random
from sklearn.pipeline import Pipeline


import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Embedding,  Flatten
from tensorflow.keras.models import Model, Sequential
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import RMSprop
import keras_tuner as kt

from tensorflow.data import Dataset
from sklearn.preprocessing import QuantileTransformer,  KBinsDiscretizer
from sklearn.model_selection import StratifiedKFold
from tensorflow import keras
from sklearn import metrics
from sklearn.impute import SimpleImputer

from sklearn.model_selection import train_test_split

from sklearn.metrics import roc_auc_score


import warnings
warnings.simplefilter('ignore')

# Parameters

In [None]:
# Name of the label column in the competition's train.csv.
target = 'claim'

# Flip to True for a quick smoke-test configuration.
DEBUG = False

# Values that are identical in both modes.
SEED = 2017
CVSEED = 2017
BINS = 128

# Mode-dependent values.
if DEBUG:
    N_ESTIMATORS, N_SPLITS = 1, 2
    EARLY_STOPPING_ROUNDS, VERBOSE = 1, 100
else:
    N_ESTIMATORS, N_SPLITS = 20000, 5
    EARLY_STOPPING_ROUNDS, VERBOSE = 300, 1000

In [None]:
def set_seed(seed=2017):
    """Seed the Python and NumPy global random number generators.

    Also writes ``PYTHONHASHSEED`` into this process's environment.
    NOTE: TensorFlow's own generator is not seeded by this function.

    Args:
        seed: Value handed to ``random.seed`` and ``np.random.seed`` and
            stored (as text) in ``PYTHONHASHSEED``. Defaults to 2017.
    """
    random.seed(seed)
    hash_seed_text = str(seed)
    os.environ['PYTHONHASHSEED'] = hash_seed_text
    np.random.seed(seed)
    
# Apply the notebook-wide seed before any data loading or model building.
set_seed(seed=SEED)

# Load Dataset

In [None]:
# Only the label column is kept from train.csv, so ask the parser for that
# column alone instead of reading every column and discarding the rest.
train = pd.read_csv(
    '../input/tabular-playground-series-sep-2021/train.csv',
    usecols=[target],
)
#test  = pd.read_csv('../input/tabular-playground-series-sep-2021/test.csv')
#sub   = pd.read_csv('../input/tabular-playground-series-sep-2021/sample_solution.csv')

In [None]:
# Out-of-fold predictions of the level-0 tree models, one new column each.
# The dataset directory also holds a test-set counterpart for every file
# (same name with `_oof` replaced by `_pred`); those are not loaded here.
tree_oof_files = {
    'lgb_pred': "../input/tps-sep-lv0-base-trees/agg_lgb_oof.npy",
    'lgb2_pred': "../input/tps-sep-lv0-base-trees/agg_lgb2_oof.npy",
    'xgb_pred': "../input/tps-sep-lv0-base-trees/agg_xgb_oof.npy",
    'lgb_bizen_pred': "../input/tps-sep-lv0-base-trees/lgb_bizen_oof.npy",
    'lgb_dmitry_pred': "../input/tps-sep-lv0-base-trees/lgb_dmitry_oof.npy",
    'xgb_dmitry_pred': "../input/tps-sep-lv0-base-trees/xgb_dmitry_oof.npy",
    'lgb_manav_pred': "../input/tps-sep-lv0-base-trees/lgb_manav_oof.npy",
    'xgb_manav_pred': "../input/tps-sep-lv0-base-trees/xgb_manav_oof.npy",
}

# Dict order is insertion order, so columns are appended in the order listed.
for oof_column, oof_path in tree_oof_files.items():
    train[oof_column] = np.load(oof_path)

In [None]:
# Out-of-fold predictions of the ridge and neural-network level-0 models.
# Their test-set counterparts (`ridge_pred.npy`, `agg_nn_pred.npy`) are not
# loaded in this notebook.
for oof_column, oof_path in (
    ('ridge_pred', "../input/tps-sep-lv0-base-trees/ridge_oof.npy"),
    ('nn_pred', "../input/tps-sep-lv0-base-trees/agg_nn_oof.npy"),
):
    train[oof_column] = np.load(oof_path)

# Preprocessing

In [None]:
# Model inputs: every column whose name contains 'pred'.
features = [name for name in train.columns if 'pred' in name]

# Two-step preprocessing: map each column onto a normal distribution using
# BINS quantiles, then cut it into BINS equal-width bins encoded as ordinal
# codes. (A median SimpleImputer step is currently disabled.)
# The pipeline is only defined here, not fitted: the fit/transform calls on
# the full train/test frames are disabled in this cell.
quantile_step = ("scaler", QuantileTransformer(n_quantiles=BINS, output_distribution='normal'))
binning_step = ('bin', KBinsDiscretizer(n_bins=BINS, encode='ordinal', strategy='uniform'))
pipe = Pipeline(steps=[quantile_step, binning_step])

In [None]:
# Preview the stacked-prediction feature columns.
train.loc[:, features]

In [None]:
# Preview the label column.
train.loc[:, target]

# Model

In [None]:
def make_model(hp):
    """Build and compile the embedding + MLP network for one tuner trial.

    Architecture: Embedding -> Dropout -> Flatten -> ``n_layers`` blocks of
    (Dense + Dropout) -> single sigmoid unit. The input width is taken from
    the module-level ``train[features]`` frame and the embedding vocabulary
    size from ``BINS``, so inputs must be integer-valued codes in
    ``[0, BINS)``.

    Args:
        hp: KerasTuner hyperparameter container; the search space
            (learning rate, dropout, embedding size, hidden width, depth,
            activation, LR decay rate, Adam epsilon) is registered on it here.

    Returns:
        A compiled ``keras.Model`` using binary cross-entropy loss, Adam with
        an exponentially decaying learning rate, and an AUC metric named
        ``'aucroc'``.
    """
    # Search space. Registration order is kept stable on purpose.
    lr = hp.Float('learning_rate', min_value=3.2e-3, max_value=8.2e-3)
    dropout = hp.Float('dropout', min_value=0.518, max_value=0.618)
    embed_dim = hp.Int('embed_dim', min_value=4, max_value=20, step=4)
    hidden_dim = hp.Int('hidden_dim', min_value=280, max_value=344, step=8)
    n_layers = hp.Int('n_layers', min_value=1, max_value=2, step=1)
    act = hp.Choice('activation', values=['swish', 'relu', 'elu', 'tanh', 'sigmoid'])
    drate = hp.Float('decay_rate', min_value=0.637, max_value=0.687)
    eps = hp.Float('epsilon', min_value=1e-8, max_value=1e-7)

    # One embedding table shared by all input columns, then flattened so the
    # dense stack sees every column's embedding side by side.
    inputs = Input(train[features].shape[1:])
    X = Embedding(input_dim=BINS, output_dim=embed_dim, embeddings_initializer="glorot_normal")(inputs)
    X = Dropout(dropout)(X)
    X = Flatten()(X)

    for i in range(n_layers):
        # Halve the width at each successive layer. Floor division keeps the
        # unit count an int (true division would hand Dense a float).
        units = hidden_dim // (2 ** i)
        X = layers.Dense(units, activation=act, kernel_initializer=tf.keras.initializers.GlorotNormal())(X)
        X = Dropout(dropout)(X)
    outputs = layers.Dense(1, activation='sigmoid', kernel_initializer=tf.keras.initializers.GlorotNormal())(X)
    model = keras.Model(inputs, outputs)

    # decay_steps is fixed at 450; only the decay rate is tuned.
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=lr,
        decay_steps=450,
        decay_rate=drate)

    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule, epsilon=eps)
    model.compile(loss=keras.losses.binary_crossentropy,
                  optimizer=optimizer,
                  metrics=[tf.keras.metrics.AUC(name='aucroc')])
    return model

# Keras Tuner

In [None]:
def parse_trial_state(trial):
    """Flatten a tuner trial's state into a single dict (one table row).

    Args:
        trial: Object exposing ``get_state()`` that returns a dict with the
            keys ``'best_step'``, ``'trial_id'``, ``'score'`` and
            ``'hyperparameters'`` (itself holding a ``'values'`` dict).

    Returns:
        dict with ``best_step``, ``trial_id`` and ``score`` first, followed by
        one entry per hyperparameter value. A hyperparameter that shares a
        name with one of the three leading keys overwrites that entry.
    """
    state = trial.get_state()
    row = {key: state[key] for key in ('best_step', 'trial_id', 'score')}
    hyperparameter_values = state['hyperparameters']['values']
    return {**row, **hyperparameter_values}

In [None]:
class CVTuner(kt.Tuner):
    """KerasTuner tuner that scores a trial by cross-validated out-of-fold AUC.

    Relies on the module-level ``N_SPLITS``, ``CVSEED``, ``features`` and
    ``pipe`` objects.
    """

    def run_trial(self, trial, x, y, batch_size=1024, epochs=100):
        """Train one model per stratified fold and report the pooled OOF AUC.

        For every fold the preprocessing pipeline is fitted on the training
        split only and then applied to the held-out split, a fresh model is
        built from the trial's hyperparameters, and its predictions on the
        held-out rows are stored. The ROC AUC over all stored predictions is
        reported to the oracle under the metric name ``'agg_val_aucroc'``.

        Args:
            trial: The KerasTuner trial being evaluated.
            x: pandas DataFrame of input columns (positionally indexed via
                ``.iloc``); must contain the ``features`` columns.
            y: pandas Series of binary labels aligned with ``x``.
            batch_size: Mini-batch size passed to ``model.fit``.
            epochs: Maximum number of epochs per fold (early stopping on
                ``val_aucroc`` with patience 5 may end training sooner).
        """
        cv = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
        # Out-of-fold predictions; every row is written exactly once because
        # the held-out splits partition the data.
        oof_preds = np.zeros(x.shape[0])

        for fold, (train_indices, test_indices) in enumerate(cv.split(x, y)):
            print(f"===== fold {fold} =====")
            # Take explicit copies so the column assignments below write to
            # independent frames instead of to slices of `x`.
            x_train = x.iloc[train_indices].copy()
            x_test = x.iloc[test_indices].copy()
            y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]

            # Fit preprocessing on the training split only, then reuse it
            # unchanged on the held-out split.
            x_train[features] = pipe.fit_transform(x_train[features])
            x_test[features] = pipe.transform(x_test[features])

            model = self.hypermodel.build(trial.hyperparameters)
            # NOTE(review): EarlyStopping is used without restore_best_weights,
            # so the predictions below presumably come from the last epoch's
            # weights rather than the best epoch's - confirm this is intended.
            model.fit(x_train, y_train,
                      validation_data=(x_test, y_test),
                      shuffle=True,
                      verbose=0,
                      callbacks=[
                          tf.keras.callbacks.EarlyStopping(monitor='val_aucroc', mode='max', patience=5)],
                      batch_size=batch_size,
                      epochs=epochs)
            # The model has a single output unit; take it as a 1-D vector.
            oof_preds[test_indices] = model.predict(x_test)[:, -1]
            auc = roc_auc_score(y_test, oof_preds[test_indices])

            print(f"fold {fold} - nn aucroc: {auc:.6f}\n")
        auc_oof = roc_auc_score(y, oof_preds)
        print(f"agg_val_aucroc: {auc_oof}")
        self.oracle.update_trial(trial.trial_id, {'agg_val_aucroc': auc_oof})

In [None]:
# Bayesian search that maximises the pooled out-of-fold AUC reported by
# CVTuner.run_trial, limited to 10 trials and seeded for repeatability.
bayes_oracle = kt.oracles.BayesianOptimization(
    objective=kt.Objective('agg_val_aucroc', direction='max'),
    max_trials=10,
    seed=SEED,
)
tuner = CVTuner(hypermodel=make_model, oracle=bayes_oracle)

In [None]:
# Run the search; the keyword arguments are forwarded to CVTuner.run_trial.
search_inputs = train[features]
search_labels = train[target]
tuner.search(x=search_inputs, y=search_labels, batch_size=1024, epochs=100)

In [None]:
# One row per trial known to the oracle: bookkeeping fields plus the
# hyperparameter values.
trial_rows = list(map(parse_trial_state, tuner.oracle.trials.values()))
trials_df = pd.DataFrame(trial_rows)

trials_df

In [None]:
# Hyperparameters of the top-ranked trial, shown as a plain name -> value dict.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_hp_config = best_hp.get_config()
best_hp_config['values']

# Log

/////// 8 trees + ridge /////////

/// 256 quant normal kbins uniform with dropout, dstep=450, decreasing hidden units ////////
0.8167566627581594 ver5

/// 128 quant normal kbins uniform with dropout, dstep=450, decreasing hidden units ////////
0.816775060867857 ver4
0.8167841782499053 ver6 of ver6-8
0.8167877707284713 ver9 of ver9-11
0.816795651881851 ver16 of ver12-16
0.8168053336620444 ver18 of ver17-21
no improvement in ver22-26

/// 64 quant normal kbins uniform with dropout, dstep=450, decreasing hidden units ////////
0.8166313597810129 ver1 of ver1-2

/// 32 quant normal kbins uniform with dropout, dstep=450, decreasing hidden units ////////
0.8158497098756874 ver3



/////// 8 trees + ridge + nn /////////

/// 256 quant normal kbins uniform with dropout, dstep=450, decreasing hidden units ////////
0.8167406938974502 ver28

/// 128 quant normal kbins uniform with dropout, dstep=450, decreasing hidden units ////////
0.816761281214846 ver29
0.8167752280146776 ver33 of ver32-34
no improvement in ver35-38

/// 64 quant normal kbins uniform with dropout, dstep=450, decreasing hidden units ////////
0.8166620020279339 ver30

/// 32 quant normal kbins uniform with dropout, dstep=450, decreasing hidden units ////////
0.8159247012741027 ver31
