In [None]:
# Adapted from the Optuna TensorFlow example:
#   https://github.com/optuna/optuna-examples/blob/main/tensorflow/tensorflow_eager_simple.py
# and from the TFKerasPruningCallback source:
#   https://optuna.readthedocs.io/en/stable/_modules/optuna/integration/tfkeras.html#TFKerasPruningCallback

In [None]:
# Time budget handed to `study.optimize(timeout=...)`: 4.5 hours expressed in seconds.
TIMEOUT = 4.5 * 60 * 60

In [None]:
import numpy as np
import pandas as pd
import sys
import os
from time import time

import tensorflow_addons as tfa
import tensorflow as tf
import gc
import joblib
import pickle

In [None]:
import optuna
from optuna.integration import TFKerasPruningCallback
from optuna.trial import TrialState

In [None]:
# Alternative inputs (DAE embeddings), kept for reference:
#train = pd.read_feather('/kaggle/input/embeddings-from-daes-with-tensorflow/train_daeta_500_3_0.15_0.1_5')
#test =  pd.read_feather('/kaggle/input/embeddings-from-daes-with-tensorflow/test_daeta_500_3_0.15_0.1_5')
train = pd.read_feather('/kaggle/input/september-feather/train_rg_min')
test = pd.read_feather('/kaggle/input/september-feather/test_rg_min')

# Sample submission file of the competition.
ss = pd.read_csv('/kaggle/input/tabular-playground-series-sep-2021/sample_solution.csv')

# Name of the label column.
TARGET = 'claim'

# Model inputs: every training column whose name contains the letter 'f',
# followed by the 'nan_count' column.
FEATURES = [column for column in train.columns if 'f' in column]
FEATURES.append('nan_count')

In [None]:
# Creating the data for optuna to train and validate.
# Every row of `train` is assigned independently to the fit split with
# probability `train_percent`, otherwise to the validation split, so the
# realised split sizes are only approximately train_percent / (1 - train_percent).
np.random.seed(42)  # fixed seed -> the same mask on every run
train_percent = .8
X_msk = np.random.choice(
    a=[True, False],
    size=len(train),
    replace=True,
    # The ratio is driven by `train_percent` (previously hard-coded a second time).
    p=[train_percent, 1 - train_percent],
)

# Rows selected by the mask are used for fitting ...
X = train.loc[X_msk, FEATURES].values
y = train.loc[X_msk, TARGET].values

# ... and the remaining rows for validation / pruning decisions.
val_X = train.loc[~X_msk, FEATURES].values
val_y = train.loc[~X_msk, TARGET].values

In [None]:
##################################################################
#Special layers
##################################################################
class CutMix(tf.keras.layers.Layer):
    '''
    Element-wise CutMix-style corruption that is only active in training mode.

    Every element of the input is kept with probability ``1 - noise``;
    otherwise it is replaced by the element at the same position of a
    shuffled copy of the input (``tf.random.shuffle``). No gradient flows
    through the shuffled copy.

    Args
    _____
    noise: probability that a single value is taken from the shuffled copy
           instead of the original input

    Application
    ____________
    CM = CutMix(.2)
    x = tf.reshape(tf.range(0,10, dtype=tf.float32), (5,2))
    print(x.numpy())
    y = CM(x,True)
    print(y.numpy())
    '''
    def __init__(self, noise, **kwargs):
        super().__init__(**kwargs)
        self.noise = noise

    def get_config(self):
        # Base layer config extended with the constructor argument.
        base_config = super().get_config()
        base_config.update({"noise": self.noise})
        return base_config

    def call(self, inputs, training=None):
        # Outside of training the layer is the identity.
        if not training:
            return inputs

        # Shuffled copy that supplies the replacement values.
        donor = tf.stop_gradient(tf.random.shuffle(inputs))

        # 1.0 where the original value is kept, 0.0 where it is swapped out.
        keep = tf.keras.backend.random_bernoulli(tf.shape(inputs), p=1 - self.noise, dtype=tf.float32)
        swap = tf.ones_like(keep) - keep
        return keep * inputs + swap * donor

class MixUp(tf.keras.layers.Layer):
    '''
    Implementation of MixUp (inputs only; targets are not touched by this layer).

    In training mode every row is blended with the corresponding row of a
    shuffled copy of the input:

        output = (1 - alpha) * inputs + alpha * shuffled

    Outside of training the layer is the identity.

    Args
    _____
    alpha: (R in [0,1)) share of the output taken from the random (shuffled)
           sample; ``alpha = 0`` leaves the input unchanged

    Application
    ____________
    MU = MixUp(.1)
    x = tf.reshape(tf.range(0,10, dtype=tf.float32), (5,2))
    y = MU(x, True)
    print(x.numpy())
    print(y.numpy())
    '''
    def __init__(self, alpha, **kwargs):
        super(MixUp, self).__init__(**kwargs)
        self.alpha = alpha
        # Explicit float32 so that an integer alpha (e.g. 0) does not create an
        # int32 constant that cannot be multiplied with float inputs.
        self.alpha_constant = tf.constant(self.alpha, dtype=tf.float32)
        self.one_minus_alpha = tf.constant(1.) - self.alpha_constant

    def get_config(self):
        config = super(MixUp, self).get_config()
        config.update({"alpha": self.alpha})
        return config

    def call(self, inputs, training=None):
        if training:
            # No gradient flows through the shuffled copy.
            shuffled = tf.stop_gradient(tf.random.shuffle(inputs))
            # alpha weights the *shuffled* rows, as documented above. The
            # weights used to be the other way round, which meant a small
            # alpha replaced almost all of each row with another row's values.
            return self.one_minus_alpha * inputs + self.alpha_constant * shuffled
        return inputs
    
class ResnetBlockTabular(tf.keras.Model):
    '''
    Residual block of dense layers for tabular inputs.

    Computes ``x + Dense(ReLU(BN(Dense(ReLU(BN(x))))))``. When the width of
    the incoming tensor differs from ``output_dim`` the input is first
    projected to ``output_dim`` by an extra dense layer, and that projection
    is what the skip connection adds back.
    '''
    def __init__(self, output_dim, **kwargs):
        '''
        output_dim: (int) dimension of output dense layer. 
        NOTE: if output_dim == input_dim, this is a ResNetIdentityBlock
        '''
        super(ResnetBlockTabular, self).__init__(**kwargs)
        self.output_dim = output_dim
    
    def build(self, input_shape):
        # The projection layer is only needed when the widths differ.
        if self.output_dim == input_shape[-1]:
            self.Dense1 = None
        else:
            # Was `Dense(output_dim)`: a bare name that does not exist in this
            # scope and raised NameError for every non-identity block.
            self.Dense1 = tf.keras.layers.Dense(self.output_dim)

        self.bn1 = tf.keras.layers.BatchNormalization()
        self.relu1 = tf.keras.layers.ReLU()
        self.dense2 = tf.keras.layers.Dense(self.output_dim)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.relu2 = tf.keras.layers.ReLU()
        self.dense3 = tf.keras.layers.Dense(self.output_dim)
    
    def call(self, input_tensor, training=False):
        # Optional projection so that the residual sum has matching widths.
        if self.Dense1 is not None:
            input_tensor = self.Dense1(input_tensor)
        
        # `training` is forwarded explicitly so batch normalisation always
        # follows the mode this block was called with.
        x = self.bn1(input_tensor, training=training)
        x = self.relu1(x)
        x = self.dense2(x)
        x = self.bn2(x, training=training)
        x = self.relu2(x)
        x = self.dense3(x)
        
        # Skip connection.
        return x + input_tensor

In [None]:
def ff(num_input_columns, BLOCKS, drop_rate, cutmix_noise, mixup_alpha, optimizer, block_sizes =None):
    '''
    Build and compile the feed-forward residual classifier.

    Layout: Input -> CutMix -> BatchNormalization -> ResnetBlockTabular ->
    MixUp -> (BLOCKS - 1) x [ResnetBlockTabular -> Dropout] -> Dense(1, sigmoid).

    Args
    _____
    num_input_columns: (int) number of input features
    BLOCKS: (int >= 1) number of ResnetBlockTabular blocks
    drop_rate: dropout rate applied after every block except the first
    cutmix_noise: forwarded to CutMix as ``noise``
    mixup_alpha: forwarded to MixUp as ``alpha``
    optimizer: Keras optimizer instance used to compile the model
    block_sizes: optional sequence of BLOCKS output widths, one per block;
                 defaults to ``num_input_columns`` for every block

    Returns
    _______
    Compiled tf.keras.Model with binary cross-entropy loss and an AUC metric
    named 'auc' (so the validation metric is logged as 'val_auc').

    Raises
    ______
    ValueError: if BLOCKS < 1 or len(block_sizes) != BLOCKS
    '''
    if BLOCKS < 1:
        raise ValueError(f'BLOCKS must be at least 1, got {BLOCKS}.')

    if block_sizes is None:
        block_sizes = [num_input_columns] * BLOCKS
    elif len(block_sizes) != BLOCKS:
        # Previously this was only printed; a short list then failed later
        # with an IndexError and a long list was silently truncated.
        raise ValueError(f'block_sizes has {len(block_sizes)} blocks.  Needs {BLOCKS}.')
    
    #Input
    inp = tf.keras.layers.Input(shape=(num_input_columns,))
    x = CutMix(noise = cutmix_noise)(inp)
    x = tf.keras.layers.BatchNormalization()(x)
    x = ResnetBlockTabular(output_dim = block_sizes[0], name='Resnet_0')(x)
    x = MixUp(alpha= mixup_alpha)(x)
    
    #Remaining residual blocks, each followed by dropout
    for i in range(1,BLOCKS):
        x = ResnetBlockTabular(output_dim = block_sizes[i], name=f'Resnet_{i}')(x)
        x = tf.keras.layers.Dropout(drop_rate)(x)
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=inp, outputs=x)
    
    # The metric is named explicitly: Keras otherwise auto-numbers it
    # ('auc_1', 'auc_2', ...) when several models are built in one session,
    # which would break callbacks that monitor 'val_auc'.
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=[tf.keras.metrics.AUC(name='auc')])
    return model

In [None]:
# Disabled smoke test. The code below sits inside a bare string literal, so it
# is never executed: it would build one model with fixed hyper-parameters and
# call `evaluate` on the fit split (X, y).
'''
#DELETE THIS.  JUST CHECKING THE EVALUATE FUNCTION
optimizer = tf.keras.optimizers.Adam()
model = ff(num_input_columns=len(FEATURES), BLOCKS=3, drop_rate=.1, cutmix_noise=.3, mixup_alpha=.2, optimizer=optimizer)
model.evaluate(X,y, batch_size=10000)
'''

In [None]:
# Stops a trial once 'val_auc' has not improved for 20 consecutive epochs.
# The same callback object is passed to every trial's `fit` call.
ES = tf.keras.callbacks.EarlyStopping(monitor='val_auc', min_delta=0, patience=20, verbose=0, mode='max')
def objective(trial):
    '''
    Optuna objective: sample hyper-parameters, train one model, return its validation AUC.

    Sampled parameters: batch_size, BLOCKS, drop_rate, cutmix_noise,
    mixup_alpha (upper bound chosen so cutmix_noise + mixup_alpha <= 0.5),
    the optimizer type and that optimizer's own settings.

    Args
    _____
    trial: optuna trial used to sample parameters and to report to the pruner

    Returns
    _______
    Second entry of ``model.evaluate(val_X, val_y)``, i.e. the AUC metric on
    the validation split (the first entry is the loss).
    '''
    #from https://github.com/optuna/optuna-examples/blob/main/keras/keras_integration.py
    # Clear clutter from previous session graphs.
    tf.keras.backend.clear_session()
    batch_size = trial.suggest_categorical('batch_size', [512,1024])
    epochs = 200
    
    ###################################
    # Generate our trial model.
    ###################################
    #Model Architecture specifications
    num_input_columns= len(FEATURES)
    BLOCKS = trial.suggest_int("BLOCKS", 1, 10) 
    drop_rate= trial.suggest_float("drop_rate", 0, .2, )
    
    #Sum of cutmix and mixup <=.5
    cutmix_noise= trial.suggest_float("cutmix_noise", 0., .5)
    mixup_alpha=trial.suggest_float("mixup_alpha", 0., .5 - cutmix_noise)
    
    #Model Optimizer Specifications
    #Adapted from https://github.com/optuna/optuna-examples/blob/main/tensorflow/tensorflow_eager_simple.py
    #Thanks y'all!
    kwargs = {}
    optimizer_options = ["RMSprop", "Adam", "SGD"]
    optimizer_selected = trial.suggest_categorical("optimizer", optimizer_options)
    if optimizer_selected == "RMSprop":
        kwargs["learning_rate"] = trial.suggest_float(
            "rmsprop_learning_rate", 1e-5, 1e-1, log=True
        )
        # The sampled 0.85-0.99 value is the discount of the squared-gradient
        # moving average, which tf.keras calls `rho`. It used to be passed as
        # `decay`, which in tf.keras is the legacy inverse-time learning-rate
        # decay and would shrink the learning rate to almost nothing within a
        # few hundred steps. The Optuna parameter name is kept unchanged so
        # existing studies stay comparable.
        kwargs["rho"] = trial.suggest_float("rmsprop_decay", 0.85, 0.99)
        kwargs["momentum"] = trial.suggest_float("rmsprop_momentum", 1e-5, 1e-1, log=True)
    elif optimizer_selected == "Adam":
        kwargs["learning_rate"] = trial.suggest_float("adam_learning_rate", 1e-5, 1e-1, log=True)
    elif optimizer_selected == "SGD":
        kwargs["learning_rate"] = trial.suggest_float(
            "sgd_opt_learning_rate", 1e-5, 1e-1, log=True
        )
        kwargs["momentum"] = trial.suggest_float("sgd_opt_momentum", 1e-5, 1e-1, log=True)

    # Look the optimizer class up by name and instantiate it with the sampled settings.
    optimizer = getattr(tf.optimizers, optimizer_selected)(**kwargs)
    
    model = ff(num_input_columns, BLOCKS, drop_rate, cutmix_noise, mixup_alpha, optimizer)

    # Fit the model on the training data.
    # The KerasPruningCallback checks for pruning condition every epoch.
    model.fit(
        X,
        y,
        batch_size=batch_size,
        callbacks=[ES, TFKerasPruningCallback(trial, "val_auc")],
        epochs=epochs,
        validation_data=(val_X, val_y),
        verbose=1,
    )

    # Evaluate the model on the validation set; score = [loss, auc].
    score = model.evaluate(val_X, val_y, batch_size=10000,verbose=0)
    return score[1]

In [None]:
# Maximise validation AUC; the median pruner stops trials that lag behind.
study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())

# Keep starting new trials until the TIMEOUT budget (4.5 hours) is used up.
study.optimize(objective, timeout=TIMEOUT)
#study.optimize(objective, n_trials=5)

# Split the finished trials by final state for the summary below.
pruned_trials = study.get_trials(deepcopy=False, states=(TrialState.PRUNED,))
complete_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# Report the best trial: its objective value and the parameters that produced it.
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
# Persist the study to the working directory so it can be inspected or resumed later.
# To regain this study: study = joblib.load("study.pkl")
# See https://optuna.readthedocs.io/en/stable/faq.html#how-can-i-save-and-resume-studies
joblib.dump(value=study, filename="study.pkl")