## Import libraries

In [None]:
import gc
import math
import scipy
import itertools
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold

import tensorflow as tf
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import TerminateOnNaN
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.layers import Flatten, Conv1D, Add
from tensorflow.keras.layers import Reshape, Dense, Dropout
from tensorflow.keras.layers import Input, BatchNormalization

# Seed NumPy's and TensorFlow's global random generators with the same
# fixed value so repeated runs start from the same random state.
np.random.seed(42)
tf.random.set_seed(42)

## Load source datasets

In [None]:
# Read the training split and key its rows by the `id` column.
train = pd.read_csv(
    "../input/tabular-playground-series-may-2022/train.csv"
).set_index('id')
print(f"train: {train.shape}")
train.head()

In [None]:
# Read the test split and key its rows by the `id` column.
test = pd.read_csv(
    "../input/tabular-playground-series-may-2022/test.csv"
).set_index('id')
print(f"test: {test.shape}")
test.head()

## Feature Engineering

In [None]:
# Derive extra columns on both splits, in place. The creation order below is
# kept fixed because the feature list is later read from `test.columns`.
letter_base = ord('A')

for frame in (train, test):
    # One integer column per character position of `f_27` (offset from 'A').
    for pos in tqdm(range(10)):
        codes = frame['f_27'].str.get(pos).apply(ord)
        frame[f'f_27_{pos}'] = codes - letter_base

    # Number of distinct characters appearing in the `f_27` string.
    frame["unique_characters"] = frame['f_27'].apply(lambda text: len(set(text)))

    # Three-level indicators (-1 / 0 / +1): +1 when the sum of the listed
    # columns exceeds the upper cut-off, -1 when it falls below the lower one.
    sum_02_21 = frame['f_21'] + frame['f_02']
    frame['i_02_21'] = (sum_02_21 > 5.2).astype(int) - (sum_02_21 < -5.3).astype(int)

    sum_05_22 = frame['f_22'] + frame['f_05']
    frame['i_05_22'] = (sum_05_22 > 5.1).astype(int) - (sum_05_22 < -5.4).astype(int)

    sum_00_01_26 = frame['f_00'] + frame['f_01'] + frame['f_26']
    frame['i_00_01_26'] = (sum_00_01_26 > 5.0).astype(int) - (sum_00_01_26 < -5.0).astype(int)

In [None]:
# The raw string column has been expanded into numeric columns above, so it
# is removed from both splits; every remaining test column is a model feature.
train.drop(columns=['f_27'], inplace=True)
test.drop(columns=['f_27'], inplace=True)
features = list(test.columns)
len(features)

In [None]:
# Standardise the feature columns. The scaler statistics come from the
# training split only and are then applied to both splits.
scaler = StandardScaler()
scaler.fit(train[features])
train[features] = scaler.transform(train[features])
test[features] = scaler.transform(test[features])

## Helper Function

In [None]:
def plot_confusion_matrix(cm, classes):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D array of integer counts; each cell is printed with the
            ``'d'`` format, so non-integer values are not supported.
        classes: Sequence of class labels used as tick labels on both axes.
    """
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion matrix', fontweight='bold', pad=15)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=0)
    plt.yticks(positions, classes)

    # Cells above half of the largest count get white text, the rest black,
    # so the annotation stays readable against the cell colour.
    midpoint = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            count = cm[row, col]
            text_colour = "white" if count > midpoint else "black"
            plt.text(col, row, format(count, 'd'),
                     horizontalalignment="center",
                     color=text_colour)

    plt.ylabel('True label', fontweight='bold')
    plt.xlabel('Predicted label', fontweight='bold')
    plt.tight_layout()

## Keras Model

In [None]:
def cosine_decay(epoch):
    """Return the learning rate for ``epoch`` on a half-cosine schedule.

    Reads the module-level ``epochs``, ``lr_start`` and ``lr_end``. The rate
    equals ``lr_start`` at epoch 0 and ``lr_end`` at epoch ``epochs - 1``;
    when ``epochs`` is 1 or less the blend weight is fixed at 1, which puts
    all of the weight on ``lr_start``.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The blended learning rate.
    """
    # Weight on `lr_start`: falls from 1 to 0 along half a cosine period.
    blend = (
        (1 + math.cos(epoch / (epochs - 1) * math.pi)) / 2
        if epochs > 1
        else 1
    )
    return blend * lr_start + (1 - blend) * lr_end

In [None]:
def dnn_model():
    """Build the uncompiled Dense/Conv1D network with a single sigmoid output.

    The input width is ``len(features)`` (module-level list). A 384-unit
    dense embedding is reshaped to a ``(16, 24)`` sequence, passed through a
    stack of 1-D convolutions containing one residual connection, flattened,
    summed with the original embedding, and reduced by two dense stages to a
    single sigmoid unit.

    Returns:
        A Keras ``Model`` named ``'TPS_May22_TF_Model'``.
    """
    activation = 'swish'
    initializer = 'lecun_normal'

    def conv_bn(tensor, filters, strides):
        """Width-3, 'same'-padded Conv1D followed by batch normalisation."""
        tensor = Conv1D(filters=filters, kernel_size=3, strides=strides,
                        padding='same', activation=activation,
                        kernel_initializer=initializer)(tensor)
        return BatchNormalization()(tensor)

    def dense_bn_dropout(tensor, units, rate):
        """Dense layer followed by batch normalisation and dropout."""
        tensor = Dense(units=units, activation=activation,
                       kernel_initializer=initializer)(tensor)
        tensor = BatchNormalization()(tensor)
        return Dropout(rate=rate)(tensor)

    x_input = Input(shape=(len(features),))

    # Dense embedding; reused further down as a skip connection.
    embedding = dense_bn_dropout(x_input, units=384, rate=0.25)

    # View the 384 units as 16 steps x 24 channels for the convolutions.
    sequence = Reshape((16, 24))(embedding)
    sequence = conv_bn(sequence, filters=48, strides=2)

    # Residual block: the first 96-filter conv output is added back to the
    # output of two further 96-filter convs.
    shortcut = conv_bn(sequence, filters=96, strides=1)
    branch = conv_bn(shortcut, filters=96, strides=1)
    branch = conv_bn(branch, filters=96, strides=1)
    merged = Add()([shortcut, branch])

    merged = conv_bn(merged, filters=96, strides=2)

    # The two stride-2 convs reduce the 16 steps to 4, so flattening gives
    # 4 * 96 = 384 values, matching the embedding width for the addition.
    flat = Flatten()(merged)
    combined = Add()([flat, embedding])

    head = dense_bn_dropout(combined, units=192, rate=0.3)
    head = dense_bn_dropout(head, units=96, rate=0.2)

    x_output = Dense(units=1, activation='sigmoid',
                     kernel_initializer=initializer)(head)

    return Model(inputs=x_input,
                 outputs=x_output,
                 name='TPS_May22_TF_Model')

In [None]:
# Build one instance of the network and print its layer-by-layer summary.
model = dnn_model()
model.summary()

In [None]:
# Write a diagram of the network, annotated with tensor shapes and layer
# names, to a PNG file in the working directory.
plot_model(
    model,
    to_file='./TPS_May22_TF_Model.png',
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
# Cross-validated training of `dnn_model`.
#
# For every seed, a stratified K-fold split is trained from scratch. Within
# each fold the checkpoint with the highest validation AUC is reloaded and
# used to predict the validation rows and the whole test set. Predictions are
# rank-transformed before being accumulated, so the aggregated arrays hold
# ranks rather than probabilities; the per-fold AUC that is printed is
# computed on the raw (un-ranked) validation predictions.

# `rankdata` lives in the `scipy.stats` submodule; import it explicitly
# instead of relying on a bare `import scipy` having loaded it.
import scipy.stats

FOLD = 10
SEEDS = [42]

# `lr_start`, `lr_end` and `epochs` are also read by `cosine_decay`.
lr_start = 1e-2
lr_end = 1e-4
batch_size = 2048
verbose = 0
epochs = 75

counter = 0      # number of (seed, fold) models trained so far
oof_score = 0    # running sum of per-fold validation AUC
y_pred_final_dnn = np.zeros((test.shape[0], 1))
y_pred_meta_dnn = np.zeros((train.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0

    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for fold, (train_idx, val_idx) in enumerate(kfold.split(train[features], train['target'])):
        counter += 1

        train_x, train_y = train[features].iloc[train_idx], train['target'].iloc[train_idx]
        val_x, val_y = train[features].iloc[val_idx], train['target'].iloc[val_idx]

        checkpoint_path = f'./TPS_May22_TF_Model_{fold+1}C.h5'

        model = dnn_model()
        # Name the metric explicitly so the logged key is always 'val_auc'.
        # An auto-named AUC instance may receive a numeric suffix (for
        # example 'auc_1') when several are created in one session, and the
        # checkpoint monitor below would then never see its key.
        model.compile(
            optimizer=Adamax(learning_rate=lr_start),
            loss="binary_crossentropy",
            metrics=[tf.keras.metrics.AUC(name='auc')],
        )

        lr = LearningRateScheduler(cosine_decay, verbose=verbose)

        chk_point = ModelCheckpoint(checkpoint_path,
                                    monitor='val_auc', verbose=verbose,
                                    save_best_only=True, mode='max')

        model.fit(
            train_x, train_y,
            validation_data=(val_x, val_y),
            epochs=epochs,
            verbose=verbose,
            batch_size=batch_size,
            callbacks=[lr, chk_point, TerminateOnNaN()]
        )

        # Continue with the best checkpoint, not the last-epoch weights.
        model = load_model(checkpoint_path)

        y_pred = model.predict(val_x, batch_size=batch_size, verbose=verbose)
        test_pred = model.predict(test, batch_size=batch_size, verbose=verbose)

        # Accumulate ranks as column vectors; validation ranks are computed
        # within the current fold only.
        y_pred_meta_dnn[val_idx] += scipy.stats.rankdata(y_pred).reshape(-1, 1)
        y_pred_final_dnn += scipy.stats.rankdata(test_pred).reshape(-1, 1)

        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print(f"Seed-{seed} | Fold-{fold+1} | OOF Score: {score}")

        # Release per-fold objects before the next model is built.
        del model, y_pred, test_pred
        del train_x, train_y
        del val_x, val_y
        gc.collect()

    print(f"\nSeed: {seed} | Aggregate OOF Score: {seed_score / FOLD}\n\n")


# Each training row is validated once per seed; the test set is predicted
# once per (seed, fold) model.
y_pred_meta_dnn = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print(f"Aggregate OOF Score: {oof_score}")

In [None]:
# Divide each accumulated prediction array by its own maximum so that the
# largest value in each becomes 1.
y_pred_meta_dnn = y_pred_meta_dnn / y_pred_meta_dnn.max()
y_pred_final_dnn = y_pred_final_dnn / y_pred_final_dnn.max()

In [None]:
# Turn the normalised out-of-fold scores into hard 0/1 labels at a 0.5
# cut-off, then report per-class metrics and draw the confusion matrix.
class_labels = [0, 1]
y_true = train['target']
y_pred = (y_pred_meta_dnn > 0.5).astype(int)

print(classification_report(y_true, y_pred))

cnf_matrix = confusion_matrix(y_true, y_pred, labels=class_labels)
np.set_printoptions(precision=2)
plt.figure(figsize=(12, 5))
plot_confusion_matrix(cnf_matrix, classes=class_labels)

## Create submission file

In [None]:
# Fill the sample submission's `target` column with the flattened test
# predictions and write the result without the DataFrame index.
sub = pd.read_csv("../input/tabular-playground-series-may-2022/sample_submission.csv")
sub['target'] = y_pred_final_dnn.reshape(-1)
sub.to_csv("./dnn_submission.csv", index=False)
sub.head()

In [None]:
## Good Day!!