## Import libraries

In [None]:
import gc
import itertools
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Input, BatchNormalization
from tensorflow.keras.layers import Dense, Dropout, Multiply

# Seed NumPy's and TensorFlow's global random number generators with a fixed
# value so repeated runs start from the same random state.
np.random.seed(42)
tf.random.set_seed(42)

## Load source datasets

In [None]:
# Read the training CSV and promote its `id` column to the row index.
train_df = pd.read_csv(
    "../input/tabular-playground-series-nov-2021/train.csv"
).set_index('id')
print(f"train_df: {train_df.shape}")
train_df.head()

In [None]:
# Read the test CSV and promote its `id` column to the row index.
test_df = pd.read_csv(
    "../input/tabular-playground-series-nov-2021/test.csv"
).set_index('id')
print(f"test_df: {test_df.shape}")
test_df.head()

## Feature Engineering

In [None]:
# Every column of the test frame is a model input; keep the names as a list.
features = list(test_df.columns)
len(features)

In [None]:
# Add a binary "is positive" indicator (`<col>_bin`) for every feature in
# both frames.
#
# The cube root preserves sign, so `np.cbrt(x) > 0` is exactly `x > 0`
# (NaN compares False and maps to 0 in both forms). Comparing the whole
# column at once avoids one Python-level lambda call per cell.
for col in tqdm(features):
    train_df[col+'_bin'] = (train_df[col] > 0).astype('int64')
    test_df[col+'_bin'] = (test_df[col] > 0).astype('int64')

print(f"train_df: {train_df.shape} \ntest_df: {test_df.shape}")
train_df.head()

In [None]:
# Rebuild the feature list from the test frame's current columns.
features = list(test_df.columns)
print(f"Num features: {len(features)}")

In [None]:
# Cast every feature column of both frames to float32.
for frame in (train_df, test_df):
    frame[features] = frame[features].astype('float32')
print(f"train_df: {train_df.shape} \ntest_df: {test_df.shape}")

## Helper Function

In [None]:
def plot_confusion_matrix(cm, classes):
    """Draw a confusion matrix as an annotated heat map.

    Everything is drawn through the `plt` state interface, i.e. onto
    whatever figure is current when the function is called.

    Args:
        cm: Matrix indexed as ``cm[row, col]`` whose cells are formatted
            with the integer ``'d'`` format code. Rows are labelled
            'True label' and columns 'Predicted label'.
        classes: Tick labels, used for both axes.
    """
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    # Cells above half the maximum get white text, the rest black text.
    half_max = cm.max() / 2.

    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion matrix', fontweight='bold', pad=15)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=0)
    plt.yticks(positions, classes)

    for row in range(n_rows):
        for col in range(n_cols):
            count = cm[row, col]
            text_color = "white" if count > half_max else "black"
            # x is the column index, y is the row index.
            plt.text(col, row, f"{count:d}",
                     horizontalalignment="center",
                     color=text_color)

    plt.ylabel('True label', fontweight='bold')
    plt.xlabel('Predicted label', fontweight='bold')
    plt.tight_layout()

## Keras Model

In [None]:
def dnn_model():
    """Build the uncompiled Keras functional model named 'DNN_Model'.

    The input width is ``len(features)``, read from the module-level
    ``features`` list at call time. The network narrows 384 -> 192 -> 96
    units and widens back 192 -> 384; each widening stage is multiplied
    element-wise with the batch-normalized output of the narrowing stage
    of the same width. The 96-unit bottleneck and the final 384-unit stage
    are concatenated and fed through a 128-unit head to a single sigmoid
    output.

    Returns:
        A ``tensorflow.keras.models.Model`` mapping the feature vector to
        one sigmoid output.
    """

    def dense_bn(tensor, units):
        # SELU dense layer followed by batch normalization.
        hidden = Dense(units=units, activation='selu')(tensor)
        return BatchNormalization()(hidden)

    inputs = Input(shape=(len(features),))

    # Narrowing path; the pre-dropout tensors are reused as gates below.
    enc_384 = dense_bn(inputs, 384)
    enc_384_drop = Dropout(rate=0.45)(enc_384)

    enc_192 = dense_bn(enc_384_drop, 192)
    enc_192_drop = Dropout(rate=0.35)(enc_192)

    bottleneck = dense_bn(enc_192_drop, 96)
    bottleneck = Dropout(rate=0.25)(bottleneck)

    # Widening path, gated by the matching-width narrowing activations.
    dec_192 = dense_bn(bottleneck, 192)
    dec_192 = Multiply()([enc_192, dec_192])
    dec_192 = Dropout(rate=0.35)(dec_192)

    dec_384 = dense_bn(dec_192, 384)
    dec_384 = Multiply()([enc_384, dec_384])
    dec_384 = Dropout(rate=0.45)(dec_384)

    # Classification head over bottleneck + widest decoded features.
    head = Concatenate()([bottleneck, dec_384])
    head = dense_bn(head, 128)
    head = Dropout(rate=0.25)(head)

    outputs = Dense(units=1, activation='sigmoid')(head)

    return Model(inputs=inputs, outputs=outputs, name='DNN_Model')

In [None]:
# Build one instance of the network and print its layer-by-layer summary.
model = dnn_model()
model.summary()

In [None]:
# Repeated stratified K-fold training of the DNN.
#
# For every seed in SEEDS the training data is split into FOLD stratified
# folds (the seed is used as the splitter's random_state). One model is
# trained per fold, and its best checkpoint (lowest val_loss) is used to
#   * fill the out-of-fold predictions for that fold's validation rows, and
#   * add a prediction for the full test set.
# After the loops, out-of-fold predictions are averaged over seeds and test
# predictions over all trained models.
FOLD = 7
VERBOSE = 0
SEEDS = [13, 18]
BATCH_SIZE = 1024

counter = 0      # total number of models trained so far (seed x fold)
oof_score = 0    # running sum of per-fold validation AUC
y_pred_final_dnn = np.zeros((test_df.shape[0], 1))
y_pred_meta_dnn = np.zeros((train_df.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(train_df[features], train_df['target'])):
        counter += 1

        train_x, train_y = train_df[features].iloc[train], train_df['target'].iloc[train]
        val_x, val_y = train_df[features].iloc[val], train_df['target'].iloc[val]

        model = dnn_model()
        model.compile(optimizer=Adam(learning_rate=1e-2), 
                      loss="binary_crossentropy", 
                      metrics=['AUC'])

        # Single source of truth for the checkpoint file, so the path that
        # is written and the path that is reloaded cannot drift apart.
        checkpoint_path = f'./Keras_DNN_Model_{counter}C.h5'

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)
        
        chk_point = ModelCheckpoint(checkpoint_path, 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        es = EarlyStopping(monitor="val_loss", patience=15, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        # epochs is only an upper bound; EarlyStopping ends training.
        model.fit(train_x, train_y, 
                  validation_data=(val_x, val_y), 
                  epochs=2000,
                  verbose=VERBOSE,
                  batch_size=BATCH_SIZE, 
                  callbacks=[lr, chk_point, es])
        
        # Predict with the best checkpoint written during training.
        model = load_model(checkpoint_path)
        
        y_pred = model.predict(val_x, batch_size=BATCH_SIZE)
        y_pred_meta_dnn[val] += y_pred
        y_pred_final_dnn += model.predict(test_df, batch_size=BATCH_SIZE)
        
        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print("\nSeed-{} | Fold-{} | OOF Score: {}\n".format(seed, idx, score))
        
        del model, y_pred
        del train_x, train_y
        del val_x, val_y
        # Dropping the Python references is not enough: Keras keeps global
        # state for every model it has built, which otherwise grows with
        # each of the len(SEEDS) * FOLD models created in this loop.
        tf.keras.backend.clear_session()
        gc.collect()
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


# Every training row is a validation row exactly once per seed, so the
# accumulated out-of-fold predictions are averaged over the seeds; the test
# predictions were accumulated once per trained model.
y_pred_meta_dnn = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

In [None]:
# Collapse the averaged out-of-fold predictions along axis 1, threshold them
# at 0.5 and report per-class metrics against the training targets.
y_pred_meta = y_pred_meta_dnn.mean(axis=1)
y_pred = (y_pred_meta > 0.5).astype(int)
print(classification_report(train_df['target'], y_pred))

In [None]:
# Confusion matrix of the thresholded out-of-fold predictions, drawn as a
# heat map on a fresh 12x5 figure.
np.set_printoptions(precision=2)
cnf_matrix = confusion_matrix(
    train_df['target'],
    y_pred,
    labels=[0, 1],
)
plt.figure(figsize=(12, 5))
plot_confusion_matrix(cnf_matrix, classes=[0, 1])

## Save meta features

In [None]:
# Persist the averaged out-of-fold and test predictions in one compressed
# .npz archive, keyed by their variable names.
meta_arrays = {
    'y_pred_meta_dnn': y_pred_meta_dnn,
    'y_pred_final_dnn': y_pred_final_dnn,
}
np.savez_compressed('./TPS_1121_DNN_Meta_Features.npz', **meta_arrays)

## Create submission file

In [None]:
# Fill the sample submission's `target` column with the flattened averaged
# test predictions and write the result out without the row index.
submit_df = pd.read_csv(
    "../input/tabular-playground-series-nov-2021/sample_submission.csv"
)
submit_df['target'] = y_pred_final_dnn.reshape(-1)
submit_df.to_csv("DNN_Submission.csv", index=False)
submit_df.head()