My starting point for using TensorFlow/Keras on the Tabular Playground Series (TPS), November 2021.

My first model is build_model01().  I will add additional models, keeping the original for comparison.

# Versions

- V15: Love working on iPad, but it is easier to make mistakes.  See V14.
- V14: Back to 3 hidden layers. 
- V12: batch_size=1024
- V11: Decreased batch_size to 128 and used 4 hidden layers. This failed.

- V4: build_model02_swish_dropout()
- V3: build_model03_swish()
  - Added lots of extra layers
- V2: build_model02_swish()
  - swish activation.  
  - Added Charts
  - 25 epochs
- V1: Original - build_model01()

# References

- [[Tutorial] TensorFlow 2.x For Tabular Data](https://www.kaggle.com/lucamassaron/tutorial-tensorflow-2-x-for-tabular-data)
- [PyTorch CV - EarlyStopping - LRScheduler](https://www.kaggle.com/yusufmuhammedraji/pytorch-cv-earlystopping-lrscheduler)
- [TPS Nov 2021 pytorch lightning](https://www.kaggle.com/hiro5299834/tps-nov-2021-pytorch-lightning) LB: 0.74793

In [None]:
import os
import time

import pandas as pd
import numpy as np
from pathlib import Path

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import random

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import roc_auc_score
from sklearn import model_selection
from sklearn.model_selection import KFold,StratifiedKFold, GroupKFold

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
from tensorflow.keras.initializers import RandomUniform

# Configuration

In [None]:
class Config:
    """Central place for the notebook's constants."""

    # Run identification and reproducibility
    competition = "TPS_202111"
    seed = 42
    debug = False

    # Cross-validation and training schedule
    n_folds = 5
    batch_size = 1024
    epochs = 200  # earlier runs used 25

In [None]:
def seed_everything(seed=Config.seed):
    """Seed the NumPy, Python and TensorFlow random number generators.

    Args:
        seed: Value handed to each generator. The default is ``Config.seed``
            as it was when this function was defined.
    """
    for apply_seed in (np.random.seed, random.seed, tf.random.set_seed):
        apply_seed(seed)

In [None]:
# Directory the competition CSV files (train, test, sample submission) are read from.
data_dir = Path('../input/tabular-playground-series-nov-2021')

In [None]:
%%time
# Load the three competition tables from data_dir.
# (For a quick experiment, pass nrows=... to read_csv to load only a sample.)
train_df, test_df, sample_submission = (
    pd.read_csv(data_dir / csv_name)
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)

for summary_line in (
    f"train data: Rows={train_df.shape[0]}, Columns={train_df.shape[1]}",
    f"test data : Rows={test_df.shape[0]}, Columns={test_df.shape[1]}",
):
    print(summary_line)

In [None]:
# Display the first rows of the training frame as a sanity check on the load.
train_df.head()

# Feature Engineering

In [None]:
# Model inputs are every training column except the row id and the label.
non_feature_columns = ('id', 'target')
features = [name for name in train_df.columns if name not in non_feature_columns]

# Standardize/Normalize the Data

In [None]:
# Standardize the feature columns in place with scikit-learn's StandardScaler.
scaler = StandardScaler()

# The scaler is fitted on the training features only; the test features are
# transformed with those same fitted statistics.
# NOTE(review): the fit happens on the whole training frame before the
# cross-validation split, so each validation fold contributes to the scaling
# statistics of the data it is later scored on.
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

# Extract Target and Drop Unused Columns

In [None]:
# Separate the label from the model inputs; the id column is dropped from both frames.
y = train_df["target"]

X = train_df.drop(columns=["id", "target"])
test = test_df.drop(columns=["id"])

# Models

## First Keras Model

Keeping it simple

In [None]:
def build_model01(x_shape):
    """Build and compile the baseline binary classifier.

    The network is two ReLU dense layers (64 then 32 units), each followed by
    batch normalization, and a single sigmoid output unit.

    Args:
        x_shape: Shape passed to ``keras.Input`` (callers in this notebook
            pass ``(n_features,)``).

    Returns:
        A model compiled with binary cross-entropy, the Adam optimizer and a
        ROC AUC metric named ``roc_auc``.
    """
    net_input = keras.Input(shape=x_shape)

    hidden = net_input
    for width in (64, 32):
        hidden = keras.layers.Dense(width, activation="relu")(hidden)
        hidden = keras.layers.BatchNormalization()(hidden)

    net_output = keras.layers.Dense(1, activation="sigmoid")(hidden)
    model = keras.Model(net_input, net_output)

    model.compile(
        loss="binary_crossentropy",
        optimizer="adam",
        metrics=[tf.keras.metrics.AUC(name='roc_auc', curve='ROC')],
    )
    return model

# Model 2: Use swish activation

In [None]:
def build_model02_swish(x_shape):
    """Build and compile the 64-32 dense classifier with swish activations.

    Each hidden dense layer is followed by batch normalization; the output is
    a single sigmoid unit.

    Args:
        x_shape: Shape passed to ``keras.Input`` (callers in this notebook
            pass ``(n_features,)``).

    Returns:
        A model compiled with binary cross-entropy, the Adam optimizer, a ROC
        AUC metric named ``roc_auc`` and accuracy (``acc``).
    """
    net_input = keras.Input(shape=x_shape)

    hidden = net_input
    for width in (64, 32):
        hidden = keras.layers.Dense(width, activation="swish")(hidden)
        hidden = keras.layers.BatchNormalization()(hidden)

    net_output = keras.layers.Dense(1, activation="sigmoid")(hidden)
    model = keras.Model(net_input, net_output)

    model.compile(
        loss="binary_crossentropy",
        optimizer="adam",
        metrics=[tf.keras.metrics.AUC(name='roc_auc', curve='ROC'), "acc"],
    )
    return model

In [None]:
def build_model02_swish_dropout(x_shape, activation):
    """Build and compile a 128-64-32 dense classifier with dropout.

    Every hidden dense layer is followed by ``Dropout(0.2)``; the output is a
    single sigmoid unit.

    Args:
        x_shape: Shape passed to ``keras.Input`` (callers in this notebook
            pass ``(n_features,)``).
        activation: Activation used by all three hidden layers. Previously
            the last hidden layer ignored this argument and always used
            ``"swish"``; callers that pass ``"swish"`` get the same network
            as before.

    Returns:
        A model compiled with binary cross-entropy, the Adam optimizer, a ROC
        AUC metric named ``roc_auc`` and accuracy (``acc``).
    """
    inputs = keras.Input(shape=x_shape)

    x = inputs
    for units in (128, 64, 32):
        x = keras.layers.Dense(units, activation=activation)(x)
        x = keras.layers.Dropout(0.2)(x)

    outputs = keras.layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)

    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')

    model.compile(
        loss="binary_crossentropy", optimizer="adam", metrics=[roc_auc, "acc"]
    )
    return model

In [None]:
# Model03 + swish

In [None]:
def build_model03_swish(x_shape):
    """Build and compile the deeper swish classifier.

    Layer stack: Dense(96) -> Dense(192) -> BN -> Dense(128) -> BN ->
    Dense(32) -> BN -> Dense(32) -> BN -> Dense(1, sigmoid).

    Earlier revisions fed ``inputs`` (rather than the running tensor) into the
    192- and 128-unit layers. That left the 96- and 192-unit layers
    disconnected from the output, so the effective network was only
    128-32-32. Each layer is now chained onto the previous one.

    Args:
        x_shape: Shape passed to ``keras.Input`` (e.g. ``(n_features,)``).

    Returns:
        A model compiled with binary cross-entropy, the Adam optimizer, a ROC
        AUC metric named ``roc_auc`` and accuracy (``acc``).
    """
    inputs = keras.Input(shape=x_shape)

    x = keras.layers.Dense(96, activation="swish")(inputs)
    x = keras.layers.Dense(192, activation="swish")(x)
    x = keras.layers.BatchNormalization()(x)

    x = keras.layers.Dense(128, activation="swish")(x)
    x = keras.layers.BatchNormalization()(x)

    x = keras.layers.Dense(32, activation="swish")(x)
    x = keras.layers.BatchNormalization()(x)

    x = keras.layers.Dense(32, activation="swish")(x)
    x = keras.layers.BatchNormalization()(x)

    outputs = keras.layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)

    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')

    model.compile(
        loss="binary_crossentropy", optimizer="adam", metrics=[roc_auc, "acc"]
    )
    return model

In [None]:
# Build one instance only to print its layer summary; the cross-validation
# loop constructs a fresh model for every fold.
model = build_model02_swish_dropout(x_shape=(X.shape[1],), activation="swish")
model.summary()

# Cross Validation

In [None]:
# Stratified K-fold training loop.
# For every fold: train a fresh model on the training split, score it on the
# held-out split (out-of-fold AUC) and predict the test set.
seed_everything()

final_test_predictions = []   # one array of test predictions per fold
final_valid_predictions = {}  # positional row index -> out-of-fold prediction
scores = []                   # out-of-fold AUC per fold
histories = []                # Keras History object per fold

kf = StratifiedKFold(n_splits=Config.n_folds, random_state=Config.seed, shuffle=True)

for fold, (train_idx, valid_idx) in enumerate(kf.split(X=X, y=y)):
    print(10*"=", f"Fold={fold+1}", 10*"=")
    start_time = time.time()

    # kf.split yields positional indices, so select rows by position (.iloc)
    # rather than by label; this stays correct for any DataFrame index.
    x_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    x_valid, y_valid = X.iloc[valid_idx], y.iloc[valid_idx]

    model = build_model02_swish_dropout(x_shape=(X.shape[1],), activation="swish")

    # The AUC metric is registered under the name 'roc_auc', so its validation
    # key is 'val_roc_auc'. Monitoring a key that does not exist would leave
    # early stopping (and best-weight restoration) inactive.
    early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_roc_auc",
                                                      mode='max',
                                                      verbose=1,
                                                      restore_best_weights=True,
                                                      patience=3)

    # NOTE(review): this patience (10) is longer than the early-stopping
    # patience (3), so the learning rate will rarely be reduced before
    # training stops. Tune the two together if LR decay is wanted.
    lr_scheduler_cb = keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=10,
        mode='min'
    )

    # Fit on the training split only. Fitting on the full X, y would include
    # the validation rows and make the out-of-fold scores meaningless.
    history = model.fit(x_train, y_train,
                        callbacks=[early_stopping_cb, lr_scheduler_cb],
                        batch_size=Config.batch_size,
                        validation_data=(x_valid, y_valid),
                        epochs=Config.epochs
                        )
    histories.append(history)

    # Predictions for OOF
    print("--- Predicting OOF ---")
    preds_valid = model.predict(x_valid)[:, -1]
    final_valid_predictions.update(dict(zip(valid_idx, preds_valid)))

    auc = roc_auc_score(y_valid, preds_valid)
    scores.append(auc)

    # Run time covers training and OOF scoring, not the test prediction below.
    run_time = time.time() - start_time

    # Predictions for Test Data
    print("--- Predicting Test Data ---")
    test_preds = model.predict(test_df[features])[:, -1]
    final_test_predictions.append(test_preds)
    print(f"Fold={fold+1}, auc: {auc:.8f}, Run Time: {run_time:.2f}")


# Scores

In [None]:
# Summarize the per-fold AUCs; "Adjusted" is the mean minus one standard deviation.
mean_auc = np.mean(scores)
std_auc = np.std(scores)
print(f"Scores -> Adjusted: {mean_auc - std_auc:.8f} , mean: {mean_auc:.8f}, std: {std_auc:.8f}")

# History

In [None]:
def plot_history(history, metric, val_metric, title):
    """Plot one training metric against its validation counterpart per epoch.

    Args:
        history: Object exposing ``history`` (a mapping from metric name to a
            per-epoch list of values) and ``epoch`` (the x-axis values), as
            returned by ``model.fit``.
        metric: Key of the training curve in ``history.history``.
        val_metric: Key of the validation curve in ``history.history``.
        title: Human-readable metric name. Used in the chart title and as the
            y-axis label (the axis was previously always labelled "Loss",
            which was wrong for the accuracy and AUC charts).
    """
    train_values = history.history[metric]
    valid_values = history.history[val_metric]

    epoch = history.epoch

    plt.figure(figsize=(11, 4))

    plt.plot(epoch, train_values, label=metric, color="r")
    plt.plot(epoch, valid_values, label=val_metric, color="b")

    plt.xlabel("Epoch")
    plt.ylabel(title)
    plt.legend()
    plt.grid(True)
    plt.title(f"Training and Validation {title}")

    plt.show()


In [None]:
# List the metric names recorded during training. `history` is the variable
# left over from the last iteration of the cross-validation loop.
history.history.keys()

In [None]:
# For every fold, draw the accuracy, loss and AUC curves (train vs. validation).
curve_specs = (
    ("acc", "val_acc", "Accuracy"),
    ("loss", "val_loss", "Loss"),
    ("roc_auc", "val_roc_auc", "AUC"),
)

for fold_number, fold_history in enumerate(histories, start=1):
    print(20*'=', f"Fold = {fold_number}", 20*'=')

    for train_key, valid_key, chart_title in curve_specs:
        plot_history(fold_history, train_key, valid_key, chart_title)

    plt.show()


# Submission File

In [None]:
# Average the per-fold test predictions row by row and write the result out.
fold_predictions = np.column_stack(final_test_predictions)  # one column per fold
sample_submission['target'] = fold_predictions.mean(axis=1)

# The same table is saved under both file names.
for output_name in ("test_pred_2.csv", "submission.csv"):
    sample_submission.to_csv(output_name, index=None)
sample_submission