## TPS-11-21: CNN+DNN

## Overview
In this notebook, I create a TPS prediction model that combines CNN and DNN architectures with skip connections. I tune the hyperparameters with Keras Tuner. Because the dataset is large, I use only a small sample for hyperparameter tuning. After finding the best model, I retrain it on the full dataset.

## Setup

In [None]:
import numpy as np
import pandas as pd 
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
from sklearn.model_selection import train_test_split
import keras_tuner as kt
import gc

## Configuration

In [None]:
class Config:
    """Notebook-wide settings: dataset paths, column names and training knobs."""

    # Dataset location on Kaggle.
    dataset_name = "tabular-playground-series-nov-2021"
    train_path = f"/kaggle/input/{dataset_name}/train.csv"
    test_path = f"/kaggle/input/{dataset_name}/test.csv"
    sample_submission_path = f"/kaggle/input/{dataset_name}/sample_submission.csv"

    # Column names in the CSV files.
    id_field = "id"
    label_field = "target"

    # Data splitting and sub-sampling.
    validation_split = 0.2
    sample_rate = 0.05

    # Hyperparameter search and training.
    hyperparameter_tuning_trial = 50
    epochs = 50
    batch_size = 1024
    train_with_fulldataset = True

    # Output artefacts.
    model_path = "model.h5"
    submission_path = "submission.csv"


config = Config()

## Import and preprocess datasets

In [None]:
# Load the training CSV and preview its first rows.
train_features = pd.read_csv(config.train_path)
train_features.head()

In [None]:
# Remove the id column, then move the label column out of the feature frame
# (DataFrame.pop removes the column in place and returns it).
train_features.pop(config.id_field)
train_targets = train_features.pop(config.label_field)

In [None]:
# Scale every feature by its column-wise maximum over the training data.
# train_max is kept because the test set is divided by the same values later.
# NOTE(review): a column whose maximum is 0 would yield inf/NaN here, and
# negative values are not mapped into [0, 1] — confirm the data rules this out.
train_max = train_features.max()
train_features = train_features / train_max

In [None]:
# Hold out a validation split of size config.validation_split; the fixed seed
# keeps the split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    train_features,
    train_targets,
    test_size=config.validation_split,
    random_state=42,
)

In [None]:
# Display the shapes of the four splits as a quick sanity check.
X_train.shape, X_val.shape, y_train.shape, y_val.shape

In [None]:
# The un-split frames are no longer referenced below; drop them and force a
# garbage-collection pass to release their memory.
del train_features, train_targets
gc.collect()

### Choose a small sample for hyperparameter tuning

In [None]:
# Draw a small random subset of the train and validation splits for the
# hyperparameter search.
# A seeded Generator makes the subset reproducible (the split above is seeded
# too), and replace=False guarantees every sampled row is distinct; the legacy
# np.random.choice default samples WITH replacement and would duplicate rows.
subset_rng = np.random.default_rng(42)

n_train_rows = X_train.shape[0]
train_indices = subset_rng.choice(
    n_train_rows,
    size=int(n_train_rows * config.sample_rate),
    replace=False,
)
X_train_subset = X_train.iloc[train_indices]
y_train_subset = y_train.iloc[train_indices]

n_val_rows = X_val.shape[0]
val_indices = subset_rng.choice(
    n_val_rows,
    size=int(n_val_rows * config.sample_rate),
    replace=False,
)
X_val_subset = X_val.iloc[val_indices]
y_val_subset = y_val.iloc[val_indices]

## Model Development

In [None]:
def residual_block(x, filters, kernel_size):
    """Run two ReLU convolutions plus pooling and add a projected shortcut.

    Args:
        x: Input feature-map tensor accepted by ``Conv2D``.
        filters: Number of output channels of every convolution in the block.
        kernel_size: Kernel size of the two main-path convolutions.

    Returns:
        The element-wise sum of the pooled main path and the shortcut path.
    """
    shortcut = x
    main = x
    # Main path: two identical same-padded convolutions, created in order.
    for _ in range(2):
        main = layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            activation="relu",
            padding="same",
        )(main)
    # Downsample the main path by a factor of 2.
    main = layers.AveragePooling2D(pool_size=2, padding="same")(main)
    # Shortcut: a strided 1x1 convolution so channel count and spatial size
    # match the main path before the addition.
    shortcut = layers.Conv2D(filters=filters, kernel_size=1, strides=2)(shortcut)
    return layers.add([main, shortcut])

In [None]:
def build_model(hp):
    """Build and compile the two-branch CNN + DNN classifier for one tuner trial.

    The 100 input features feed two branches:

    * CNN branch: the vector is reshaped to a 10x10x1 map and passed through
      three ``residual_block`` stages, then globally average-pooled.
    * DNN branch: a stack of ``dnn_depth`` Dense layers; after every third
      layer the activations are batch-normalised, regularised with dropout
      and concatenated with the raw inputs (skip connection).

    Both branch outputs are concatenated and mapped to a single sigmoid unit.

    Args:
        hp: Keras Tuner hyperparameter container; the searched values are
            ``dnn_width``, ``dnn_depth``, ``dropout`` and ``learing_rate``.

    Returns:
        A compiled ``keras.Model`` trained with binary cross-entropy and
        reporting accuracy and AUC.
    """
    width = hp.Choice("dnn_width", [16, 32, 64])
    depth = hp.Choice("dnn_depth", [3, 6, 9])
    dropout = hp.Choice("dropout", [0.1, 0.2, 0.3])

    # The shape must be a tuple: ``(100)`` is just the int 100, which newer
    # Keras versions refuse to interpret as a shape.
    inputs = layers.Input(shape=(100,))

    # CNN branch.
    cnn_x = layers.Reshape((10, 10, 1))(inputs)
    for i in range(3):
        filters = 16 * (2 ** (i + 1))  # 32, 64, 128
        kernel_size = 5 if i == 0 else 3
        cnn_x = residual_block(cnn_x, filters, kernel_size)
    cnn_x = layers.GlobalAveragePooling2D()(cnn_x)

    # DNN branch.
    dnn_x = inputs
    for i in range(depth):
        dnn_x = layers.Dense(width, activation="swish")(dnn_x)
        if (i + 1) % 3 == 0:
            dnn_x = layers.BatchNormalization()(dnn_x)
            dnn_x = layers.Dropout(dropout)(dnn_x)
            # Skip connection: re-inject the raw features.
            dnn_x = layers.Concatenate()([dnn_x, inputs])

    # Merge both branches and classify.
    x = layers.Concatenate()([cnn_x, dnn_x])
    x = layers.Dropout(dropout)(x)
    output = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(inputs=inputs, outputs=output)

    # NOTE: the hyperparameter key is spelled "learing_rate"; it is left
    # unchanged because it is part of the stored tuner results.
    adam = keras.optimizers.Adam(
        learning_rate=hp.Float("learing_rate", 1e-5, 5e-3)
    )
    # An explicit metric name keeps the logged key at "auc"/"val_auc" even if
    # the model is rebuilt several times in one session; an unnamed AUC metric
    # can be auto-renamed to "auc_1", "auc_2", ... and break the tuner objective.
    model.compile(
        loss="binary_crossentropy",
        optimizer=adam,
        metrics=["accuracy", keras.metrics.AUC(name="auc")],
    )
    return model

In [None]:
# Bayesian hyperparameter search that maximises validation AUC, run on the
# sub-sampled data for a few epochs per trial.
auc_objective = kt.Objective("val_auc", direction="max")
tuner = kt.BayesianOptimization(
    build_model,
    objective=auc_objective,
    max_trials=config.hyperparameter_tuning_trial,
    directory="tps_cnn_dnn",
)
tuner.search(
    x=X_train_subset,
    y=y_train_subset,
    epochs=5,
    batch_size=config.batch_size,
    validation_data=(X_val_subset, y_val_subset),
)

# Keep the best model of the search and render its architecture.
best_model = tuner.get_best_models()[0]
keras.utils.plot_model(best_model, show_shapes=True)

Here are the best parameters:

In [None]:
# Some of the best parameter sets (as recorded by the author):
# {'dnn_width': 64, 'dnn_depth': 9, 'dropout': 0.2, 'learing_rate': 0.005}
# {'dnn_width': 64, 'dnn_depth': 6, 'dropout': 0.3, 'learing_rate': 0.005}
# Fetch the top-ranked hyperparameter set of this search and display its values.
best_hp = tuner.get_best_hyperparameters()[0]
best_hp.get_config()["values"]

In [None]:
if config.train_with_fulldataset:
    # Rebuild a fresh model from the best hyperparameters and train it on the
    # complete training split.
    keras.backend.clear_session()
    model = tuner.hypermodel.build(best_hp)

    early_stopping = keras.callbacks.EarlyStopping(patience=10)
    model_checkpoint = keras.callbacks.ModelCheckpoint(
        config.model_path,
        save_best_only=True,
    )
    reduce_lr = keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.2,
        patience=5,
        min_lr=1e-7,
    )

    history = model.fit(
        x=X_train,
        y=y_train,
        epochs=config.epochs,
        batch_size=config.batch_size,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping, model_checkpoint, reduce_lr],
    )

    # Restore the checkpointed (best) weights, then plot the training curves.
    model.load_weights(config.model_path)
    pd.DataFrame(history.history).plot()
else:
    # Reuse the model trained on the sample during the search.
    model = best_model

In [None]:
# Training is finished; release the train/validation splits before the test
# set is loaded.
del X_train, y_train, X_val, y_val
gc.collect()

## Submission

In [None]:
# Load the test set, drop its id column and scale it with the training-set
# maxima so the features match what the model was trained on.
test = pd.read_csv(config.test_path)
_ = test.pop(config.id_field)
test = test / train_max

# Author's note: submitting probabilities scores higher than hard labels.
# Thresholded alternative: (y_pred.reshape(-1) > 0.5).astype(int)
# A large batch size avoids the slow default (32) on the full test set.
y_pred = model.predict(test, batch_size=config.batch_size)
print(y_pred.shape)

sample_submission = pd.read_csv(config.sample_submission_path)
# predict() returns a 2-D array for the single-output model; flatten it so
# the column assignment does not rely on implicit 2-D handling.
sample_submission[config.label_field] = y_pred.reshape(-1)
sample_submission.to_csv(config.submission_path, index=False)