## TPS-10-21: DNN

## Setup

In [None]:
import numpy as np
import pandas as pd 
from tensorflow import keras
from sklearn.model_selection import train_test_split
import keras_tuner as kt
import tensorflow as tf
import gc

## Configuration

In [None]:
# Mini-batch size for the Keras model calls.
# NOTE(review): confirm this is actually passed as `batch_size=` to the
# search/fit calls; when it is not, Keras falls back to its default of 32.
batch_size = 128

## Import and preprocess datasets

In [None]:
# Load the competition training table from the Kaggle input directory.
train = pd.read_csv("/kaggle/input/tabular-playground-series-oct-2021/train.csv")

In [None]:
# Preview the first rows as a quick sanity check of the load.
train.head()

In [None]:
# Split the label off and discard the row identifier so that `train` holds
# feature columns only. Both pops modify `train` in place.
target = train.pop("target")
_ = train.pop("id")

In [None]:
# Per-column statistics of the training features. They are reused further
# down to standardise the test features with the same parameters.
train_mean = train.mean()
# A constant column has a standard deviation of 0, and (x - mean) / 0 would
# fill that column with NaN and poison the network's loss. Substituting 1
# leaves such a column at exactly 0 after standardisation.
train_std = train.std().replace(0, 1)

In [None]:
# Standardise each feature column: subtract its mean, divide by its standard deviation.
train = (train - train_mean) / train_std

In [None]:
# Hold out 15% of the rows for validation; the fixed random_state keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(train, target, test_size=0.15, random_state=42)

In [None]:
# Display the shapes of the four split pieces.
X_train.shape, X_val.shape, y_train.shape, y_val.shape

In [None]:
# The un-split frame and label series are not referenced again; drop both
# names and run a garbage-collection pass.
del train, target
gc.collect()

In [None]:
# Draw a small random subset of the training and validation rows; these
# subsets are what the hyper-parameter search is run on.
portion = 0.01

# Seeded generator so that the same rows are selected on every run.
subset_rng = np.random.default_rng(42)

# replace=False is essential: sampling with replacement (the default of
# `choice`) would put duplicate rows into the subsets.
train_indices = subset_rng.choice(
    X_train.shape[0],
    size=int(X_train.shape[0] * portion),
    replace=False,
)
X_train_subset = X_train.iloc[train_indices]
y_train_subset = y_train.iloc[train_indices]

val_indices = subset_rng.choice(
    X_val.shape[0],
    size=int(X_val.shape[0] * portion),
    replace=False,
)
X_val_subset = X_val.iloc[val_indices]
y_val_subset = y_val.iloc[val_indices]

## Model Development

In [None]:
def build_model(hp):
    """Build and compile a dense binary classifier for one tuner trial.

    The network stacks ``depth`` fully connected layers of ``width`` units.
    The first layer always uses ReLU; each of the remaining ``depth - 1``
    layers uses the tuned activation and is followed by a dropout layer.
    A single sigmoid unit produces the output.

    Args:
        hp: KerasTuner ``HyperParameters`` object from which ``width``,
            ``depth``, ``activation``, ``dropout`` and ``learing_rate``
            are sampled.

    Returns:
        A compiled ``keras.Model`` using binary cross-entropy, Adam, and the
        metrics ``accuracy`` and ``auc``.

    Note:
        The number of input features is read from the module-level
        ``X_train``, so that name must exist when this function is called.
    """
    # Sample every hyper-parameter once, up front. Repeated hp.Choice calls
    # with the same name return the same value, so doing this inside the
    # layer loop was redundant.
    width = hp.Choice('width', [16, 32, 64, 128])
    depth = hp.Choice('depth', [3, 4, 5, 6, 7, 8, 9, 10])
    activation = hp.Choice('activation', ["relu", "elu", "linear"])
    dropout_rate = hp.Choice('dropout', [0.1, 0.2, 0.3, 0.4, 0.5])
    # The misspelt key "learing_rate" is kept on purpose: other cells look the
    # value up under this exact name. The range spans several orders of
    # magnitude, so sample on a log scale; linear sampling would almost never
    # try values near the lower bound.
    learning_rate = hp.Float("learing_rate", 1e-5, 5e-3, sampling="log")

    # `shape` must be a tuple; `(n)` without a trailing comma is just an int.
    inputs = keras.layers.Input(shape=(X_train.shape[1],))

    x = keras.layers.Dense(width, activation='relu')(inputs)
    for _ in range(depth - 1):
        x = keras.layers.Dense(width, activation=activation)(x)
        x = keras.layers.Dropout(dropout_rate)(x)
    output = keras.layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs=inputs, outputs=output)
    adam = keras.optimizers.Adam(learning_rate=learning_rate)
    # Pin the metric name: an unnamed AUC() can be auto-renamed "auc_1",
    # "auc_2", ... when several models are built in one session, and the
    # tuner would then no longer find its "val_auc" objective.
    model.compile(
        loss='binary_crossentropy',
        optimizer=adam,
        metrics=["accuracy", keras.metrics.AUC(name="auc")],
    )
    return model

In [None]:
# Random search over the hyper-parameters declared in build_model, ranking
# trials by validation AUC (higher is better).
tuner = kt.RandomSearch(
    build_model,
    objective=kt.Objective("val_auc", direction="max"),
    max_trials=100,
)
# Keyword arguments of `search` are forwarded to `model.fit`. Pass the
# notebook's configured batch size explicitly so the trials do not silently
# run with Keras' default of 32.
tuner.search(
    x=X_train_subset,
    y=y_train_subset,
    batch_size=batch_size,
    epochs=5,
    validation_data=(X_val_subset, y_val_subset),
)
# Retrieve the best trial's model and render its architecture.
best_model = tuner.get_best_models()[0]
keras.utils.plot_model(best_model, show_shapes=True)

In [None]:
# Hyper-parameter set of the top-ranked trial.
best_hp = tuner.get_best_hyperparameters()[0]

Here are the best parameters:

In [None]:
# Print selected values from the winning hyper-parameter set, one per line.
for hp_name in ("width", "depth", "learing_rate"):
    print(f"{hp_name}:", best_hp.get(hp_name))

In [None]:
# Reset Keras' global state before building the final model.
keras.backend.clear_session()

In [None]:
# Build a fresh, untrained model from the best hyper-parameters for the full training run.
model = tuner.hypermodel.build(best_hp)

In [None]:
# Callbacks for the final training run. All three watch the validation loss;
# for the first two this is the Keras default, written out here.
model_checkpoint_path = "model.h5"

# Stop once the monitored value has gone 10 epochs without improving.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
)

# Write a checkpoint only when the monitored value improves.
model_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=model_checkpoint_path,
    monitor="val_loss",
    save_best_only=True,
)

# Multiply the learning rate by 0.2 after 5 stagnant epochs, never going
# below 1e-7.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-7,
)

In [None]:
# Train the final model on the full training split. The configured batch size
# is passed explicitly; without it Keras uses its default of 32 and the
# `batch_size` setting has no effect on training.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, model_checkpoint, reduce_lr],
)

In [None]:
# Restore the weights from the checkpoint file written by the ModelCheckpoint callback.
model.load_weights(model_checkpoint_path)

In [None]:
# Training is finished: drop the four split objects and run a
# garbage-collection pass.
del X_train, y_train, X_val, y_val
gc.collect()

In [None]:
# Plot every per-epoch series recorded by `fit` on a single chart.
pd.DataFrame(history.history).plot()

## Submission

In [None]:
# Load the competition test table from the Kaggle input directory.
test = pd.read_csv("/kaggle/input/tabular-playground-series-oct-2021/test.csv")
# Drop the row identifier; the popped column is intentionally discarded.
_ = test.pop("id")
# Standardise with the statistics computed on the training features so that
# the test inputs are on the same scale the model was trained on.
test = (test - train_mean) / train_std

In [None]:
# Fill the submission template with the model's predictions and write it out.
sample_submission = pd.read_csv(
    "/kaggle/input/tabular-playground-series-oct-2021/sample_submission.csv"
)

# Flatten the prediction array to one dimension so it can be assigned as a
# single column.
raw_predictions = model.predict(test)
y_pred = raw_predictions.ravel()
print(y_pred.shape)

sample_submission["target"] = y_pred
sample_submission.to_csv("submission.csv", index=False)