In [2]:
# ![Screen Shot 2021-11-09 at 7.52.31 AM.png](attachment:2f753b90-c10d-42de-8852-46b36e352d35.png)

In [3]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # suppressing tensorflow warnings

import numpy as np
import pandas as pd
import platform
import seaborn as sns
import neptune.new as neptune
import glob
from neptune.new.integrations.tensorflow_keras import NeptuneCallback
from datetime import datetime
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import (
    train_test_split,
    StratifiedKFold,
    StratifiedShuffleSplit,
)
from sklearn.preprocessing import (
    StandardScaler,
    MinMaxScaler,
    RobustScaler,
    Normalizer,
    PowerTransformer,
    OneHotEncoder,
)
from scikitplot.metrics import plot_roc, plot_precision_recall
from tensorflow import keras
from keras import layers
from keras import Sequential
from keras.layers import Dense, Flatten, InputLayer, Dropout
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers.normalization import BatchNormalization
import keras_tuner as kt

import matplotlib.pyplot as plt  # plotting learning curves for DNN

# Global look-and-feel for the learning-curve figures.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and 3.8 removed the old names, so use whichever spelling is installed.
whitegrid_style = "seaborn-v0_8-whitegrid"
if whitegrid_style not in plt.style.available:
    whitegrid_style = "seaborn-whitegrid"
plt.style.use(whitegrid_style)

plt.rc("figure", autolayout=True)
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="medium",
    titleweight="bold",
    titlesize=14,
    titlepad=10,
)

In [4]:
# Resolve the data directory for the machine this notebook is running on.
running_on = platform.system()
if running_on == "Darwin":  # macOS
    drive_path = "/Users/pmm/My Drive/Colab Notebooks/TPS 2021-11"
elif running_on == "Windows":
    drive_path = r"C:\Users\pmm\My Drive\Colab Notebooks\TPS 2021-11"
elif "google.colab" in str(get_ipython()):
    drive_path = "/content"
else:
    # Any other environment (e.g. a local Linux kernel): fall back to the
    # working directory so `drive_path` is always bound.
    drive_path = os.getcwd()

path = drive_path


def now():
    """Return the current local time as a filename-safe stamp (e.g. 2021-11-09T07h52m31s)."""
    return datetime.now().strftime("%Y-%m-%dT%Hh%Mm%Ss")


# Seed shared by the cross-validation splitter.
random_state = 53

In [5]:
# Read the train/test feather files from `path` and drop the "id" column
# from both frames.
train_file = os.path.join(path, "train.feather")
test_file = os.path.join(path, "test.feather")

train_data = pd.read_feather(train_file).drop(columns=["id"])
test_data = pd.read_feather(test_file).drop(columns=["id"])

In [6]:
# Feature scaling. RobustScaler is the active choice; the alternatives tried
# were StandardScaler(), MinMaxScaler() and PowerTransformer().
scaler = RobustScaler()

col_names = test_data.columns
train_target = train_data["target"]

# Fit the scaler on the training features only, then reuse the fitted scaler
# for the test set. The scaled training columns are labelled with the test
# frame's column names (assumes both frames share the same feature order).
scaled_train_features = pd.DataFrame(
    scaler.fit_transform(train_data.drop(columns=["target"])),
    columns=col_names,
)
scaled_train_data = pd.concat([scaled_train_features, train_target], axis=1)

scaled_test_data = pd.DataFrame(scaler.transform(test_data), columns=col_names)

### Normalize

In [7]:
# norm = Normalizer()
# col_names = scaled_test_data.columns
# train_target = scaled_train_data["target"]
# norm_train_data = pd.concat(
#     [
#         pd.DataFrame(
#             norm.fit_transform(scaled_train_data.drop(columns=["target"])),
#             columns=col_names,
#         ),
#         train_target,
#     ],
#     axis=1,
# )
# norm_test_data = pd.DataFrame(norm.transform(scaled_test_data), columns=col_names)

In [8]:
# Model inputs taken from the scaled frames: feature matrix X, label vector y,
# and the test features X_test. To use the Normalizer variant instead, source
# these from norm_train_data / norm_test_data.
X = scaled_train_data.drop(columns="target")
y = scaled_train_data["target"]
X_test = scaled_test_data

### To Neptune and beyond...

In [9]:
def get_api_token(file):
    """Return the Neptune API token, read from ``file`` or typed in by the user.

    If ``file`` exists, the token is its first line with trailing whitespace
    removed. Otherwise the user is prompted for it.

    Args:
        file: Path of a text file whose first line holds the API token.

    Returns:
        The non-empty API token string.

    Raises:
        SystemExit: If the resulting token is empty.
    """
    if os.path.isfile(file):
        # Context manager so the handle is closed even if the read fails.
        with open(file, "r") as api_file:
            api_token = api_file.readline().rstrip()
    else:
        # Imported here because only this fallback branch needs it.
        import getpass

        print(f"Neptune API token file {file} not found")
        # getpass keeps the secret out of the saved cell output, which a
        # plain input() would echo into the notebook.
        api_token = str(getpass.getpass("Enter API token: ")).rstrip()

    if api_token == "":
        # Same effect as sys.exit(msg), without depending on `sys`, which
        # this notebook never imports.
        raise SystemExit("API token cannot be empty, numbnuts!")

    return api_token

In [10]:
from pathlib import Path
# Neptune setup: the API token is looked up in ~/.neptune_api (get_api_token
# prompts for it when that file is missing).
home = str(Path.home())
keyfile = os.path.join(home, ".neptune_api")
project = "pmoriarty/Keras-TPS-2021-11"

api_token = get_api_token(keyfile)
run = neptune.init(project=project, api_token=api_token)

# Hyper-parameters read by get_model() and model.fit(), and logged with the run.
# Entries disabled from earlier experiments: dense_4=16, drop_3=0.25.
params = dict(
    epochs=99999,  # upper bound only; the EarlyStopping callback ends training
    batch_size=1024,
    verbose=0,
    dense_1=128,
    dense_2=64,
    dense_3=32,
    drop_1=0.5,
    drop_2=0.4,
    activation="swish",
)

run["hyper-parameters"] = params

https://app.neptune.ai/pmoriarty/Keras-TPS-2021-11/e/KER1-23
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


### Building the network architecture 
Experimented with the number of hidden layers and the number of units per layer. Increasing the number of hidden layers up to 4 resulted in only minor performance gains.

In [11]:
# Stop training once val_loss has gone 20 epochs without improving, and roll
# the model back to its best weights.
early_stopping = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=20,
    min_delta=0,
    baseline=None,
    restore_best_weights=True,
    verbose=0,
)

# Scale the learning rate by 0.2 after 5 epochs without val_loss improvement.
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    mode="min",
    factor=0.2,
    patience=5,
)

# Send Keras training metrics to the Neptune run under the "metrics" namespace.
neptune_cbk = NeptuneCallback(run=run, base_namespace="metrics")

In [12]:
def get_model(name: str, n_features=None):
    """Build the (uncompiled) feed-forward binary classifier.

    Layer sizes, dropout rates and the hidden activation come from the global
    ``params`` dict. The stack is:
    Dense(dense_1) -> BatchNorm -> Dropout(drop_1) ->
    Dense(dense_2) -> BatchNorm -> Dropout(drop_2) ->
    Dense(dense_3) -> Dense(1, sigmoid).

    Args:
        name: Name given to the Keras ``Sequential`` model.
        n_features: Number of input features. When omitted, the column count
            of the notebook-level feature frame ``X`` is used.

    Returns:
        The uncompiled ``keras.Sequential`` model.
    """
    if n_features is None:
        # Use the notebook-level X rather than X_train: X_train only exists
        # once the cross-validation loop has started, and both frames have
        # the same columns.
        n_features = X.shape[1]
    input_shape = [n_features]

    model = keras.Sequential(name=name)

    model.add(InputLayer(input_shape=input_shape))

    model.add(Dense(params["dense_1"], activation=params["activation"]))
    model.add(BatchNormalization())
    model.add(Dropout(rate=params["drop_1"]))

    model.add(Dense(params["dense_2"], activation=params["activation"]))
    model.add(BatchNormalization())
    model.add(Dropout(rate=params["drop_2"]))

    # Third hidden layer is deliberately left without BatchNorm/Dropout.
    # Disabled variants from earlier experiments: BatchNorm + Dropout(drop_3)
    # here, and a fourth Dense(dense_4) block.
    model.add(Dense(params["dense_3"], activation=params["activation"]))

    # Single sigmoid unit for the binary target.
    model.add(Dense(1, activation="sigmoid"))

    return model

**Training the model**  


In [None]:
# Stratified 20-fold cross-validation: a fresh network is trained on each fold
# and its test-set predictions are kept so they can be averaged later.
cv = StratifiedKFold(n_splits=20, shuffle=True, random_state=random_state)
# Alternative splitter kept for reference:
# cv = StratifiedShuffleSplit(
#     n_splits=5, test_size=0.10, random_state=random_state
# )

n_folds = range(cv.n_splits)
scores = dict.fromkeys(n_folds)  # fold index -> Keras history dict (None until trained)
predictions = []  # one 1-D array of test predictions per fold

for fold, (idx_train, idx_val) in enumerate(cv.split(X, y)):
    # These names are notebook globals that other cells may read; keep them.
    X_train, X_val = X.iloc[idx_train], X.iloc[idx_val]
    y_train, y_val = y.iloc[idx_train], y.iloc[idx_val]

    model = get_model(name="Baseline")
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["AUC"])

    print(f"Fold #{fold + 1:2d}: ", end="")

    # Batch size, epoch cap and verbosity all come from `params`.
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        batch_size=params["batch_size"],
        epochs=params["epochs"],
        verbose=params["verbose"],
        callbacks=[early_stopping, reduce_lr, neptune_cbk],
    )
    scores[fold] = history.history

    # A fold is scored by its best validation AUC over all epochs.
    max_auc = np.max(scores[fold]["val_auc"])
    print(f"Max AUC = {max_auc:8.6f}")

    # Optional early bail-out (disabled):
    # thresh = 0.755
    # if max_auc < thresh:
    #     print(f"Bailing out! AUC is less than {thresh}")
    #     break

    # Flatten the model's prediction array to 1-D before storing it.
    prediction = model.predict(X_test).ravel()
    predictions.append(prediction)

# Mean over folds of each fold's best validation AUC.
overall_auc = [np.max(fold_history["val_auc"]) for fold_history in scores.values()]
mean_auc = np.mean(overall_auc)
print(f"Training Mean AUC = {mean_auc:8.6f}")
run["Training Mean AUC"] = mean_auc

Max AUC = 0.759391
Fold # 2: Max AUC = 0.755943
Fold # 3: Max AUC = 0.760105
Fold # 4: Max AUC = 0.756662
Fold # 5: Max AUC = 0.759871
Fold # 6: 

In [None]:
### Plot train versus validation AUC for each epoch, one panel per fold.
# The grid is sized from the number of folds (4 panels per row; 20 folds give
# the original 5 x 4 layout) instead of being hard-coded.
n_cols = 4
n_rows = max(1, (len(n_folds) + n_cols - 1) // n_cols)
fig, ax = plt.subplots(
    n_rows,
    n_cols,
    tight_layout=True,
    figsize=(20, 4 * n_rows),
    squeeze=False,  # always a 2-D array, so flatten() works for any grid
)
ax = ax.flatten()

for fold in n_folds:
    if scores[fold] is None:
        # No history recorded for this fold (e.g. training was interrupted).
        ax[fold].set_visible(False)
        continue

    df_eval = pd.DataFrame(
        {"train_auc": scores[fold]["auc"], "val_auc": scores[fold]["val_auc"]}
    )

    # AUC is higher-is-better, so report the best (max) epoch, matching how
    # the training cell scores each fold.
    max_train = np.round(np.max(df_eval["train_auc"]), 5)
    max_val = np.round(np.max(df_eval["val_auc"]), 5)
    delta = np.round(max_val - max_train, 5)

    sns.lineplot(
        x=df_eval.index,
        y=df_eval["train_auc"],
        label="train_auc",
        ax=ax[fold],
    )

    sns.lineplot(
        x=df_eval.index,
        y=df_eval["val_auc"],
        label="val_auc",
        ax=ax[fold],
    )

    ax[fold].set_ylabel("")
    ax[fold].set_xlabel(
        f"Fold {fold+1}\nmax_train: {max_train}\nmax_val: {max_val}\ndelta: {delta}",
        fontstyle="italic",
    )

# Hide any panels left over when the fold count does not fill the grid.
for unused_ax in ax[len(n_folds):]:
    unused_ax.set_visible(False)

sns.despine()

In [None]:
# Print the layer-by-layer summary of `model` as last assigned in the
# training loop.
model.summary()

In [None]:
# Save the current `model` under ./my_model, then attach the saved graph file
# and every file under my_model/variables/ to the Neptune run.
model.save("my_model")

run["my_model/saved_model"].upload("my_model/saved_model.pb")

variable_files = glob.glob("my_model/variables/*")
for name in variable_files:
    # The file's relative path doubles as its Neptune field name.
    run[name].upload(name)

In [None]:
# Build the submission: average the per-fold test predictions and write them
# into the sample submission's "target" column.
model_name = type(model).__name__
submission_file = f"{drive_path}/submission_{model_name}_{now()}.csv"

fold_predictions = np.column_stack(predictions)  # one column per fold
submission = pd.read_csv(f"{path}/sample_submission.csv")
submission["target"] = fold_predictions.mean(axis=1)
submission.to_csv(submission_file, index=False)

In [None]:
# Append this experiment (mean validation AUC plus a manually entered test
# AUC) to the running Experiments.csv log in `drive_path`.
cols = ["date", "model", "params", "val_auc", "test_auc"]
exp_f = os.path.join(drive_path, "Experiments.csv")

# NOTE(review): the "params" column receives the raw weight arrays from
# model.get_weights() - confirm this is intended rather than the `params`
# hyper-parameter dict.
model_params = model.get_weights()
val_auc = np.mean(overall_auc)

print("test auc:")
test_auc = float(input())
timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

experiment = pd.DataFrame(
    [[timestamp, model_name, model_params, val_auc, test_auc]],
    columns=cols,
)

# Start from the existing log when there is one, otherwise from an empty frame.
experiments = (
    pd.read_csv(exp_f) if os.path.exists(exp_f) else pd.DataFrame(columns=cols)
)
experiments = pd.concat([experiments, experiment], ignore_index=True)
experiments.to_csv(exp_f, index=False)

**--- Batch Size = 1024 / StratifiedKFold (n_splits=5) / StandardScaler**  
Overall Mean AUC:  0.757540249824524   - 128-64  
Overall Mean AUC:  0.7558159351348877 - Minus final 64 layer  
Overall Mean AUC:  0.7574994564056396 - 64 / Norm / 0.5 dropout  
Overall Mean AUC:  0.7572387456893921 - 100-100-100 / Norm / 0.5 dropout  
Overall Mean AUC:  0.7573859572410584 - 100-75-50 / Norm / 0.5 dropout  
Overall Mean AUC:  0.7493987083435059 - 10-10-10-10-8 / Norm / 0.5  
Overall Mean AUC:  0.7573502540588379 - 128-128-128-64 / 0.5 / no Norm on final layer  
Overall Mean AUC:  0.7572739481925964 - 128-128-64 / 0.5 / no Norm on final layer  
Overall Mean AUC:  0.7574271202087403 - 128-96-128 / 0.5 / no Norm on final layer  
Overall Mean AUC:  0.7574907898902893 - 128-96-64 / Norm / 0.5  
**--- StratifiedShuffleSplit (n_splits=5)---**  
Overall Mean AUC:  0.757282507419586  - 128-96-64 / Norm / 0.5  
**--- Added Normalizer after Scaler ---**    
Overall Mean AUC:  0.7565243363380432 - 128-96-64 / Norm / 0.5  
**--- Test Split = 0.33 ---**    
Overall Mean AUC:  0.7556142687797547 - 128-96-64 / Norm / 0.5  
**--- Test Split = 0.10 ---**   
Overall Mean AUC:  0.7565118908882141 - 128-96-64 / Norm / 0.5  
**--- StratifiedKFold ---**    
Overall Mean AUC:  0.7566200375556946 - 128-64 / 0.5  
Overall Mean AUC:  0.7564255118370056 - 128-64 / Norm / 0.5  
**--- Removed Normalizer ---**    
Overall Mean AUC:  0.7570324659347534 - 128-64  
Overall Mean AUC:  0.7570019364356995 - 128-64 / Norm  
Overall Mean AUC:  0.7556699037551879 - 128-64 / Norm / 0.5  
**--- Batch Size = 2048 ---**    
Overall Mean AUC:  0.7566995620727539 - 128-64  
**--- Batch Size = 512 ---**   
Overall Mean AUC:  0.7569037437438965 - 128-64
Overall Mean AUC:  0.7569687366485596 - 64-32  
Overall Mean AUC:  0.7567706227302551 - 32-16
Overall Mean AUC:  0.7530458807945252 - 32-24-16  
**--(n_splits=10) RobustScaler--**  
Overall Mean AUC:  0.7581023335456848 - 128-64 / Norm / 0.5   
**--MinMaxScaler--**  
Overall Mean AUC:  0.7551696360111236 - 128-64 / Norm / 0.5   
**--PowerTransformer--**  
Overall Mean AUC:  0.7544887363910675 - 128-64 / Norm / 0.5   
**--(n_splits=20) RobustScaler--**  
Overall Mean AUC:  0.7586091578006744 - 128-64 / Norm / 0.5   
Overall Mean AUC:  0.7508559554815293 - 80-64-32-16 / Norm / 0.5  
Overall Mean AUC:  0.754138734936714  - 128 / Norm  
Training Mean AUC = 0.758493 - 80-64-32 / Norm  
Training Mean AUC = 0.758373 - 80-64-32  
Training Mean AUC = 0.758657 - 80-64-32 (.4 dropout)  
Training Mean AUC = 0.758585 - 80-64-32 (.4 dropout) / Norm  
Training Mean AUC = 0.758751 - 128-64-32 / Norm  
Training Mean AUC = 0.758751 - 128-64-32 (.4 dropout) / Norm  
Training Mean AUC = 0.758215 - 128-64-32 (.3 dropout) / Norm  
Training Mean AUC = 0.756906 - 128-64-32 (.6 dropout) / Norm  
Training Mean AUC = 0.758297 - 128-64-32 (.55 dropout) / Norm  
Training Mean AUC = 0.758664 - 128-64-32 (.5 / .4 dropout) / Norm

### Save Model

In [None]:
# Print every path matching my_model/variables/*, one per line.
for name in glob.glob("my_model/variables/*"):
    print(name)

In [None]:
# Saving the architecture to a txt file:
from contextlib import redirect_stdout

# Capture the text printed by model.summary() into <model_name>_arch.txt by
# redirecting stdout, then attach that file to the Neptune run.
arch_path = f"./{model_name}_arch.txt"
with open(arch_path, "w") as f, redirect_stdout(f):
    model.summary()

run[f"io_files/artifacts/{model_name}_arch"].upload(arch_path)

In [None]:
# Stop the Neptune run now that logging is finished.
run.stop()

In [None]:
def reset_gpu():
    """Reset the current CUDA device through numba.

    numba is imported inside the function, so it is only required when this
    helper is actually called.
    """
    from numba import cuda

    cuda.get_current_device().reset()

In [None]:
# reset_gpu()

### Credits

https://www.kaggle.com/mlanhenke/tps-11-nn-baseline-keras  
https://www.kaggle.com/stiwar1/tps-nov-21-neural-network-baseline