# Neural Network Baseline

In this notebook, we get a baseline for a Keras model.

In [1]:
# Global variables for testing changes to this notebook quickly
RANDOM_SEED = 0  # used to seed TensorFlow and as random_state of the StratifiedKFold split
NUM_FOLDS = 10  # number of cross-validation folds; also the divisor when averaging test predictions

In [2]:
# General Imports
import numpy as np
import pandas as pd
import scipy.stats as stats
import time
import os
import pyarrow
import gc
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# Logging/Warnings
import warnings
warnings.filterwarnings('ignore')

# Tensorflow/Keras
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras


# Keras imports
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import CosineDecayRestarts
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# TF addons
from tensorflow_addons.optimizers import SWA, Lookahead, AdamW

# Model evaluation
from sklearn.base import clone
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from sklearn.metrics import roc_auc_score, accuracy_score

# Plotting
import matplotlib.pyplot as plt
%matplotlib inline

# Set Seed
# Seed NumPy's global generator as well as TensorFlow's, so that every
# library drawing random numbers in this notebook starts from RANDOM_SEED.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Feature Engineering

In [3]:
%%time

# Load data (paths are relative to the notebook's working directory)
train = pd.read_feather('../data/train.feather')
test = pd.read_feather('../data/test.feather')
submission = pd.read_csv('../data/sample_submission.csv')

# Get feature columns: everything except the row id and the label
features = [x for x in train.columns if x not in ['id', 'target']]

Wall time: 824 ms


In [4]:
# Row-wise summary statistics to add, keyed by column group:
#   'h' -> columns whose training skewness is >= 2 (skewed distributions)
#   'l' -> the remaining columns (described as bimodal in this notebook)
# Each new column is named '<statistic>_<group>', e.g. 'median_h'.
row_stats = {
    'h': ['median', 'var'],
    'l': ['mean', 'std', 'median', 'skew', 'max', 'var'],
}
engineered = [
    f'{stat_name}_{group}'
    for group, stat_names in row_stats.items()
    for stat_name in stat_names
]

# Leave out previously engineered columns so that re-running this cell
# splits the same raw columns instead of also splitting its own output.
base_features = [x for x in features if x not in engineered]

# Column-wise skewness of the training data, computed once and reused
feature_skew = train[base_features].skew()
h_skew = feature_skew[feature_skew >= 2].index  # skewed distributions
l_skew = feature_skew[feature_skew < 2].index   # bimodal distributions
skew_groups = {'h': h_skew, 'l': l_skew}

# Apply the same statistics to train and test, using the split derived from train
for frame in (train, test):
    for group, stat_names in row_stats.items():
        group_values = frame[skew_groups[group]]
        for stat_name in stat_names:
            frame[f'{stat_name}_{group}'] = getattr(group_values, stat_name)(axis=1)

# Update feature columns
features = [x for x in train.columns if x not in ['id', 'target']]

# Scoring Function

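A benchmarking function which accepts one argument:

* sklearn_model - a scikit-learn compatible classifier (here, a `KerasClassifier` wrapping the Keras model) that is cloned and refit on every cross-validation fold

It returns the per-fold AUC scores, the fold-averaged test predictions, and the out-of-fold predictions.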

In [5]:
# Scoring/Training Baseline Function
def train_model(sklearn_model):
    """Cross-validate a scikit-learn style classifier and average its test predictions.

    Uses the notebook-level ``train``, ``test``, ``features``, ``NUM_FOLDS``
    and ``RANDOM_SEED``. For every stratified fold, a
    ``StandardScaler -> MinMaxScaler`` pipeline is fit on that fold's training
    rows only and then applied to the validation rows and to the test set.
    A fresh clone of ``sklearn_model`` is fit on the scaled training rows.

    Parameters
    ----------
    sklearn_model : estimator
        Must be cloneable with ``sklearn.base.clone`` and provide ``fit(X, y)``
        and ``predict_proba(X)``; column 1 of ``predict_proba`` is used as the
        score.

    Returns
    -------
    scores : numpy.ndarray
        ROC AUC on the validation rows of each fold, length ``NUM_FOLDS``.
    test_preds : numpy.ndarray
        Test-set scores averaged over the folds, one value per row of ``test``.
    oof_preds : numpy.ndarray
        Out-of-fold scores, one value per row of ``train``.
    """

    # Store the holdout predictions
    oof_preds = np.zeros((train.shape[0],))
    test_preds = np.zeros((test.shape[0],))
    scores = np.zeros(NUM_FOLDS)
    times = np.zeros(NUM_FOLDS)
    print('')

    # Stratified k-fold cross-validation
    skf = StratifiedKFold(n_splits = NUM_FOLDS, shuffle = True, random_state = RANDOM_SEED)
    for fold, (train_idx, valid_idx) in enumerate(skf.split(train, train['target'])):

        # A new scaler per fold, so no statistics leak from the validation rows
        scaler = make_pipeline(
            StandardScaler(),
            MinMaxScaler()
        )

        # Training and Validation Sets
        X_train = scaler.fit_transform(train[features].iloc[train_idx])
        X_valid = scaler.transform(train[features].iloc[valid_idx])
        X_test = scaler.transform(test[features])

        # Training and Target Labels
        y_train = train['target'].iloc[train_idx].to_numpy()
        y_valid = train['target'].iloc[valid_idx].to_numpy()

        # Create model: an unfitted copy, so folds do not share learned state
        model = clone(sklearn_model)

        # Time the fit only; the measurement is taken before any prediction
        # so that the reported training time excludes inference and scoring.
        start = time.time()
        model.fit(X_train, y_train)
        fit_seconds = time.time() - start

        # validation and test predictions
        valid_preds = model.predict_proba(X_valid)[:, 1]
        test_preds += model.predict_proba(X_test)[:, 1] / NUM_FOLDS
        oof_preds[valid_idx] = valid_preds

        # fold auc score
        fold_auc = roc_auc_score(y_valid, valid_preds)
        print(f'Fold {fold} (AUC): {round(fold_auc, 5)} in {round(fit_seconds,2)}s.')
        scores[fold] = fold_auc
        times[fold] = fit_seconds

        # Short pause between folds, kept from the original implementation
        # NOTE(review): purpose is not evident from this notebook — confirm it is still needed
        time.sleep(0.5)

    print("\nAverage AUC:", round(scores.mean(), 5))
    print("Worst AUC:", round(scores.min(), 5))
    print(f'Training Time: {round(times.sum(), 2)}s')

    return scores, test_preds, oof_preds

# MLP w/ Learning Rate Decay

We follow the NN architecture from this [kaggle notebook](https://www.kaggle.com/adityasharma01/simple-nn-tps-nov-21).

In [6]:
# Keras Parameters
BATCH_SIZE = 2048  # batch_size given to the KerasClassifier wrapper
EPOCHS = 1000  # epochs given to the KerasClassifier wrapper; an EarlyStopping callback is also configured
INIT_LR = 2.65e-4  # learning_rate the Adam optimizer is created with in build_model
EARLY_STOP = 20  # patience of the EarlyStopping callback
VERBOSE = 0  # verbose setting given to the KerasClassifier wrapper

In [7]:
def build_model():
    """Create and compile the baseline MLP.

    The network takes ``len(features)`` inputs, passes them through three
    swish-activated dense layers (108, 64 and 32 units) and ends in a single
    sigmoid unit. It is compiled with binary cross-entropy loss, an Adam
    optimizer starting at ``INIT_LR`` and an AUC metric.

    Returns
    -------
    The compiled ``keras.Sequential`` model.
    """
    hidden_units = (108, 64, 32)

    # Assemble the layer stack front to back: input, hidden layers, output
    stack = [layers.InputLayer(input_shape = (len(features),))]
    for units in hidden_units:
        stack.append(layers.Dense(units, activation="swish"))
    stack.append(layers.Dense(1, activation="sigmoid"))

    network = keras.Sequential(stack)
    network.compile(
        loss = tf.keras.losses.BinaryCrossentropy(),
        optimizer = tf.keras.optimizers.Adam(learning_rate = INIT_LR),
        metrics = [tf.keras.metrics.AUC()],
    )
    return network

In [8]:
# Callbacks used while fitting: stop once val_loss has not improved for
# EARLY_STOP epochs (restoring the best weights), and multiply the learning
# rate by 0.2 after 7 epochs without val_loss improvement.
fit_callbacks = [
    EarlyStopping(monitor='val_loss', patience=EARLY_STOP, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7),
]

# scikit-learn style wrapper around build_model, carrying the fit parameters
baseline_model = KerasClassifier(
    build_model,
    batch_size = BATCH_SIZE,
    epochs = EPOCHS,
    verbose = VERBOSE,
    shuffle = True,
    validation_split = 0.1,
    callbacks = fit_callbacks,
)

In [9]:
# Simple NN Baseline
# Create the output folder before the long cross-validation run, so a missing
# directory surfaces immediately rather than when the submission is written.
os.makedirs('../output', exist_ok=True)

scores, test_preds, oof_preds = train_model(baseline_model)

# Write the fold-averaged test predictions in the sample submission layout
submission['target'] = test_preds
submission.to_csv('../output/simple_nn_submission.csv', index=False)


Fold 0 (AUC): 0.75379 in 210.52s.
Fold 1 (AUC): 0.74887 in 223.53s.
Fold 2 (AUC): 0.75186 in 281.04s.
Fold 3 (AUC): 0.75429 in 225.41s.
Fold 4 (AUC): 0.75135 in 208.11s.
Fold 5 (AUC): 0.75156 in 236.88s.
Fold 6 (AUC): 0.75555 in 236.65s.
Fold 7 (AUC): 0.75147 in 267.44s.
Fold 8 (AUC): 0.75353 in 200.61s.
Fold 9 (AUC): 0.74782 in 241.68s.

Average AUC: 0.75201
Worst AUC: 0.74782
Training Time: 2331.87s
