Notebook Sections

1. [Data Preprocessing](#Data-Preprocessing)
2. [Compare Results of Different Activation Functions](#Compare-Results-of-Different-Activation-Functions)
3. [Tuning Other Parameters (number of layers, number of neurons, optimizer)](#Tuning-Other-Parameters-(number-of-layers,-number-of-neurons,-optimizer))
4. [Build and Train the Deep Learning Model Using Best Parameters](#Build-and-Train-the-Deep-Learning-Model-Using-Best-Parameters)
5. [Best Model Prediction and Submission](#Best-Model-Prediction-and-Submission)

In [None]:
import numpy as np
import pandas as pd
# Setup plotting
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name this
# installation actually ships instead of failing on a fresh kernel.
plot_style = ('seaborn-whitegrid'
              if 'seaborn-whitegrid' in plt.style.available
              else 'seaborn-v0_8-whitegrid')
plt.style.use(plot_style)
# Set Matplotlib defaults
plt.rc('figure', autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=18, titlepad=10)


from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import plot_model, to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
#from scikeras.wrappers import KerasClassifier

# Data Preprocessing

In [None]:
# Load the competition files; row_id becomes the index of both frames.
test_df = pd.read_csv('../input/tabular-playground-series-feb-2022/test.csv', index_col='row_id')
df = pd.read_csv('../input/tabular-playground-series-feb-2022/train.csv', index_col='row_id')

# Features are every training column except the label; df itself is left intact.
X = df.drop(columns=['target'])

# Encode the class labels as integers. The fitted encoder is kept so that
# predictions can be mapped back to the original labels later on.
le = LabelEncoder()
y = le.fit_transform(df['target'])
le.classes_

In [None]:
# Hold out 25% of the rows for validation.
# stratify - make sure classes are evenly represented across splits
# random_state - pin the shuffle so the split itself is the same after every
# kernel restart (no seed has been set yet at this point in the notebook).
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, stratify=y, train_size=0.75, random_state=123)

# Number of feature columns, wrapped in a list for Keras' `input_shape` argument.
input_shape = [X_train.shape[1]]

# Compare Results of Different Activation Functions

In [None]:
def get_model(act_function):
    """Build and compile a classifier whose hidden layers use `act_function`.

    The network has two 512-unit hidden layers followed by a 10-way softmax
    output. The input width comes from the module-level `input_shape`. The
    model is compiled with the Adam optimizer, sparse categorical
    cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dense(512, input_shape=input_shape, activation=act_function),
        Dense(512, activation=act_function),
        Dense(10, activation='softmax'),
    ])
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Candidate hidden-layer activations to compare
activations = ['sigmoid', 'tanh', 'relu', 'leaky_relu', 'PReLU', 'selu', 'swish']

# Every candidate is trained with the same settings so the runs are comparable
fit_options = dict(
    validation_data=(X_valid, y_valid),
    batch_size=2048,
    epochs=50,
    verbose=0,
)

# Maps activation name -> the History object returned by its training run
activation_results = {}
for func in activations:
    model = get_model(act_function=func)
    history = model.fit(X_train, y_train, **fit_options)
    activation_results[func] = history

In [None]:
# One column per activation function, one row per epoch.
val_loss_per_func = {
    name: run.history['val_loss'] for name, run in activation_results.items()
}
val_acc_per_func = {
    name: run.history['val_accuracy'] for name, run in activation_results.items()
}
val_loss_curves = pd.DataFrame(val_loss_per_func)
val_acc_curves = pd.DataFrame(val_acc_per_func)

# Loss figure first, accuracy figure second.
val_loss_curves.plot(title='Loss per Activation function')
val_acc_curves.plot(title='Accuracy per Activation function')

In [None]:
# Report the lowest validation loss and highest validation accuracy
# reached at any epoch for each activation function.
separator = "-"*50
for func in activations:
    epoch_metrics = activation_results[func].history
    lowest_loss = min(epoch_metrics['val_loss'])
    highest_accuracy = max(epoch_metrics['val_accuracy'])
    report = "Best Validation Loss for {}: {}".format(func, lowest_loss)
    report += "\nBest Validation Accuracy for {}: {}".format(func, highest_accuracy)
    print(report)
    print(separator)

Judging by the validation loss and accuracy printed above, the best activation functions are `relu` and `PReLU`.

# Tuning Other Parameters (number of layers, number of neurons, optimizer)

In [None]:
def create_model(nl=2, nn=256, optimizer='adam', activation='relu'):
    """Build and compile a fully connected classifier with a tunable hidden stack.

    Parameters
    ----------
    nl : int
        Number of hidden blocks stacked after the fixed 256-unit first layer.
    nn : int
        Number of units in the Dense layer of each hidden block.
    optimizer : str or optimizer instance
        Passed unchanged to `model.compile`.
    activation : str
        Activation used by the first layer and by every hidden Dense layer.

    Returns
    -------
    A compiled `Sequential` model with a 10-way softmax output, trained with
    sparse categorical cross-entropy and reporting accuracy. The input width
    comes from the module-level `input_shape`.
    """
    model = Sequential()
    model.add(Dense(256, input_shape=input_shape, activation=activation))
    # add as many hidden layers as specified in nl
    for _ in range(nl):
        # layers have nn neurons
        model.add(Dense(nn, activation=activation))
        # Dropout and BatchNormalization were previously created as bare
        # expressions and thrown away; they only become part of the network
        # when passed to model.add.
        model.add(Dropout(0.3))
        model.add(BatchNormalization())
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
%%time
np.random.seed(123)

# create a sklearn estimator
model = KerasClassifier(build_fn=create_model,  epochs=30, batch_size=500, nn=512, nl=2, activation='relu')

# testing parameters
params = dict(nl=[2, 3, 4, 5, 6], 
              nn=[128, 256, 512, 1024, 2048], 
              optimizer=['adam', 'sgd'],
              activation=['relu', 'PReLU'])

# Create a random search cv object and fit it to the train data
random_search = RandomizedSearchCV(model, param_distributions=params, scoring='accuracy', cv=2, n_iter=20)
random_search_results = random_search.fit(X_train, y_train, verbose=0)

# print best results
print('-'*70 + '\n' +'-'*70 + "\nBest: {} using {}".format(random_search_results.best_score_, random_search_results.best_params_))
print('-'*70 + '\n' +'-'*70)

# Build and Train the Deep Learning Model Using Best Parameters

In [None]:
# Build the final network with fixed hyperparameters.
# NOTE(review): these values are hard-coded rather than read from
# random_search_results.best_params_, so they will not follow the search if it
# is re-run — confirm they match the best parameters it reports.
best_model = create_model(nl=2, nn=2048, optimizer='adam', activation='relu')

In [None]:
# Train the final model with early stopping: give up after 10 epochs without
# an improvement of at least 0.001, then roll back to the best weights seen.
early_stopping = EarlyStopping(
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

# 200 epochs is only an upper bound; the callback normally ends training sooner.
history = best_model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    batch_size=500,
    epochs=200,
    callbacks=[early_stopping],
    verbose=0,
)

In [None]:
# Per-epoch metrics as a frame: one row per epoch, one column per metric.
history_df = pd.DataFrame(history.history)

# Best values reached at any epoch of the run.
best_val_loss = history_df['val_loss'].min()
best_val_accuracy = history_df['val_accuracy'].max()
summary_template = ("Best Validation Loss: {:0.4f}" +
                    "\nBest Validation Accuracy: {:0.4f}")
print(summary_template.format(best_val_loss, best_val_accuracy))

# Training vs. validation curves: loss first, then accuracy.
history_df[['loss', 'val_loss']].plot(title='Cross-entropy')
history_df[['accuracy', 'val_accuracy']].plot(title='Accuracy')

# Best Model Prediction and Submission

In [None]:
# Per-class scores from the softmax output layer for every test row
class_scores = best_model.predict(test_df)
# Take the highest-scoring class index in each row, then map the integer
# codes back to the original labels with the encoder fitted earlier.
predicted_codes = np.argmax(class_scores, axis = 1)
y_pred = le.inverse_transform(predicted_codes)
y_pred

In [None]:
# Start from the sample submission so the row ids and column layout are kept,
# then overwrite its target column with the predicted labels.
submission = (
    pd.read_csv('../input/tabular-playground-series-feb-2022/sample_submission.csv')
    .assign(target=y_pred)
)
submission.to_csv("submission.csv", index=False)
submission