# Find the optimal hyperparameters for the multilayer perceptron (MLP) model

In [1]:
#install libraries (if not already in environment)
!pip install --upgrade scikit-learn
!pip install pandas
!pip install pyyaml h5py 
!pip install seaborn


Collecting scikit-learn
  Downloading scikit_learn-0.24.1-cp36-cp36m-manylinux2010_x86_64.whl (22.2 MB)
[K     |████████████████████████████████| 22.2 MB 982 kB/s eta 0:00:01
[?25hCollecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-2.1.0-py3-none-any.whl (12 kB)
Collecting joblib>=0.11
  Downloading joblib-1.0.1-py3-none-any.whl (303 kB)
[K     |████████████████████████████████| 303 kB 1.6 MB/s eta 0:00:01
Collecting scipy>=0.19.1
  Downloading scipy-1.5.4-cp36-cp36m-manylinux1_x86_64.whl (25.9 MB)
[K     |████████████████████████████████| 25.9 MB 1.2 MB/s eta 0:00:01
[?25hInstalling collected packages: threadpoolctl, joblib, scipy, scikit-learn
Successfully installed joblib-1.0.1 scikit-learn-0.24.1 scipy-1.5.4 threadpoolctl-2.1.0
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Collecting pandas
  Downloading pandas-1.1.5-cp36-cp36m-manylinux1_x86_64.whl (9.5 MB)
[K     |████████████████████████████████| 9.5 MB 831 kB/s 

In [2]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.optimizers import Adam, SGD, Adamax

import sklearn.metrics as metrics
from sklearn.metrics import auc, plot_roc_curve, roc_curve, mean_squared_error, accuracy_score, roc_auc_score, classification_report, confusion_matrix, log_loss
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
import sklearn

#add path to the functions folder
import sys
sys.path.append('../onc_functions')

# load custom function for building the NN
from build_mlp import build_mlp 

# other libraries
import numpy as np
import pandas as pd

import pickle

#plotting
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Compact numpy display: three decimals, no scientific notation.
np.set_printoptions(suppress=True, precision=3)

# Record the library versions this notebook was executed with.
print(f'tensorflow-{tf.__version__}')
print(f'python-{sys.version}')
print(f'sklearn-{sklearn.__version__}')

tensorflow-2.4.1
python-3.6.9 (default, Oct  8 2020, 12:12:24) 
[GCC 8.4.0]
sklearn-0.24.1


In [4]:
# Read back the pickled collection of numeric column names; it is used further
# down to select the columns that get standardised.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('numeric_columns.pickle', mode='rb') as f:
    nu_cols = pickle.load(f)

# Create Model Layers

- Trying different class weights (to handle the class imbalance) requires rerunning the grid search once per weight setting.

In [8]:

def mlp_cv(selected_class_weight, weight_name, imputation):
    """Grid-search MLP hyperparameters on one imputed dataset and pickle the results.

    Loads ``complete<imputation>.pickle``, keeps the rows with ``subset <= 6``,
    standardises the columns listed in the notebook-level ``nu_cols`` and runs a
    ``GridSearchCV`` (``cv=2``, scored by average precision) over a
    ``KerasClassifier`` that wraps ``build_mlp``. Every tried combination is
    printed, and the best parameters, best score and full ``cv_results_`` are
    pickled into ``./results/``.

    Parameters
    ----------
    selected_class_weight : dict
        Class-weight mapping (e.g. ``{0: 1, 1: 20}``) forwarded through
        ``GridSearchCV.fit`` as the ``class_weight`` fit parameter.
    weight_name : object
        Label of the weighting scheme; only used, via ``str``, in the names of
        the output files.
    imputation : object
        Identifier of the imputed dataset; used, via ``str``, in the input and
        output file names.

    Returns
    -------
    None
    """
    import os  # local import: only needed to make sure the output folder exists

    # fix random seeds for reproducibility (NumPy and TensorFlow).
    # NOTE(review): with n_jobs=-1 the fits run in worker processes, which may
    # not inherit these seeds - confirm if exact reproducibility is required.
    seed = 78
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # create the output folder up front so that a long grid search cannot be
    # lost at the very end because the folder is missing
    os.makedirs('./results', exist_ok=True)

    # import an imputed dataset
    # NOTE: pickle.load can execute arbitrary code - only load files you trust.
    with open('complete' + str(imputation) + '.pickle', 'rb') as f:
        dataset = pickle.load(f)

    # keep only the training data subsets
    X_train = dataset[dataset.subset <= 6].copy().sort_values(by='usrds_id')
    del dataset

    y_train = np.array(X_train.pop('died_in_90'))

    # scale the numeric columns
    # NOTE(review): the scaler is fitted on all training rows before the
    # cross-validation split, so each validation fold influences the scaling.
    scaler = StandardScaler()
    X_train[nu_cols] = scaler.fit_transform(X_train[nu_cols])
    X_train = np.array(X_train.drop(columns=['subset', 'usrds_id', 'impnum']))
    print('scaled shape train ' + str(X_train.shape))

    # Create a MirroredStrategy (can take advantage of a GPU if you have some,
    # otherwise it just uses a single threaded approach)
    strategy = tf.distribute.MirroredStrategy()
    print("Number of devices: {}".format(strategy.num_replicas_in_sync))

    # Open a strategy scope.
    # NOTE(review): grid.fit() below runs after this scope has exited, and that
    # is where build_mlp gets called - confirm the strategy is actually applied
    # to the models being trained.
    with strategy.scope():
        # hyperparameter grid; `n_layers` avoids shadowing the imported
        # `tensorflow.keras.layers` module
        neurons = [16, 32, 64, 128]
        n_layers = [1, 2]
        kernel_regularizer = ['l2']
        dropout_rate = [0.1, 0.2, 0.4, 0.5, 0.6]
        learn_rate = [.001, .0001, .0002]
        activation = ['relu', 'sigmoid', 'tanh']
        optimizer = ['Adam']
        epochs = [10, 20]  # 1mill/256=4000 steps for one pass thru dataset
        batches = [512, 256]
        output_bias = [None]

        param_grid = dict(neurons=neurons,
                          layers=n_layers,
                          kernel_regularizer=kernel_regularizer,
                          dropout_rate=dropout_rate,
                          learn_rate=learn_rate,
                          activation=activation,
                          optimizer=optimizer,
                          epochs=epochs,
                          batch_size=batches,
                          output_bias=output_bias)

        # early stopping for the epochs based on the auc under the precision recall curve
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='auc_pr',
            verbose=1,
            patience=10,
            mode='max',
            restore_best_weights=True)

        # use the Keras wrapper for scikit-learn and our custom build_mlp function
        weighted_model_skl = KerasClassifier(build_fn=build_mlp,
                                             verbose=0)

        # evaluate every combination with 2-fold cross validation (cv=2),
        # ranked by average precision
        grid = GridSearchCV(
            weighted_model_skl,
            param_grid=param_grid,
            cv=2,
            scoring='average_precision',
            return_train_score=True,
            n_jobs=-1
        )

    print('fit model')
    grid_result = grid.fit(
        X_train,
        y_train,
        class_weight=selected_class_weight,
        callbacks=[early_stopping]
    )

    # summarize results
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    tried_params = grid_result.cv_results_['params']

    for mean, stdev, param in zip(means, stds, tried_params):
        print("%f (%f) with: %r" % (mean, stdev, param))
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

    # save results
    with open('./results/2021_grid_best_params_imp_' + str(imputation) + '_weight_' + str(weight_name) + '.pickle', 'wb') as f:
        pickle.dump(grid_result.best_params_, f)

    with open('./results/2021_grid_best_auc_imp_' + str(imputation) + '_weight_' + str(weight_name) + '.pickle', 'wb') as f:
        pickle.dump(grid_result.best_score_, f)

    with open('./results/2021_grid_cv_results_imp_' + str(imputation) + '_weight_' + str(weight_name) + '.pickle', 'wb') as f:
        pickle.dump(grid_result.cv_results_, f)

In [12]:
# Outcome class counts: positives are 7.48% of the total, negatives 92.52%.
positive_class_count = 86083
neg_class_count = 1064112
total = 1150195

# "Balanced" weights: inverse class frequency, scaled by total/2 so the loss
# keeps a similar magnitude and the summed weight of all examples is unchanged.
weight_for_0 = (1 / neg_class_count) * total / 2.0
weight_for_1 = (1 / positive_class_count) * total / 2.0
class_weight_m = {0: weight_for_0, 1: weight_for_1}

# Fixed ratios: the negative class keeps weight 1, the positive class is up-weighted.
class_weight_5, class_weight_10, class_weight_20 = (
    {0: 1, 1: positive_weight} for positive_weight in (5, 10, 20)
)

# Run the cross validation for one weighting scheme (rerun per scheme as needed).
mlp_cv(selected_class_weight=class_weight_20, weight_name=20, imputation=5)

scaled shape train (804890, 294)
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
Number of devices: 1
fit model n/
Best: 0.235455 using {'activation': 'relu', 'batch_size': 256, 'dropout_rate': 0.2, 'epochs': 15, 'kernel_regularizer': 'l2', 'layers': 2, 'learn_rate': 0.0002, 'neurons': 32, 'optimizer': 'Adam', 'output_bias': None}
0.234318 (0.009525) with: {'activation': 'relu', 'batch_size': 256, 'dropout_rate': 0.2, 'epochs': 10, 'kernel_regularizer': 'l2', 'layers': 2, 'learn_rate': 0.0002, 'neurons': 16, 'optimizer': 'Adam', 'output_bias': None}
0.233863 (0.008253) with: {'activation': 'relu', 'batch_size': 256, 'dropout_rate': 0.2, 'epochs': 10, 'kernel_regularizer': 'l2', 'layers': 2, 'learn_rate': 0.0002, 'neurons': 32, 'optimizer': 'Adam', 'output_bias': None}
0.235408 (0.007534) with: {'activation': 'relu', 'batch_size': 256, 'dropout_rate': 0.2, 'epochs': 15, 'kernel_regularizer': 'l2', 'layers': 2, 'learn_rate': 0.0002, '