# Hyperparameter optimization

Grid search over several hyperparameters of the neural network:
 - Activation function (tried earlier; the alternatives did not help, so we stick with 'ReLU')
 - Dropout rate
 - Number of epochs (fixed at epochs=10 to save time)
 - Batch size
 
Snippets of code adapted from https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/

In [1]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.utils.np_utils import to_categorical

Using TensorFlow backend.


In [2]:
# Read training_data.txt (space-delimited, first row skipped as a header).
traindata = np.genfromtxt('training_data.txt', skip_header=1, delimiter=' ')
# Column 0 becomes Y_train; every remaining column becomes X_train.
Y_train, X_train = traindata[:, 0], traindata[:, 1:]

# Read test_data.txt with the same parsing options.
testdata = np.genfromtxt('test_data.txt', skip_header=1, delimiter=' ')

In [3]:
# Scale the features by dividing every value by the number of feature columns,
# and expand the labels into two-column one-hot rows.
n_feature_columns = float(len(X_train[0]))
X_train_final = X_train / n_feature_columns
Y_train_final = to_categorical(Y_train, 2)

## Search over dropout rate

In [4]:
# Model factory passed to KerasClassifier as build_fn
def create_model(dropout_rate):
    """Build and compile the 300-300-400 ReLU network with a 2-way softmax output.

    dropout_rate is the rate given to the Dropout layers that follow the first
    and second hidden layers. The input width is read from the notebook-level
    X_train. Returns the compiled Sequential model.
    """
    n_inputs = len(X_train[0])

    # Layers are listed in the order they are added to the model.
    layer_stack = [
        Dense(300, input_shape=(n_inputs,)),
        Activation('relu'),
        Dropout(dropout_rate),
        Dense(300),
        Activation('relu'),
        Dropout(dropout_rate),
        Dense(400),
        Activation('relu'),
        Dense(2),
        Activation('softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [5]:
# Seed NumPy's global random generator.
# NOTE(review): only NumPy is seeded here; whether that is enough to make the
# TensorFlow-backed training repeatable is not shown in this notebook -- confirm.
seed = 24
np.random.seed(seed)

# Wrap the model factory for scikit-learn: 10 epochs, batches of 30, silent training.
classifier_settings = dict(epochs=10, batch_size=30, verbose=0)
model = KerasClassifier(build_fn=create_model, **classifier_settings)

In [8]:
# Candidate dropout rates; this search varies the dropout rate only
# (the activation function is not part of the grid).
dropout_rate = [0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
param_grid = {'dropout_rate': dropout_rate}

# 5-fold cross-validated grid search with n_jobs=-1 and per-fit progress output.
grid = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=3)
grid_result = grid.fit(X_train_final, Y_train_final)

Fitting 5 folds for each of 7 candidates, totalling 35 fits
[CV] dropout_rate=0.2 ................................................
[CV] dropout_rate=0.2 ................................................
[CV] dropout_rate=0.2 ................................................
[CV] dropout_rate=0.2 ................................................
[CV] ........... dropout_rate=0.2, score=0.861249992847, total= 3.2min
[CV] dropout_rate=0.2 ................................................
[CV] ........... dropout_rate=0.2, score=0.823749996573, total= 3.2min
[CV] dropout_rate=0.25 ...............................................
[CV] ............ dropout_rate=0.2, score=0.84224999398, total= 3.2min
[CV] dropout_rate=0.25 ...............................................
[CV] ........... dropout_rate=0.2, score=0.847999992073, total= 3.2min
[CV] dropout_rate=0.25 ...............................................
[CV] ........... dropout_rate=0.2, score=0.827499994785, total= 3.5min
[CV] dropout_rate

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 20.8min


[CV] dropout_rate=0.45 ...............................................
[CV] .......... dropout_rate=0.45, score=0.862499994338, total= 3.5min
[CV] ........... dropout_rate=0.4, score=0.844749995172, total= 3.5min
[CV] dropout_rate=0.45 ...............................................
[CV] dropout_rate=0.45 ...............................................
[CV] .......... dropout_rate=0.45, score=0.842499994189, total= 3.5min
[CV] dropout_rate=0.5 ................................................
[CV] .......... dropout_rate=0.45, score=0.844249995798, total= 3.5min
[CV] dropout_rate=0.5 ................................................
[CV] .......... dropout_rate=0.45, score=0.825749994516, total= 3.6min
[CV] dropout_rate=0.5 ................................................
[CV] .......... dropout_rate=0.45, score=0.837499995828, total= 3.5min
[CV] dropout_rate=0.5 ................................................
[CV] ........... dropout_rate=0.5, score=0.865749992579, total= 3.5min
[CV] d

[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed: 30.6min finished


In [9]:
# Report the best mean cross-validation score and the parameters that produced it
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)

Best: 0.849200 using {'dropout_rate': 0.5}


In [10]:
# Print one line per candidate: mean test score, its standard deviation, and the parameters
cv_table = grid_result.cv_results_
means, stds, params = (cv_table['mean_test_score'],
                       cv_table['std_test_score'],
                       cv_table['params'])
row_format = "%f (%f) with: %r"
for mean, stdev, param in zip(means, stds, params):
    print(row_format % (mean, stdev, param))

0.840550 (0.013707) with: {'dropout_rate': 0.2}
0.842850 (0.014075) with: {'dropout_rate': 0.25}
0.834700 (0.011268) with: {'dropout_rate': 0.3}
0.845850 (0.010404) with: {'dropout_rate': 0.35}
0.834600 (0.016130) with: {'dropout_rate': 0.4}
0.842500 (0.011905) with: {'dropout_rate': 0.45}
0.849200 (0.008340) with: {'dropout_rate': 0.5}


## Search over batch size

In [4]:
# Model factory for the batch-size search: the dropout rate is held fixed
# (0.25 by default) while the grid search varies the batch size.
def create_model(dropout_rate=0.25):
    """Build and compile the 300-300-400 ReLU network with a 2-way softmax output.

    dropout_rate is the rate given to the Dropout layers that follow the first
    and second hidden layers. It defaults to 0.25, the value that was previously
    hard-coded, so calling create_model() with no arguments builds the same
    model as before. The input width is read from the notebook-level X_train.
    Returns the compiled Sequential model.
    """
    # Create model
    model = Sequential()

    model.add(Dense(300, input_shape=(len(X_train[0]),)))
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(300))
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(400))
    model.add(Activation('relu'))

    model.add(Dense(2))
    model.add(Activation('softmax'))

    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [5]:
# Seed NumPy's global random generator.
# NOTE(review): only NumPy is seeded here; whether that is enough to make the
# TensorFlow-backed training repeatable is not shown in this notebook -- confirm.
seed = 10
np.random.seed(seed)

# Wrap the model factory for scikit-learn: 10 epochs, silent training.
# No batch_size is set here; the grid search supplies it.
classifier_settings = dict(epochs=10, verbose=0)
model = KerasClassifier(build_fn=create_model, **classifier_settings)

In [6]:
# Candidate batch sizes; the number of epochs is not part of this grid.
batch_size = [100, 200, 300, 400, 500]
param_grid = {'batch_size': batch_size}

# 5-fold cross-validated grid search with n_jobs=-1 and per-fit progress output.
grid_epochbatch = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=3)
grid_result_epochbatch = grid_epochbatch.fit(X_train_final, Y_train_final)

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV] batch_size=100 ..................................................
[CV] batch_size=100 ..................................................
[CV] batch_size=100 ..................................................
[CV] batch_size=100 ..................................................
[CV] ............. batch_size=100, score=0.855749997497, total= 1.9min
[CV] batch_size=100 ..................................................
[CV] ............. batch_size=100, score=0.853500001132, total= 1.9min
[CV] batch_size=200 ..................................................
[CV] ............. batch_size=100, score=0.847750000656, total= 1.9min
[CV] batch_size=200 ..................................................
[CV] ............. batch_size=100, score=0.829250001907, total= 1.9min
[CV] batch_size=200 ..................................................
[CV] ............. batch_size=200, score=0.835749998689, total= 1.7min
[CV] batch_size=2

[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed:  9.2min finished


In [7]:
# Report the best mean cross-validation score and the parameters that produced it
best_summary = (grid_result_epochbatch.best_score_, grid_result_epochbatch.best_params_)
print("Best: %f using %s" % best_summary)

Best: 0.848200 using {'batch_size': 100}


In [8]:
# Print one line per candidate: mean test score, its standard deviation, and the parameters
cv_table = grid_result_epochbatch.cv_results_
means, stds, params = (cv_table['mean_test_score'],
                       cv_table['std_test_score'],
                       cv_table['params'])
row_format = "%f (%f) with: %r"
for mean, stdev, param in zip(means, stds, params):
    print(row_format % (mean, stdev, param))

0.848200 (0.009872) with: {'batch_size': 100}
0.845900 (0.006893) with: {'batch_size': 200}
0.839300 (0.004017) with: {'batch_size': 300}
0.845200 (0.005875) with: {'batch_size': 400}
0.840550 (0.010032) with: {'batch_size': 500}


## Search over batch size and dropout rate

In [4]:
def create_model(dropout_rate):
    """Build and compile the 300-300-400 ReLU network with a 2-way softmax output.

    dropout_rate is the rate given to the Dropout layers that follow the first
    and second hidden layers. The input width is read from the notebook-level
    X_train. Returns the compiled Sequential model.
    """
    feature_count = len(X_train[0])

    # Sequential adds the listed layers in order.
    model = Sequential([
        Dense(300, input_shape=(feature_count,)),
        Activation('relu'),
        Dropout(dropout_rate),
        Dense(300),
        Activation('relu'),
        Dropout(dropout_rate),
        Dense(400),
        Activation('relu'),
        Dense(2),
        Activation('softmax'),
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [5]:
# Seed NumPy's global random generator.
# NOTE(review): only NumPy is seeded here; whether that is enough to make the
# TensorFlow-backed training repeatable is not shown in this notebook -- confirm.
seed = 23
np.random.seed(seed)

# Wrap the model factory for scikit-learn: 10 epochs, silent training.
# Batch size and dropout rate are left for the grid search to supply.
classifier_settings = dict(epochs=10, verbose=0)
model = KerasClassifier(build_fn=create_model, **classifier_settings)

In [7]:
# Joint grid: every batch size is paired with every dropout rate (5 x 5 candidates).
batch_size = [50, 100, 150, 200, 250]
dropout_rate = [0.2, 0.3, 0.4, 0.5, 0.6]
param_grid = {'batch_size': batch_size, 'dropout_rate': dropout_rate}

# 5-fold cross-validated grid search with n_jobs=-1 and per-fit progress output.
grid_batchdropout = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=3)
grid_result_batchdropout = grid_batchdropout.fit(X_train_final, Y_train_final)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV] dropout_rate=0.2, batch_size=50 .................................
[CV] dropout_rate=0.2, batch_size=50 .................................
[CV] dropout_rate=0.2, batch_size=50 .................................
[CV] dropout_rate=0.2, batch_size=50 .................................
[CV]  dropout_rate=0.2, batch_size=50, score=0.857999992371, total= 2.9min
[CV] dropout_rate=0.2, batch_size=50 .................................
[CV]  dropout_rate=0.2, batch_size=50, score=0.837249993533, total= 2.9min
[CV] dropout_rate=0.3, batch_size=50 .................................
[CV]  dropout_rate=0.2, batch_size=50, score=0.846999994665, total= 2.9min
[CV] dropout_rate=0.3, batch_size=50 .................................
[CV]  dropout_rate=0.2, batch_size=50, score=0.843749993294, total= 2.9min
[CV] dropout_rate=0.3, batch_size=50 .................................
[CV]  dropout_rate=0.2, batch_size=50, score=0.850499992073, total= 3.

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 104.8min


[CV]  dropout_rate=0.2, batch_size=100, score=0.831499995291, total= 1.9min
[CV] dropout_rate=0.2, batch_size=100 ................................
[CV]  dropout_rate=0.2, batch_size=100, score=0.856000000238, total= 1.9min
[CV] dropout_rate=0.2, batch_size=100 ................................
[CV]  dropout_rate=0.2, batch_size=100, score=0.841249999404, total= 1.9min
[CV] dropout_rate=0.3, batch_size=100 ................................
[CV]  dropout_rate=0.6, batch_size=50, score=0.845999994874, total= 2.1min
[CV] dropout_rate=0.3, batch_size=100 ................................
[CV]  dropout_rate=0.2, batch_size=100, score=0.841749997437, total= 1.7min
[CV] dropout_rate=0.3, batch_size=100 ................................
[CV]  dropout_rate=0.2, batch_size=100, score=0.853750003874, total= 1.7min
[CV] dropout_rate=0.3, batch_size=100 ................................
[CV]  dropout_rate=0.3, batch_size=100, score=0.859499999881, total= 1.7min
[CV] dropout_rate=0.3, batch_size=100 .....

[CV]  dropout_rate=0.3, batch_size=200, score=0.858749997616, total= 1.3min
[CV] dropout_rate=0.3, batch_size=200 ................................
[CV]  dropout_rate=0.3, batch_size=200, score=0.85000000298, total= 1.3min
[CV] dropout_rate=0.4, batch_size=200 ................................
[CV]  dropout_rate=0.3, batch_size=200, score=0.84525000751, total= 1.3min
[CV] dropout_rate=0.4, batch_size=200 ................................
[CV]  dropout_rate=0.3, batch_size=200, score=0.845750001073, total= 1.3min
[CV] dropout_rate=0.4, batch_size=200 ................................
[CV]  dropout_rate=0.3, batch_size=200, score=0.852500003576, total= 1.3min
[CV] dropout_rate=0.4, batch_size=200 ................................
[CV]  dropout_rate=0.4, batch_size=200, score=0.854249998927, total= 1.3min
[CV] dropout_rate=0.4, batch_size=200 ................................
[CV]  dropout_rate=0.4, batch_size=200, score=0.841749998927, total= 1.3min
[CV] dropout_rate=0.5, batch_size=200 ......

[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed: 141.5min finished


In [10]:
# Report the best mean cross-validation score and the parameters that produced it
best_summary = (grid_result_batchdropout.best_score_, grid_result_batchdropout.best_params_)
print("Best: %f using %s" % best_summary)

Best: 0.851100 using {'dropout_rate': 0.2, 'batch_size': 200}


In [11]:
# Print one line per candidate: mean test score, its standard deviation, and the parameters
cv_table = grid_result_batchdropout.cv_results_
means, stds, params = (cv_table['mean_test_score'],
                       cv_table['std_test_score'],
                       cv_table['params'])
row_format = "%f (%f) with: %r"
for mean, stdev, param in zip(means, stds, params):
    print(row_format % (mean, stdev, param))

0.847300 (0.006905) with: {'dropout_rate': 0.2, 'batch_size': 50}
0.842800 (0.011772) with: {'dropout_rate': 0.3, 'batch_size': 50}
0.845350 (0.007690) with: {'dropout_rate': 0.4, 'batch_size': 50}
0.847650 (0.005171) with: {'dropout_rate': 0.5, 'batch_size': 50}
0.845050 (0.003638) with: {'dropout_rate': 0.6, 'batch_size': 50}
0.844850 (0.008992) with: {'dropout_rate': 0.2, 'batch_size': 100}
0.811950 (0.040761) with: {'dropout_rate': 0.3, 'batch_size': 100}
0.843350 (0.010822) with: {'dropout_rate': 0.4, 'batch_size': 100}
0.844600 (0.014763) with: {'dropout_rate': 0.5, 'batch_size': 100}
0.847650 (0.007993) with: {'dropout_rate': 0.6, 'batch_size': 100}
0.846600 (0.008206) with: {'dropout_rate': 0.2, 'batch_size': 150}
0.848400 (0.007468) with: {'dropout_rate': 0.3, 'batch_size': 150}
0.847950 (0.008005) with: {'dropout_rate': 0.4, 'batch_size': 150}
0.831500 (0.017798) with: {'dropout_rate': 0.5, 'batch_size': 150}
0.837400 (0.014148) with: {'dropout_rate': 0.6, 'batch_size': 150}


## Search over dropout rate (again -- final search)

In [4]:
# Model factory passed to KerasClassifier as build_fn
def create_model(dropout_rate):
    """Build and compile the 400-400-400 ReLU network with a 2-way softmax output.

    dropout_rate is the rate given to the Dropout layers that follow the first
    and second hidden layers. The input width is read from the notebook-level
    X_train. Returns the compiled Sequential model.
    """
    n_inputs = len(X_train[0])

    # Layers are listed in the order they are added to the model.
    layer_stack = [
        Dense(400, input_shape=(n_inputs,)),
        Activation('relu'),
        Dropout(dropout_rate),
        Dense(400),
        Activation('relu'),
        Dropout(dropout_rate),
        Dense(400),
        Activation('relu'),
        Dense(2),
        Activation('softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [5]:
# Seed NumPy's global random generator.
# NOTE(review): only NumPy is seeded here; whether that is enough to make the
# TensorFlow-backed training repeatable is not shown in this notebook -- confirm.
seed = 98
np.random.seed(seed)

# Wrap the model factory for scikit-learn: 10 epochs, batches of 200, silent training.
classifier_settings = dict(epochs=10, batch_size=200, verbose=0)
model = KerasClassifier(build_fn=create_model, **classifier_settings)

In [6]:
# Candidate dropout rates; this search varies the dropout rate only
# (the activation function is not part of the grid).
dropout_rate = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
param_grid = {'dropout_rate': dropout_rate}

# 5-fold cross-validated grid search with n_jobs=-1 and per-fit progress output.
grid_final = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=3)
grid_result_final = grid_final.fit(X_train_final, Y_train_final)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] dropout_rate=0.1 ................................................
[CV] dropout_rate=0.1 ................................................
[CV] dropout_rate=0.1 ................................................
[CV] dropout_rate=0.1 ................................................
[CV] ........... dropout_rate=0.1, score=0.859999999404, total= 3.0min
[CV] dropout_rate=0.1 ................................................
[CV] ........... dropout_rate=0.1, score=0.839500001073, total= 3.0min
[CV] dropout_rate=0.15 ...............................................
[CV] ............ dropout_rate=0.1, score=0.84675000906, total= 3.0min
[CV] dropout_rate=0.15 ...............................................
[CV] ........... dropout_rate=0.1, score=0.840749999881, total= 3.0min
[CV] dropout_rate=0.15 ...............................................
[CV] ........... dropout_rate=0.1, score=0.853249996901, total= 2.9min
[CV] dropout_rate

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 18.3min


[CV] ........... dropout_rate=0.3, score=0.831999996305, total= 3.0min
[CV] dropout_rate=0.35 ...............................................
[CV] .......... dropout_rate=0.35, score=0.860750004649, total= 3.0min
[CV] dropout_rate=0.35 ...............................................
[CV] .......... dropout_rate=0.35, score=0.841500005126, total= 3.0min
[CV] dropout_rate=0.4 ................................................
[CV] .......... dropout_rate=0.35, score=0.833000001311, total= 3.0min
[CV] dropout_rate=0.4 ................................................
[CV] .......... dropout_rate=0.35, score=0.839250001311, total= 3.0min
[CV] dropout_rate=0.4 ................................................
[CV] .......... dropout_rate=0.35, score=0.853249999881, total= 3.0min
[CV] dropout_rate=0.4 ................................................
[CV] ........... dropout_rate=0.4, score=0.851250001788, total= 3.0min
[CV] dropout_rate=0.4 ................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed: 34.3min finished


In [9]:
# Report the best mean cross-validation score and the parameters that produced it
best_summary = (grid_result_final.best_score_, grid_result_final.best_params_)
print("Best: %f using %s" % best_summary)

Best: 0.849200 using {'dropout_rate': 0.5}


In [10]:
# Print one line per candidate: mean test score, its standard deviation, and the parameters.
# NOTE(review): the grid for this search has nine dropout candidates (0.1 to 0.5),
# but the saved output under this cell lists only seven (0.2 to 0.5) and is identical
# to the first dropout search's output. It looks like output from an earlier run;
# re-run this cell to confirm.
cv_table = grid_result_final.cv_results_
means, stds, params = (cv_table['mean_test_score'],
                       cv_table['std_test_score'],
                       cv_table['params'])
row_format = "%f (%f) with: %r"
for mean, stdev, param in zip(means, stds, params):
    print(row_format % (mean, stdev, param))

0.840550 (0.013707) with: {'dropout_rate': 0.2}
0.842850 (0.014075) with: {'dropout_rate': 0.25}
0.834700 (0.011268) with: {'dropout_rate': 0.3}
0.845850 (0.010404) with: {'dropout_rate': 0.35}
0.834600 (0.016130) with: {'dropout_rate': 0.4}
0.842500 (0.011905) with: {'dropout_rate': 0.45}
0.849200 (0.008340) with: {'dropout_rate': 0.5}


## Search over dropouts for new architecture 

In [4]:
# Model factory passed to KerasClassifier as build_fn
def create_model(dropout_rate):
    """Build and compile the 600-400 ReLU network with a 2-way softmax output.

    dropout_rate is the rate given to the single Dropout layer, which follows
    the first hidden layer. The input width is read from the notebook-level
    X_train. Returns the compiled Sequential model.
    """
    feature_count = len(X_train[0])

    # Sequential adds the listed layers in order.
    model = Sequential([
        Dense(600, input_shape=(feature_count,)),
        Activation('relu'),
        Dropout(dropout_rate),
        Dense(400),
        Activation('relu'),
        Dense(2),
        Activation('softmax'),
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [5]:
# Seed NumPy's global random generator.
# NOTE(review): only NumPy is seeded here; whether that is enough to make the
# TensorFlow-backed training repeatable is not shown in this notebook -- confirm.
seed = 74
np.random.seed(seed)

# Wrap the model factory for scikit-learn: 10 epochs, batches of 200, silent training.
classifier_settings = dict(epochs=10, batch_size=200, verbose=0)
model = KerasClassifier(build_fn=create_model, **classifier_settings)

In [6]:
# Candidate dropout rates; this search varies the dropout rate only
# (the activation function is not part of the grid).
dropout_rate = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
param_grid = {'dropout_rate': dropout_rate}

# 5-fold cross-validated grid search with n_jobs=-1 and per-fit progress output.
grid_last = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=3)
grid_result_last = grid_last.fit(X_train_final, Y_train_final)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] dropout_rate=0.1 ................................................
[CV] dropout_rate=0.1 ................................................
[CV] dropout_rate=0.1 ................................................
[CV] dropout_rate=0.1 ................................................
[CV] ........... dropout_rate=0.1, score=0.856749996543, total= 2.1min
[CV] dropout_rate=0.1 ................................................
[CV] ........... dropout_rate=0.1, score=0.836499997973, total= 2.1min
[CV] dropout_rate=0.15 ...............................................
[CV] ........... dropout_rate=0.1, score=0.842750000954, total= 2.1min
[CV] dropout_rate=0.15 ...............................................
[CV] ........... dropout_rate=0.1, score=0.841749998927, total= 2.1min
[CV] dropout_rate=0.15 ...............................................
[CV] ........... dropout_rate=0.1, score=0.854000002146, total= 2.1min
[CV] dropout_rate

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 16.7min


[CV] dropout_rate=0.35 ...............................................
[CV] ........... dropout_rate=0.3, score=0.854249995947, total= 2.7min
[CV] dropout_rate=0.35 ...............................................
[CV] .......... dropout_rate=0.35, score=0.820000007749, total= 2.7min
[CV] dropout_rate=0.35 ...............................................
[CV] .......... dropout_rate=0.35, score=0.854250001907, total= 2.7min
[CV] dropout_rate=0.4 ................................................
[CV] .......... dropout_rate=0.35, score=0.834749999642, total= 2.7min
[CV] dropout_rate=0.4 ................................................
[CV] .......... dropout_rate=0.35, score=0.816000005603, total= 2.8min
[CV] dropout_rate=0.4 ................................................
[CV] .......... dropout_rate=0.35, score=0.793500000238, total= 2.7min
[CV] dropout_rate=0.4 ................................................
[CV] ........... dropout_rate=0.4, score=0.859500005841, total= 2.7min
[CV] .

[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed: 31.5min finished


In [7]:
# Report the best mean cross-validation score and the parameters that produced it
best_summary = (grid_result_last.best_score_, grid_result_last.best_params_)
print("Best: %f using %s" % best_summary)

Best: 0.850350 using {'dropout_rate': 0.2}


In [8]:
# Print one line per candidate: mean test score, its standard deviation, and the parameters
cv_table = grid_result_last.cv_results_
means, stds, params = (cv_table['mean_test_score'],
                       cv_table['std_test_score'],
                       cv_table['params'])
row_format = "%f (%f) with: %r"
for mean, stdev, param in zip(means, stds, params):
    print(row_format % (mean, stdev, param))

0.846350 (0.007718) with: {'dropout_rate': 0.1}
0.849350 (0.005469) with: {'dropout_rate': 0.15}
0.850350 (0.007808) with: {'dropout_rate': 0.2}
0.828700 (0.016131) with: {'dropout_rate': 0.25}
0.849900 (0.007937) with: {'dropout_rate': 0.3}
0.823700 (0.020201) with: {'dropout_rate': 0.35}
0.842900 (0.012441) with: {'dropout_rate': 0.4}
0.843700 (0.007881) with: {'dropout_rate': 0.45}
0.846000 (0.006751) with: {'dropout_rate': 0.5}
