In [68]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split
import h5py

test_size = 0.25
N = int(2500 * test_size)
fid = h5py.File(r"/Users/nathanbrockbank/Downloads/Project_Packet/dataset_patchcamelyon.h5", 'r') 
X_test, Y_test = fid['X'][:N], fid['Y'][:N]
X, Y = fid['X'][N:], fid['Y'][N:]
X = X / 255.
X_test = X_test / 255.
fid.close()

print(X.shape, X_test.shape, Y.shape, Y_test.shape)



(1875, 96, 96, 3) (625, 96, 96, 3) (1875, 1, 1, 1) (625, 1, 1, 1)


In [70]:
X_gs = tf.image.rgb_to_grayscale(X).shape
X_gs_test = tf.image.rgb_to_grayscale(X_test).shape

In [71]:
def create_cnn(color=True, d_units=[128, 64], bn=True, dr=0.25, 
               pool_fcn='max', opt_fcn='RMSprop', lr = 0.01, act_fcn='relu', 
               filters=[64, 128, 256], kernels=[8, 3], stride=2, pad_mthd='same'):
    
    # determine input (RBG/GS)
    if color:
        input_shape = [96, 96, 3]
    else:
        input_shape = [96, 96, 1]
    
    # ensure there are filter and kernel sizes set for each convolutional layer
    c_depth = max(len(filters), len(kernel_size))
    if len(filters)>len(kernel_size):
        while len(filters)>len(kernel_size):
            kernel_size.append(kernel_size[len(kernel_size)-1])
    elif len(filters)<len(kernel_size):
        while len(filters)<len(kernel_size):
            filters.append(filters[len(filters)-1])
    
    # create cnn
    cnn_clf = keras.models.Sequential()
    
    # add convolutional layers
    if pool_fcn == "max":
        for i in range(c_depth):
            cnn_clf.add(keras.layers.Conv2D(filters=filters[i], kernel_size=kernel_size[i], activation=act_fcn, padding=pad_mthd, input_shape=input_shape))
            cnn_clf.add(keras.layers.MaxPooling2D(2))
    elif pool_fcn == "average":
        for i in range(c_depth):
            cnn_clf.add(keras.layers.Conv2D(filters=filters[i], kernel_size=kernel_size[i], activation=act_fcn, padding=pad_mthd, input_shape=input_shape))
            cnn_clf.add(keras.layers.AveragePooling2D(2))
    cnn_clf.add(keras.layers.Flatten())
    
    # add dense layers
    if bn:
        for i in d_units:
            cnn_clf.add(keras.layers.BatchNormalization())
            cnn_clf.add(keras.layers.Dense(i, activation=act_fcn, kernel_initializer='lecun_normal'))
            cnn_clf.add(keras.layers.Dropout(dr))
    else:
        for i in d_units:
            cnn_clf.add(keras.layers.Dense(i, activation=act_fcn, kernel_initializer='lecun_normal'))
            cnn_clf.add(keras.layers.Dropout(dr))

    # define optimizer fcn
    if opt_fcn == 'RMSprop':
        opt = keras.optimizers.RMSprop(lr=lr)
    elif opt_fcn == 'Adam':
        opt = keras.optimizers.Adam(lr=lr)
    elif opt_fcn == 'SGD':
        opt = keras.optimizers.SGD(lr=lr, nesterov=True)
    
    # add output layer and compile
    cnn_clf.add(keras.layers.Dense(2, activation='sigmoid'))
    cnn_clf.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    
    return cnn_clf


In [72]:

cnn_wrap = keras.wrappers.scikit_learn.KerasClassifier(create_cnn)

cp_cb_cnn = keras.callbacks.ModelCheckpoint("model_trained_cnn.h5")
es_cb_cnn = keras.callbacks.EarlyStopping(monitor="accuracy", patience=5, restore_best_weights=True)

param_grid_c = {
    'color': [True],
    'd_units': [(128, 64), (64, 32)],
    'bn': [True, False],
    'dr': [0, 0.25],
    'pool_fcn': ['max', 'average'],
    'opt_fcn': ['RMSprop', 'Adam', 'SGD'],
    'lr': [0.01, 0.0001],
    'act_fcn': ['relu', 'selu'],
    'pad_mthd': ['valid']
}

param_grid_gs = {
    'color': [False],
    'd_units': [(128, 64), (64, 32)],
    'bn': [True, False],
    'dr': [0, 0.25],
    'pool_fcn': ['max', 'average'],
    'opt_fcn': ['RMSprop', 'Adam', 'SGD'],
    'lr': [0.01, 0.0001],
    'act_fcn': ['relu', 'selu'],
    'pad_mthd': ['valid']
}

search_cv_c = GridSearchCV(cnn_wrap, param_grid_c, cv=3, verbose=2, n_jobs=-1)
search_cv_c.fit(X, Y, epochs=5, verbose=2, callbacks=[es_cb_cnn, cp_cb_cnn])
print("color: ", search_cv_c.best_params_)
print(pd.DataFrame(search_cv_c.cv_results_))

search_cv_gs = GridSearchCV(cnn_wrap, param_grid_gs, cv=3, verbose=2, n_jobs=-1)
search_cv_gs.fit(X_gs, Y, epochs=5, verbose=2, callbacks=[es_cb_cnn, cp_cb_cnn])
print("grayscale: ", search_cv_gs.best_params_)
print(pd.DataFrame(search_cv_gs.cv_results_))

Fitting 3 folds for each of 192 candidates, totalling 576 fits


ValueError: Invalid shape for y: (1875, 1, 1, 1)

In [None]:
param_grid_c = {
    'color': [True],
    'd_units': [(128, 64), (64, 32)],
    'bn': [True, False],
    'dr': [0, 0.25, 0.5],
    'pool_fcn': ['max', 'average'],
    'opt_fcn': ['RMSprop', 'Adam', 'SGD'],
    'lr': np.logspace(-4, -2,num=3).tolist(),
    'act_fcn': ['relu', 'selu'],
    'filters': [(64, 128, 256), (64, 128), (128, 256)],
    'kernels': [(8, 3), (6, 4)],
    'stride': [2],
    'pad_mthd': ['valid']
}

param_grid_gs = {
    'color': [False],
    'd_units': [(128, 64), (64, 32)],
    'bn': [True, False],
    'dr': [0, 0.25, 0.5],
    'pool_fcn': ['max', 'average'],
    'opt_fcn': ['RMSprop', 'Adam', 'SGD'],
    'lr': np.logspace(-4, -2,num=3).tolist(),
    'act_fcn': ['relu', 'selu'],
    'filters': [(64, 128, 256), (64, 128), (128, 256)],
    'kernels': [(8, 3), (6, 4)],
    'stride': [2],
    'pad_mthd': ['valid']
}