## Hyperparameter Tuning for CNN

### Imports

In [1]:
import csv
import math

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

from keras import applications
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, Input
from keras.initializers import glorot_uniform
#from keras.applications.vgg16 import preprocess_input
#from keras.applications.resnet50 import preprocess_input
from keras.applications.mobilenet import preprocess_input


from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

Using TensorFlow backend.


### Load image paths, prices, and train/test split

In [2]:
# read the CSV into memory
prices = []
image_paths = []

data_path = "../datasets/bikes_im/"
with open("../datasets/bikes_filtered.csv") as file:
    reader = csv.reader(file)
    i = -1
    for row in reader:
        i += 1
        index = row[0]
        name = row[1]
        msrp = row[2]
        
        image_path = data_path + index + '.jpg'
        image_paths.append(image_path)
        prices.append(int(msrp))

train_indices = np.load("bikes_train_indices.npy")
test_indices = np.load("bikes_test_indices.npy")
print(train_indices.shape)
print(test_indices.shape)

(19658,)
(2185,)


### Initialize Image Generator

Due to the size of our dataset (>20,000 images), we cannot read all images into memory. Thus, we write our own image generator, which is a Python generator that reads images a minibatch at a time, preprocessing them and returning the input data and price labels as input to the neural network.

In [3]:
def image_generator(indices, batch_size):

    num_batches = int(len(indices) / batch_size)
    
    while True:
        for batch_i in range(num_batches):
            if batch_i == num_batches - 1:
                # special case: return as many as possible
                start_i = batch_i * batch_size
                batch_indices = indices[start_i:]
                
                X = np.zeros((len(batch_indices), 224, 224, 3))
                Y = np.zeros((len(batch_indices), 1))
                #Y = np.zeros((len(batch_indices), 1, 1, 1))
            
            else:
                start_i = batch_i * batch_size
                end_i = start_i + batch_size

                batch_indices = indices[start_i:end_i]

                X = np.zeros((batch_size, 224, 224, 3))
                Y = np.zeros((batch_size, 1))
                #Y = np.zeros((batch_size, 1, 1, 1))
            
            for i, index in enumerate(batch_indices):
                img = image.load_img(image_paths[index], target_size=(224, 224))
                X[i, :, :, :] = image.img_to_array(img)                
                Y[i] = prices[index]
            
            # use resnet50 preprocessing
            X = preprocess_input(X)
            
            yield (X, Y)

### Hyperparameters

We tune hyperparameters using grid search and random search, modifying one hyperparameter at a time while keeping the others constant.

In [4]:
# Hyperparameters

num_settings = 1

hp_dropout = [0.2] * num_settings

#RMSprop
hp_lr = [0.001] * num_settings
hp_rho = [0.9] * num_settings
hp_epsilon = [1e-07] * num_settings
hp_decay = [0.0] * num_settings

# Number of hidden units
hp_hidden = [256] * num_settings

# Minibatch size
hp_mbsize = [64] * num_settings
#hp_mbsize = [4] * num_settings

setting = 0

num_epochs = 20

In [5]:
input_tensor = Input(shape=(224,224,3))
model = applications.MobileNet(include_top=False, weights='imagenet', input_tensor=input_tensor, input_shape=(224,224,3))
model.summary()

#model = applications.mobilenet(weights='imagenet', include_top=False, input_tensor = input_tensor)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [6]:
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D

#model is basic resnet
train_losses = np.zeros(num_settings)
dev_losses = np.zeros(num_settings)

#input_tensor = Input(shape=(224,224,3))
#model = applications.ResNet50(weights='imagenet', include_top=True, input_tensor = input_tensor)
# model.summary()

#model 2 pops off 20 layers
new_input = model.input
hidden_layer = model.layers[-10].output
model2 = Model(new_input, hidden_layer)

# model2.summary()

#top_model = Sequential()
#top_model.add(Flatten(input_shape=(model2.output_shape[1:])))
#top_model.add(Dropout(hp_dropout[setting]))
#top_model.add(Dense(hp_hidden[setting], activation='relu', kernel_initializer='glorot_uniform'))
#top_model.add(Dense(1, activation='linear', name='output', kernel_initializer='glorot_uniform'))

#model 3 has added layers
#new_conv = Conv2D(filters=64, kernel_size=(100, 100), name='new_conv', padding='same')(model2.output)
new_conv = Conv2D(filters=64, kernel_size=(4, 4), name='new_conv', padding='same')(model2.output)


# layer1 = model2.layers[-1](new_conv)

new_pool = MaxPooling2D(strides=(2,2), name='new_pool')
layer2 = new_pool(new_conv)

flattened = Flatten(input_shape=layer2.shape)(layer2)

dropout = Dropout(hp_dropout[setting], name='new_dropout')
layer3 = dropout(flattened)

dense1 = Dense(hp_hidden[setting], activation='relu', kernel_initializer='glorot_uniform', name='new_dense1')
layer4 = dense1(layer3)

dense2 = Dense(1, activation='linear', name='output', kernel_initializer='glorot_uniform')
layer5 = dense2(layer4)

model3 = Model(inputs= model2.input, outputs = layer5)

# to non-trainable (weights will not be updated)
for layer in model3.layers[:-6]:
    #print(layer)
    layer.trainable = False
    pass

model3.summary()



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [9]:
model3.compile(loss='mean_squared_error',
                      optimizer=optimizers.RMSprop(
                              lr=hp_lr[setting], 
                              rho=hp_rho[setting], 
                              epsilon=hp_epsilon[setting], 
                              decay=hp_decay[setting]))

#checkpoint_path = 'bikes-cnn-resnet.hdf5'
    
# keep a checkpoint
#checkpoint = ModelCheckpoint(checkpoint_path,
#                            monitor='val_loss',
#                            save_best_only=True,
#                            mode='min')



minibatch_size = 64 #hp_mbsize[setting]

train_steps = math.ceil(len(train_indices) / minibatch_size)
test_steps = math.ceil(len(test_indices) / minibatch_size)

# fine-tune the model
history = model3.fit_generator(
    image_generator(train_indices, minibatch_size),
    steps_per_epoch=train_steps,
    epochs=num_epochs,
    validation_data=image_generator(test_indices, minibatch_size),
    nb_val_samples=test_steps)
    #callbacks=[checkpoint])



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Evaluation Metrics

In [11]:
# get predictions on each batch yielded the validation generator.

validation_generator = image_generator(test_indices, minibatch_size)

predicted = []
actual = []

for step in range(test_steps):
    X, Y = next(validation_generator)
    curr_pred = model3.predict(X)
    for entry in curr_pred:
        predicted.append(entry)
    for entry in Y:
        actual.append(entry)

In [12]:
predicted = np.array(predicted)
actual = np.array(actual)

MSE = mean_squared_error(predicted, actual)
MAE = mean_absolute_error(predicted, actual)
R2 = r2_score(actual, predicted)

print((MSE, MAE, R2))

(584020.43340827862, 429.4344282816017, 0.82321522729213281)
