In [1]:
import csv
import math

import matplotlib.pyplot as plt
import numpy as np

from keras import applications
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, Input
from keras.initializers import glorot_uniform
from keras.applications.vgg16 import preprocess_input


Using TensorFlow backend.


In [2]:
# read the CSV into memory
prices = []
image_paths = []

data_path = "../datasets/bikes_im/"
with open("../datasets/bikes_filtered.csv") as file:
    reader = csv.reader(file)
    i = -1
    for row in reader:
        i += 1
        index = row[0]
        name = row[1]
        msrp = row[2]
        
        image_path = data_path + index + '.jpg'
        image_paths.append(image_path)
        prices.append(int(msrp))

train_indices = np.load("bikes_train_indices.npy")
test_indices = np.load("bikes_test_indices.npy")
print(train_indices.shape)
print(test_indices.shape)

(19658,)
(2185,)


In [3]:
# Take a data point and run it through the neural network
# Return the predicted value and calculate the MSE
def evaluate(model, index):
    msrp = prices[index]
    path = image_paths[index]
    img = image.load_img(path, target_size=(224, 224))
    data = np.expand_dims(image.img_to_array(img), axis=0)
    # TODO: the input should be preprocessed before feeding it in
    prediction = model.predict(data, msrp)
    
    #print("Bike index: " + str(index))
    #print("Actual price: " + str(msrp))
    #print("Predicted price: " + str(prediction))
    mse = (prediction-msrp)**2
    return mse

def image_generator(indices, batch_size):

    num_batches = int(len(indices) / batch_size)
    
    while True:
        for batch_i in range(num_batches):
            if batch_i == num_batches - 1:
                # special case: return as many as possible
                start_i = batch_i * batch_size
                batch_indices = indices[start_i:]
                
                X = np.zeros((len(batch_indices), 224, 224, 3))
                Y = np.zeros((len(batch_indices), 1))
            
            else:
                start_i = batch_i * batch_size
                end_i = start_i + batch_size

                batch_indices = indices[start_i:end_i]

                X = np.zeros((batch_size, 224, 224, 3))
                Y = np.zeros((batch_size, 1))
            
            for i, index in enumerate(batch_indices):
                img = image.load_img(image_paths[index], target_size=(224, 224))
                X[i, :, :, :] = image.img_to_array(img)                
                Y[i] = prices[index]
            
            # use vgg16 preprocessing
            X = preprocess_input(X)
            
            yield (X, Y)

In [6]:
# Hyperparameters

num_settings = 7

hp_dropout = [0.2] * num_settings

#RMSprop
hp_lr = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
hp_rho = [0.9] * num_settings
hp_epsilon = [1e-07] * num_settings
hp_decay = [0.0] * num_settings

# Number of hidden units
hp_hidden = [256] * num_settings

# Minibatch size
hp_mbsize = [64] * num_settings

In [7]:
# Each index in the list corresponds to a HP setting
# E.G. setting = 1 will select dropout = 0.01, lr = 0.01, rho = 0.9 ...
# TODO: vary the settings, right now I only vary dropout and clamp all the others to a single value

# store the results of each setting
train_losses = np.zeros(num_settings)
dev_losses = np.zeros(num_settings)

for setting in range(num_settings):
    # build the VGG16 network
    input_tensor = Input(shape=(224,224,3))
    model = applications.VGG16(weights='imagenet', include_top=False, input_tensor = input_tensor)
    
    # build a classifier model to put on top of the convolutional model
    top_model = Sequential()
    top_model.add(Flatten(input_shape=(model.output_shape[1:])))


    # Output layer
    # We do random weight intialization
    top_model.add(Dropout(hp_dropout[setting]))
    top_model.add(Dense(hp_hidden[setting], activation='relu', kernel_initializer='glorot_uniform'))
    top_model.add(Dense(1, activation='linear', name='output', kernel_initializer='glorot_uniform'))
    
    # add the model on top of the convolutional base
    new_model = Model(inputs= model.input, outputs = top_model(model.output))

    # set the first 19 layers (up to the last conv block)
    # to non-trainable (weights will not be updated)
    for layer in new_model.layers[:19]:
        layer.trainable = False

    # RMSprop
    new_model.compile(loss='mean_squared_error',
                      optimizer=optimizers.RMSprop(
                              lr=hp_lr[setting], 
                              rho=hp_rho[setting], 
                              epsilon=hp_epsilon[setting], 
                              decay=hp_decay[setting]))
    
    epochs = 10
    minibatch_size = hp_mbsize[setting]

    train_steps = math.ceil(len(train_indices) / minibatch_size)
    test_steps = math.ceil(len(test_indices) / minibatch_size)

    # fine-tune the model
    history = new_model.fit_generator(
        image_generator(train_indices, minibatch_size),
        steps_per_epoch=train_steps,
        epochs=epochs,
        validation_data=image_generator(test_indices, minibatch_size),
        nb_val_samples=test_steps)
    
    # TODO: we should consider the validation loss
    #error = []
    #for i in range(21843):
    #    error.append(evaluate(new_model,i))
    
    # store the training and dev losses for the last epoch (current model)
    train_losses[setting] = history.history['loss'][-1]
    dev_losses[setting] = history.history['val_loss'][-1]
    
    print(np.mean(error))
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()
    
    print("==========")



Epoch 1/10

ResourceExhaustedError: OOM when allocating tensor with shape[58,64,224,224]
	 [[Node: block1_conv2_1/convolution = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](block1_conv1_1/Relu, block1_conv2_1/kernel/read)]]
	 [[Node: loss_1/mul/_463 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_533_loss_1/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'block1_conv2_1/convolution', defined at:
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-50c887e778c4>", line 12, in <module>
    model = applications.VGG16(weights='imagenet', include_top=False, input_tensor = input_tensor)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\keras\applications\vgg16.py", line 113, in VGG16
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\keras\engine\topology.py", line 603, in __call__
    output = self.call(inputs, **kwargs)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\keras\layers\convolutional.py", line 164, in call
    dilation_rate=self.dilation_rate)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\keras\backend\tensorflow_backend.py", line 3189, in conv2d
    data_format=tf_data_format)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 672, in convolution
    op=op)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 338, in with_space_to_batch
    return op(input, num_spatial_dims, padding)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 664, in op
    name=name)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 131, in _non_atrous_convolution
    name=name)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 397, in conv2d
    data_format=data_format, name=name)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Users\stevenzc\Anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[58,64,224,224]
	 [[Node: block1_conv2_1/convolution = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](block1_conv1_1/Relu, block1_conv2_1/kernel/read)]]
	 [[Node: loss_1/mul/_463 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_533_loss_1/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
