In [1]:
import csv
import math
import os

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

from keras import applications
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, Input
from keras.initializers import glorot_uniform
from keras.applications.vgg19 import preprocess_input

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

Using TensorFlow backend.


In [2]:
# Read the filtered bike catalogue into two parallel lists:
# image_paths[k] is the photo of the bike whose price is prices[k].
prices = []
image_paths = []

data_path = "../datasets/bikes_im/"
# newline='' is what the csv module asks for, so that quoted fields that
# contain line breaks are parsed correctly.
with open("../datasets/bikes_filtered.csv", newline='') as file:
    for row in csv.reader(file):
        # Column 0 is the bike id (also the image file stem), column 1 is the
        # name (not needed here) and column 2 is the MSRP.
        index, msrp = row[0], row[2]
        image_paths.append(data_path + index + '.jpg')
        prices.append(int(msrp))

# Pre-computed train / validation splits. The generator below uses each entry
# as a position into image_paths / prices.
train_indices = np.load("bikes_train_indices.npy")
val_indices = np.load("bikes_val_indices.npy")
print(train_indices.shape, val_indices.shape)

(17473,) (2185,)


In [3]:
from imgaug import augmenters as iaa

# Augmentation pipeline; the steps are listed in the order they are applied.
seq = iaa.Sequential(
    children=[
        # trim a random 0-16 px from each side of the image
        iaa.Crop(px=(0, 16)),
        # mirror left-to-right with probability 0.5
        iaa.Fliplr(p=0.5),
        # Gaussian blur with sigma drawn from [0, 3.0]
        iaa.GaussianBlur(sigma=(0, 3.0)),
    ]
)

def image_generator(indices, batch_size, augment=True):
    """Yield ``(X, Y)`` mini-batches of bike images and prices forever.

    ``indices`` is walked in order, ``batch_size`` entries at a time, and the
    walk restarts from the beginning once the end is reached, so the
    generator never terminates.

    Args:
        indices: sequence (list or 1-D array) of integer positions into the
            module-level ``image_paths`` / ``prices`` lists.
        batch_size: maximum number of samples per batch. Every batch has
            exactly this many samples except the last one of a pass, which
            holds whatever is left over. One pass therefore takes
            ``ceil(len(indices) / batch_size)`` batches.
        augment: when True (the default) each batch is run through the
            module-level ``seq`` augmentation pipeline. Pass False for
            validation / test data so metrics are computed on unmodified
            images.

    Yields:
        Tuple ``(X, Y)``: ``X`` is a float32 array of shape
        ``(n, 224, 224, 3)`` that has been passed through
        ``preprocess_input``; ``Y`` is a float array of shape ``(n, 1)``
        holding the matching prices.

    Raises:
        ValueError: if ``indices`` is empty or ``batch_size`` is not
            positive (raised when the first batch is requested).
    """
    num_samples = len(indices)
    if num_samples == 0:
        # Without this guard the loop below would spin forever without
        # ever yielding a batch.
        raise ValueError("image_generator needs at least one index")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Round up so the remainder forms its own (short) final batch instead of
    # being folded into an oversized one.
    num_batches = math.ceil(num_samples / batch_size)

    while True:
        for batch_i in range(num_batches):
            start_i = batch_i * batch_size
            # Slicing past the end is safe: the last batch is simply shorter.
            batch_indices = indices[start_i:start_i + batch_size]

            # Images are held as uint8 until after augmentation, because the
            # augmenters are meant for ordinary 0-255 image data.
            X = np.zeros((len(batch_indices), 224, 224, 3), dtype=np.uint8)
            Y = np.zeros((len(batch_indices), 1))

            for i, index in enumerate(batch_indices):
                img = image.load_img(image_paths[index], target_size=(224, 224))
                X[i] = image.img_to_array(img)
                Y[i] = prices[index]

            # Augment the raw pixels first ...
            if augment:
                X = seq.augment_images(X)

            # ... then apply the VGG19 preprocessing on a fresh float copy
            # (preprocess_input may modify its argument in place).
            X = preprocess_input(np.asarray(X, dtype=np.float32))

            yield (X, Y)

In [5]:
# Hyperparameters
hp_dropout = 0.5
hp_lr = 0.005
hp_hidden = 256

minibatch_size = 128
num_epochs = 500

# Convolutional base: VGG19 with ImageNet weights and without its dense top.
input_tensor = Input(shape=(224, 224, 3))
model = applications.VGG19(weights='imagenet', include_top=False, input_tensor=input_tensor)

# Regression head stacked on the convolutional features:
# flatten -> dropout -> hidden ReLU layer -> single linear output (the price).
# The dense layers start from random (Glorot uniform) weights.
top_model = Sequential()
top_model.add(Flatten(input_shape=(model.output_shape[1:])))
top_model.add(Dropout(hp_dropout))
top_model.add(Dense(hp_hidden, activation='relu', kernel_initializer='glorot_uniform'))
top_model.add(Dense(1, activation='linear', name='output', kernel_initializer='glorot_uniform'))

# Join base and head into one end-to-end model.
new_model = Model(inputs=model.input, outputs=top_model(model.output))

# Mean squared error on the raw price, optimised with Adam.
# NOTE(review): the output recorded with this cell shows a loss around 1.6e17
# early in the first epoch; the learning rate and the scale of the targets are
# worth revisiting before a long run.
new_model.compile(loss='mean_squared_error',
                  optimizer=optimizers.Adam(lr=hp_lr))

checkpoint_path = 'output/checkpoints/bikes-reg-vgg19.hdf5'
# Create the checkpoint directory up front so the first save cannot fail
# just because the folder is missing.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
# Keep only the weights with the lowest validation loss seen so far.
checkpoint = ModelCheckpoint(checkpoint_path,
                             monitor='val_loss',
                             save_best_only=True,
                             mode='min')
tensorboard = TensorBoard(log_dir="output/logs/bikes-reg-vgg19")

# Number of generator batches that make up one pass over each split.
train_steps = math.ceil(len(train_indices) / minibatch_size)
val_steps = math.ceil(len(val_indices) / minibatch_size)

# Train the whole network (base and head) from the generators.
# `validation_steps` is the Keras 2 name of the argument that used to be
# called `nb_val_samples`.
history = new_model.fit_generator(
    image_generator(train_indices, minibatch_size),
    steps_per_epoch=train_steps,
    epochs=num_epochs,
    validation_data=image_generator(val_indices, minibatch_size),
    validation_steps=val_steps,
    callbacks=[checkpoint, tensorboard])



Epoch 1/100
 15/274 [>.............................] - ETA: 5:50 - loss: 163391459609965792.0000

KeyboardInterrupt: 