In [None]:
import csv
import math

import matplotlib.pyplot as plt
import numpy as np

from keras import applications
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, Input
from keras.initializers import glorot_uniform
from keras.applications.vgg16 import preprocess_input

In [None]:
# Convolutional base: VGG16 with ImageNet weights and without its
# fully-connected top, fed by an explicit 224x224 3-channel input tensor.
input_tensor = Input(shape=(224, 224, 3))
model = applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_tensor=input_tensor,
)

In [None]:
# Classification head stacked on the convolutional base's output:
# flatten -> dropout(0.2) -> 256-unit ReLU layer -> 4-way softmax.
x = Flatten(input_shape=(model.output_shape[1:]))(model.output)
x = Dropout(0.2)(x)
x = Dense(256, activation='relu', kernel_initializer='glorot_uniform')(x)
x = Dense(
    4,
    activation='softmax',
    name='output',
    kernel_initializer='glorot_uniform',
)(x)

# Single end-to-end model: from the VGG16 input to the new softmax output.
new_model = Model(inputs=model.input, outputs=x)

In [None]:
# Mark the layers in new_model.layers[:19] as non-trainable so their weights
# are not updated during training; layers from index 19 onwards stay trainable.
# NOTE(review): the original comment said "up to the last conv block" -- check
# against new_model.summary() that 19 is the intended cut-off.
for frozen_layer in new_model.layers[:19]:
    frozen_layer.trainable = False

In [None]:
# Print the layer-by-layer summary of the assembled model.
new_model.summary()

In [None]:
# Optimizer: SGD with Nesterov momentum. Loss: categorical cross-entropy,
# with accuracy reported as the metric.
sgd = optimizers.SGD(
    lr=1e-3,
    decay=1e-6,
    momentum=0.9,
    nesterov=True,
)
new_model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Alternative kept for reference (disabled): RMSprop with mean squared error.
#new_model.compile(loss='mean_squared_error',
#                  optimizer=optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-07, decay=0.0))

In [None]:
# Read the classification CSV into two parallel lists: image_paths[k] is the
# image file for CSV row k and prices[k] is that row's label (column 3) kept
# as a string. Every row is appended, in file order, so a row's position in
# the file is also its position in both lists.
prices = []
image_paths = []

data_path = "../datasets/bikes_im/"
# newline='' is what the csv module asks for when given a file object, so
# that newlines embedded inside quoted fields are parsed correctly.
with open("../datasets/bikes_classified.csv", newline='') as file:
    reader = csv.reader(file)
    for row in reader:
        # Column 0 is the image index (used as the file stem) and column 3 is
        # the label; the name and MSRP columns (1 and 2) are not used here.
        index = row[0]
        label = row[3]

        image_paths.append(data_path + index + '.jpg')
        prices.append(str(label))

In [None]:
def image_generator(indices, batch_size):
    """Yield ``(X, Y)`` batches forever, cycling through ``indices`` in order.

    Args:
        indices: sequence of integer positions into the module-level
            ``image_paths`` and ``prices`` lists.
        batch_size: maximum number of samples per batch; must be positive.

    Yields:
        Tuples ``(X, Y)``. ``X`` has shape ``(k, 224, 224, 3)`` and has been
        passed through ``preprocess_input``. ``Y`` has shape ``(k, 4)`` with a
        one-hot row per sample for the labels "25", "50", "75", "100" (in that
        column order). ``k`` equals ``batch_size`` except for the final batch
        of each pass, which holds whatever remains.

    Raises:
        ValueError: on the first iteration, if ``indices`` is empty or
            ``batch_size`` is not positive. Without this check the generator
            would loop forever without ever yielding.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    num_samples = len(indices)
    if num_samples == 0:
        raise ValueError("indices must not be empty")

    # Label string -> column of the one-hot target vector.
    label_to_class = {"25": 0, "50": 1, "75": 2, "100": 3}

    # Ceiling division: a trailing partial batch is emitted on its own instead
    # of being folded into the previous batch (which made that batch larger
    # than batch_size) or, for fewer than batch_size samples, yielding nothing.
    num_batches = (num_samples + batch_size - 1) // batch_size

    while True:
        for batch_i in range(num_batches):
            start_i = batch_i * batch_size
            batch_indices = indices[start_i:start_i + batch_size]

            X = np.zeros((len(batch_indices), 224, 224, 3))
            Y = np.zeros((len(batch_indices), 4))

            for i, index in enumerate(batch_indices):
                img = image.load_img(image_paths[index], target_size=(224, 224))
                X[i, :, :, :] = image.img_to_array(img)

                # One-hot encode the label. A label outside the table leaves
                # the row all zeros, as the original if-chain did.
                class_i = label_to_class.get(prices[index])
                if class_i is not None:
                    Y[i, class_i] = 1.0

            # use vgg16 preprocessing
            X = preprocess_input(X)

            yield (X, Y)

In [None]:
# Load the saved train/test index arrays and print the shape of each.
train_indices = np.load("bikes_train_indices.npy")
test_indices = np.load("bikes_test_indices.npy")
for split_indices in (train_indices, test_indices):
    print(split_indices.shape)

In [None]:
# Training configuration.
epochs = 30
minibatch_size = 32

# Number of generator batches drawn per training epoch and per validation run.
train_steps = math.ceil(len(train_indices) / minibatch_size)
test_steps = math.ceil(len(test_indices) / minibatch_size)

# fine-tune the model
history = new_model.fit_generator(
    image_generator(train_indices, minibatch_size),
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=image_generator(test_indices, minibatch_size),
    # Keras 2 keyword for the number of validation batches. The Keras 1 name
    # `nb_val_samples` is deprecated, and test_steps is a batch count, not a
    # sample count.
    validation_steps=test_steps)