## Stanford Cars

* Data set: https://ai.stanford.edu/~jkrause/cars/car_dataset.html
* Related papers: http://cs231n.stanford.edu/reports/2015/pdfs/lediurfinal.pdf, http://noiselab.ucsd.edu/ECE228/Reports/Report17.pdf
* Databricks notebook: https://demo.cloud.databricks.com/#notebook/4718421/command/4718433
* Databricks email thread: https://groups.google.com/a/databricks.com/d/msgid/ml-sme/CA%2BUeztiEsUTm2xEZnBZp2DOgiWocCkJ%3DLNo6q1-Fn3%2BXdN4prQ%40mail.gmail.com?utm_medium=email&utm_source=footer


### Solutions

* 88% accuracy with resnet152 https://github.com/foamliu/Car-Recognition
* Kaggle solution with 90% accuracy: https://www.kaggle.com/meaninglesslives/cars-eb0-keras


In [1]:
import scipy.io as sio
import numpy as np
from IPython.display import Image
import os
import cv2
from matplotlib import pyplot as plt
import pandas as pd
import keras
from keras_preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.layers.pooling import GlobalAveragePooling2D, AveragePooling2D
from keras import applications  # these are the applications built into keras
from keras_applications.resnet import ResNet152 # separate keras applications lib, seems more up to date
keras.backend.tensorflow_backend._get_available_gpus()

ModuleNotFoundError: No module named 'cv2'

In [37]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:

  %reload_ext autoreload


### Settings + globals

In [38]:
# Set to True if you need to convert the original images into the squashed 227x227 images.
# If you already have the squashed 227x227 images in cars_train_227_227, no need to run this. 
do_image_preprocessing = False

# Set to True if you want to enable the step that builds a conv net from scratch (as opposed to transfer
# learning).  
do_conv_net_from_scratch = False

# Set to true if you want to train/test vgg16
enable_vgg_16_training_testing = False

enable_resnet_152_bottleneck = True

In [39]:
def ensure_exists(path):
    if not os.path.exists(path):
        raise Exception("Could not find path: {}".format(path))

datadir = "datasets/StanfordCars"
cars_train_227_227 = os.path.join(datadir, "cars_train_227_227")
cars_test_227_227 = os.path.join(datadir, "cars_test_227_227")
ensure_exists(cars_train_227_227)
ensure_exists(cars_test_227_227)

# Annotations
cars_meta = sio.loadmat(datadir + "/cars_meta.mat")
cars_train = sio.loadmat(datadir + "/cars_train_annos.mat")
cars_test = sio.loadmat(datadir + "/cars_test_annos.mat")

In [40]:
classes = [None] # MatLab is 1-based, python 0-based
classes += [c[0].item() for c in cars_meta["class_names"][0]] 

In [41]:
def get_class(car):
    """
    Helper function to convert a raw "car" stored in matlab format into
    a dictionary w/ named fields
    """
    filename = car[5][0].item()
    class_id = car[4][0][0].item()
    bbox = {
        "x1": car[0][0][0].item(),
        "y1": car[1][0][0].item(),
        "x2": car[2][0][0].item(),
        "y2": car[3][0][0].item()
    }
    class_ = classes[car[4][0][0]]
    return {
        "filename":filename, 
        "class_id": class_id,
        "class": class_, 
        "bbox": bbox
    }

In [42]:
training_annotations = cars_train['annotations'][0]
car = training_annotations[2]
car_class = get_class(car)
car_class

{'filename': '00003.jpg',
 'class_id': 91,
 'class': 'Dodge Dakota Club Cab 2007',
 'bbox': {'x1': 85, 'y1': 109, 'x2': 601, 'y2': 381}}

In [43]:
test_annotations = cars_test['annotations'][0]
car = test_annotations[1]
car_class = get_class(car)
car_class

{'filename': '00002.jpg',
 'class_id': 103,
 'class': 'Ferrari 458 Italia Convertible 2012',
 'bbox': {'x1': 100, 'y1': 19, 'x2': 576, 'y2': 203}}

## Image preprocessing pipeline


### Crop with boundary

From the Lieu/Wang paper:

> To preserve some context surrounding the cars, we expanded each bounding box by 16 pixels on each side before cropping

### Resize to 227x227 square aspect ratio

From the Lieu/Wang paper:


> we resized each cropped image to a square aspect ratio and a resolution of 227x227
as required by the models. After discussions with Krause, we decided to squash images without preserving their original aspect ratios instead of scaling and cropping the image

In [44]:
def crop_expand_bounding_box(car_class, source_dir):
    
    """
    Given a car class:
    
    {'filename': '00003.jpg',
     'class_id': 145,
     'class': 'Jeep Patriot SUV 2012',
     'bbox': {'x1': 51, 'y1': 105, 'x2': 968, 'y2': 659}}
     
    And an source and output directory, do the following:
    
    1. Calculate the expanded bounding box (should not go outside image border)
    2. Crop the image with the expanding box
    3. Return cropped image
    """
    source_filename = "{}/{}".format(source_dir, car_class['filename'])
    
    if not os.path.exists(source_filename):
        raise Exception("Could not find source image file: {}".format(source_filename))
        
    source_img = cv2.imread(source_filename)
    height, width, channels = source_img.shape
    bbox_orig = car_class['bbox']
    bbox = expand_bounding_box(bbox_orig, (width, height), 16)
    cropped_img = source_img[bbox['y1']:bbox['y2'], bbox['x1']:bbox['x2']]
    return cropped_img

def expand_bounding_box(bounding_box, img_size, expand_pixels):
    
    """
    Given a bounding box:
    
    {'x1': 51, 'y1': 105, 'x2': 968, 'y2': 659}
    
    an image size tuple (width, height) and a number of pixels to expand (expand_pixels param)
    
    Return a larger bounding box that still fits within the image bounds.
    
    """
    width, height = img_size
    new_x1 = max(bounding_box['x1'] - expand_pixels,0)  # don't let the new_x1 go off left edge of image
    new_x2 = min(bounding_box['x2'] + expand_pixels, width)  # don't let new_x2 go off right edge of image
    new_y1 = max(bounding_box['y1'] - expand_pixels, 0)  # don't go off top edge of image
    new_y2 = min(bounding_box['y2'] + expand_pixels, height)  # don't go off bottom edge of image
    
    return {
        'x1': new_x1,
        'y1': new_y1,
        'x2': new_x2,
        'y2': new_y2,
    }


def process_cars(cars, source_dir, result_directory_path):
    """
    Loop over car_classes and write transformed image into result_directory_path
    """
    for car in cars:
        car_class = get_class(car)
        print("car_class: {}".format(car_class))
        cropped_img = crop_expand_bounding_box(car_class, source_dir)
        resized_img = cv2.resize(cropped_img, (227,227))
        target_file = os.path.join(result_directory_path, car_class['filename'])
        cv2.imwrite(target_file, resized_img)
        
def process_car():
    source_dir = os.path.join(datadir, "cars_test")
    cropped_img = crop_expand_bounding_box(car_class, source_dir)

    img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
        

In [45]:
if do_image_preprocessing:
    source_dir = os.path.join(datadir, "cars_train")
    process_cars(training_annotations, source_dir, cars_train_227_227)
    source_dir = os.path.join(datadir, "cars_test")
    process_cars(test_annotations, source_dir, cars_test_227_227)
    

## Keras ImageDataGenerator

### Based on tutorials/docs

* [Vijayabhaskar J's Tutorial on Keras flow_from_dataframe](https://medium.com/@vijayabhaskar96/tutorial-on-keras-flow-from-dataframe-1fd4493d237c)

In [46]:
def dataframes_from_annotations(cars):
    """
    Given the annotations in matlab/octave format, create dataframes
    """
    dataframe = pd.DataFrame(columns=['id', 'label'])
    
    for car in cars:
        # Example car_class: {'filename': '00001.jpg', 'class_id': 14, 'class': 'Audi TTS Coupe 2012', 'bbox': {..}}
        car_class = get_class(car)
        dataframe = dataframe.append(
            {"id": car_class['filename'], 
             "label": car_class['class'],
            }, 
            ignore_index=True,
        )
    
    return dataframe
    

In [47]:
training_dataframes = dataframes_from_annotations(training_annotations)
training_dataframes

Unnamed: 0,id,label
0,00001.jpg,Audi TTS Coupe 2012
1,00002.jpg,Acura TL Sedan 2012
2,00003.jpg,Dodge Dakota Club Cab 2007
3,00004.jpg,Hyundai Sonata Hybrid Sedan 2012
4,00005.jpg,Ford F-450 Super Duty Crew Cab 2012
...,...,...
8139,08140.jpg,Chrysler Town and Country Minivan 2012
8140,08141.jpg,smart fortwo Convertible 2012
8141,08142.jpg,Mercedes-Benz SL-Class Coupe 2009
8142,08143.jpg,Ford GT Coupe 2006


In [48]:
test_dataframes = dataframes_from_annotations(test_annotations)
test_dataframes

Unnamed: 0,id,label
0,00001.jpg,Suzuki Aerio Sedan 2007
1,00002.jpg,Ferrari 458 Italia Convertible 2012
2,00003.jpg,Jeep Patriot SUV 2012
3,00004.jpg,Toyota Camry Sedan 2012
4,00005.jpg,Tesla Model S Sedan 2012
...,...,...
8036,08037.jpg,Chevrolet Sonic Sedan 2012
8037,08038.jpg,Audi V8 Sedan 1994
8038,08039.jpg,Audi 100 Sedan 1994
8039,08040.jpg,BMW Z4 Convertible 2012


## Keras Baseline convnet from scratch

This takes the same approach as https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html, but it's not working very well.

TODO: checkout what they did differently in http://noiselab.ucsd.edu/ECE228/Reports/Report17.pdf

### Training/validation ImageDataGenerator helper functions

In [49]:
batch_size = 16
num_classes = 196 # the number of different cars
img_width = 227
img_height = 227

# TODO: I don't know if I should rely on the ImageDataGenerator for
# the validation split, since as seen in https://www.kaggle.com/meaninglesslives/cars-eb0-keras/notebook
# and https://github.com/foamliu/Car-Recognition/blob/master/train.py, it does a lot of data 
# augmentation on the training set, but NOT on the validation set.  So maybe it would be better
# to write the 
#datagen=ImageDataGenerator(rescale=1./255.,validation_split=0.25)

datagen=ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.25,
    rotation_range=20.,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True
)

def get_train_generator(shuffle=False):
    train_generator=datagen.flow_from_dataframe(
        dataframe=training_dataframes,
        directory=cars_train_227_227,
        x_col='id',
        y_col='label',
        subset="training",
        batch_size=batch_size,
        shuffle=shuffle,
        seed=42,
        class_mode="categorical",
        target_size=(img_width,img_height),
    )
    return train_generator

def get_validation_generator(shuffle=False):
    validation_generator=datagen.flow_from_dataframe(
        dataframe=training_dataframes,
        directory=cars_train_227_227,
        x_col='id',
        y_col='label',
        subset="validation",
        batch_size=batch_size,
        shuffle=shuffle,
        seed=42,
        class_mode="categorical",
        target_size=(img_width,img_height),
    )
    return validation_generator

def get_test_generator(shuffle=False,classes=None):
    """
    This must take the "classes" as a param, which is a list of all the class labels:
    
        ['Audi TTS Coupe 2012', 'Acura TL Sedan 2012']
    
    Where the order is very important, because it's used to generate the one-hot
    encoded labels.  If the one-hot encoded labels are misaligned across the
    DataFrameIterator (training, validation, and test) then you will get totally
    wonky and invalid results.  This is required since the test set DataFrameIterators 
    uses it's own ImageDataGenerator separate from the one used by the training and 
    validation generators.
    """
    test_datagen=ImageDataGenerator(rescale=1./255.)
    test_generator=test_datagen.flow_from_dataframe(
        dataframe=test_dataframes,
        directory=cars_test_227_227,
        x_col='id',
        y_col='label',
        classes=classes,
        batch_size=batch_size,
        shuffle=shuffle,
        seed=42,
        class_mode="categorical",
        target_size=(img_width,img_height),
    )
    return test_generator




### Instantiate training/validation ImageDataGenerators


In [50]:
print("getting train_generator_non_shuffle")
train_generator_non_shuffle = get_train_generator(shuffle=False)
print("getting train_generator")
train_generator = get_train_generator(shuffle=True)
print("getting validation_generator_non_shuffle")
validation_generator_non_shuffle = get_validation_generator(shuffle=False)
print("getting validation_generator")
validation_generator = get_validation_generator(shuffle=True)
print("getting test_generator")

# Use the classes from any of the above DataFrameIterators for the
# the test set DataFrameIterator.
classes = list(train_generator_non_shuffle.class_indices.keys())
test_generator = get_test_generator(shuffle=False, classes=classes)

steps_per_epoch_training=train_generator_non_shuffle.n // train_generator_non_shuffle.batch_size
steps_per_epoch_validation=validation_generator_non_shuffle.n // validation_generator_non_shuffle.batch_size
steps_per_epoch_test=test_generator.n // test_generator.batch_size
print("steps_per_epoch_training: {}".format(steps_per_epoch_training))
print("steps_per_epoch_validation: {}".format(steps_per_epoch_validation))

getting train_generator_non_shuffle

Found 6115 images belonging to 196 classes.

getting train_generator

Found 6115 images belonging to 196 classes.

getting validation_generator_non_shuffle

Found 2029 images belonging to 196 classes.

getting validation_generator

Found 2029 images belonging to 196 classes.

getting test_generator

Found 8041 images belonging to 196 classes.

steps_per_epoch_training: 382

steps_per_epoch_validation: 126


### Define convnet model

This tries to build a convnet from scratch rather than using transfer learning to try to give some sort of baseline.  It's not giving any decent level of accuracy on the validation set though.

In [51]:

# Conv / pooling layers
model_convnet = Sequential()
model_convnet.add(Conv2D(32, (3, 3), input_shape=(img_width, img_height, 3)))
model_convnet.add(Activation('relu'))
model_convnet.add(MaxPooling2D(pool_size=(2, 2)))

model_convnet.add(Conv2D(32, (3, 3)))
model_convnet.add(Activation('relu'))
model_convnet.add(MaxPooling2D(pool_size=(2, 2)))

model_convnet.add(Conv2D(64, (3, 3)))
model_convnet.add(Activation('relu'))
model_convnet.add(MaxPooling2D(pool_size=(2, 2)))

# Fully connected layers

model_convnet.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model_convnet.add(Dense(256))
model_convnet.add(Activation('relu'))
model_convnet.add(Dropout(0.5))
model_convnet.add(Dense(num_classes, activation='softmax'))

model_convnet.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [52]:
num_epochs = 50
if do_conv_net_from_scratch:
    model_convnet.fit_generator(
        generator=train_generator,
        steps_per_epoch=steps_per_epoch_training,
        validation_data=validation_generator,
        validation_steps=steps_per_epoch_validation,
        epochs=num_epochs,
    )

## Keras VGG16 transfer learning model

### Generate bottleneck features

See dogs_vs_cats.ipynb

In [53]:
# build the network
model_vgg16 = applications.VGG16(
    weights='imagenet', 
    input_shape=(img_width, img_height, 3), 
    include_top=False
)
model_vgg16.summary()


_________________________________________________________________

Layer (type)                 Output Shape              Param #   


input_3 (InputLayer)         (None, 227, 227, 3)       0         

_________________________________________________________________

block1_conv1 (Conv2D)        (None, 227, 227, 64)      1792      

_________________________________________________________________

block1_conv2 (Conv2D)        (None, 227, 227, 64)      36928     

_________________________________________________________________

block1_pool (MaxPooling2D)   (None, 113, 113, 64)      0         

_________________________________________________________________

block2_conv1 (Conv2D)        (None, 113, 113, 128)     73856     

_________________________________________________________________

block2_conv2 (Conv2D)        (None, 113, 113, 128)     147584    

_________________________________________________________________

block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

### Get bottleneck predictions

In [54]:
def generator_with_labels(model, generator):
    """
    Helper which is an alternative to using model.predict_generator() which 
    has the advantage of also capturing the labels.
    See https://stackoverflow.com/questions/44970445/how-to-return-true-labels-of-items-when-using-predict-generator
    """
    while True:
        x, y = generator.next()
        yield x, model.predict_on_batch(x), y

In [55]:
def training_last_cnn_layer_with_labels(model, image_data_generator, steps_per_epoch):
        
    image_data_generator_w_labels = generator_with_labels(
        model, 
        image_data_generator,
    )
    
    num_steps_taken = 0
    y_preds = []
    y_labels = []
    for x, y_pred, y_label in image_data_generator_w_labels:
        print("{}/{}".format(num_steps_taken, steps_per_epoch))
        y_preds.append(y_pred)
        y_labels.append(y_label)
        num_steps_taken += 1
        if num_steps_taken >= steps_per_epoch:
            break
            
    return y_preds, y_labels
    

In [56]:
if enable_vgg_16_training_testing:
    training_y_preds_vgg16, training_y_labels_vgg16 = training_last_cnn_layer_with_labels(
        model_vgg16,
        train_generator_non_shuffle,
        steps_per_epoch_training
    )

In [57]:
if enable_vgg_16_training_testing:
    validation_y_preds_vgg16, validation_y_labels_vgg16 = training_last_cnn_layer_with_labels(
        model_vgg16,
        validation_generator_non_shuffle,
        steps_per_epoch_validation
    )

In [58]:
if enable_vgg_16_training_testing:
    training_y_pred_vgg16 = training_y_preds_vgg16[0]
    training_y_pred_vgg16.shape
    training_y_preds_vgg16_array = np.array(training_y_preds_vgg16)


In [59]:
if enable_vgg_16_training_testing:
    
    training_y_preds_flat_vgg16 = np.array(training_y_preds_vgg16).reshape(-1, 7, 7, 512)
    print("training_y_preds_vgg16 {} -> {}".format(
        np.array(training_y_preds_vgg16).shape,
        training_y_preds_flat_vgg16.shape,
    ))   
    
    training_y_labels_flat_vgg16 = np.array(training_y_labels_vgg16).reshape(-1, 196)
    print("training_y_labels_vgg16 {} -> {}".format(
        np.array(training_y_labels_vgg16).shape,
        training_y_labels_flat_vgg16.shape,
    ))   
    
    validation_y_preds_flat_vgg16 = np.array(validation_y_preds_vgg16).reshape(-1, 7, 7, 512)    
    print("validation_y_preds_vgg16 {} -> {}".format(
        np.array(validation_y_preds_vgg16).shape,
        validation_y_preds_flat_vgg16.shape,
    ))   
    
    validation_y_labels_flat_vgg16 = np.array(validation_y_labels_vgg16).reshape(-1, 196)
    print("validation_y_labels_vgg16 {} -> {}".format(
        np.array(validation_y_labels_vgg16).shape,
        validation_y_labels_flat_vgg16.shape,
    ))   

### Train top fully connected layer

In [60]:
# From https://github.com/fchollet/deep-learning-models/issues/13
sgd = keras.optimizers.SGD(lr=0.0005, decay=1e-6, momentum=0.9)
    
def train_top_model(num_epochs, bottleneck_predictions_train, bottleneck_predictions_validation, train_labels, validation_labels):
    """
    Best params so far:
    
    SGD with
       - 2 4096 dense layers
       - Dropout 0.6
       - lr=0.0005, decay=1e-6, momentum=0.9
       result: loss: 0.1307 - acc: 0.9890 - val_loss: 1.7402 - val_acc: 0.5342
    """
    
    top_model = Sequential()
    top_model.add(Flatten(input_shape=bottleneck_predictions_train.shape[1:]))
    top_model.add(Dense(4096, activation='relu'))
    top_model.add(Dense(4096, activation='relu'))
    top_model.add(Dropout(0.75))
    top_model.add(Dense(num_classes, activation='softmax'))
    
    top_model.compile(
        optimizer=sgd,
        loss='categorical_crossentropy', 
        metrics=['accuracy'],
    )

    top_model.fit(bottleneck_predictions_train, 
              train_labels,
              epochs=num_epochs,
              batch_size=batch_size,
              validation_data=(bottleneck_predictions_validation, validation_labels))
    
    return top_model

In [61]:
if enable_vgg_16_training_testing:
    top_model_vgg16 = train_top_model(
        num_epochs=130,
        bottleneck_predictions_train=training_y_preds_flat_vgg16,
        bottleneck_predictions_validation=validation_y_preds_flat_vgg16,
        train_labels=training_y_labels_flat_vgg16,
        validation_labels=validation_y_labels_flat_vgg16,
    )

## Fine tuning VGG16

### Set additional params


In [62]:
#learning_rate = 1e-4
#momentum = 0.9

# this is the number of layers that contains the first 4 (of 5 total) convblocks.
# this is special because these are the layers that we will freeze, whereas convblock 5
# will be fine-tuned.
num_first_4_convblock_layers = 15 

### Instantiate base model and freeze first four conv blocks

In [63]:
base_model_vgg16 = model_vgg16

base_model_vgg16.summary()

# Freeze first four conv blocks
for layer in base_model_vgg16.layers[:num_first_4_convblock_layers]:
    layer.trainable = False  # aka "freeze" this layer

_________________________________________________________________

Layer (type)                 Output Shape              Param #   


input_3 (InputLayer)         (None, 227, 227, 3)       0         

_________________________________________________________________

block1_conv1 (Conv2D)        (None, 227, 227, 64)      1792      

_________________________________________________________________

block1_conv2 (Conv2D)        (None, 227, 227, 64)      36928     

_________________________________________________________________

block1_pool (MaxPooling2D)   (None, 113, 113, 64)      0         

_________________________________________________________________

block2_conv1 (Conv2D)        (None, 113, 113, 128)     73856     

_________________________________________________________________

block2_conv2 (Conv2D)        (None, 113, 113, 128)     147584    

_________________________________________________________________

block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

### Combine base model with previously trained top model

In [64]:
if enable_vgg_16_training_testing:
    # add the model on top of the convolutional base
    combined_model_vgg16 = keras.Model(
        input= base_model_vgg16.input, 
        output=top_model_vgg16(base_model_vgg16.output)
    )
    combined_model_vgg16.summary()

In [65]:
if enable_vgg_16_training_testing:
    combined_model_vgg16.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

In [66]:
if enable_vgg_16_training_testing:
    combined_model_vgg16.fit_generator(
        train_generator,
        steps_per_epoch=steps_per_epoch_training,
        epochs=5,
        validation_data=validation_generator,
        validation_steps=steps_per_epoch_validation)
    combined_model_vgg16.save("vgg16_fine_tuned_60percent_validation_accuracy.hdf5")

### Test set evaluation of fine-tuned VGG16 model


In [67]:
if enable_vgg_16_training_testing:
    test_loss, test_accuracy = combined_model_vgg16.evaluate_generator(
        generator = test_generator,
        steps = steps_per_epoch_test,
        verbose = 1,
    )
    print("test_loss: {}, test_accuracy: {}".format(test_loss, test_accuracy))

## Transfer learning on resnet-156

According to http://noiselab.ucsd.edu/ECE228/Reports/Report17.pdf, they were only able to get ~50% test set accuracy on VGG16, which is probably comparable to the above model.

TODO: first try to repro work from https://github.com/foamliu/Car-Recognition, then loop back to this.


### Differences between foamliu (working) and this one that might matter

* This uses a keras predefined model, foamli defines it's own -- might be lots of subtle model differences
* This one tries to first train bottleneck features, foamli just goes straight to fine tuning
* This one uses two extra FC layers and dropout, foamli does not
* This one freezes all of the resnet weights, not sure what foamli does here
* SGD params are different
* Pass in a ReduceLROnPlateau callback

### Differences between foamliu (working) and this one that should not matter

* This one uses a different approach to combining the model
* Foamli manually loads in weights
* Foamli uses image folders rather than labels



### Instantiate training/validation ImageDataGenerators

Cannot re-use training generators from above, since they are already exhausted

In [68]:
print("getting train_generator_non_shuffle")
train_generator_non_shuffle = get_train_generator(shuffle=False)
print("getting train_generator")
train_generator = get_train_generator(shuffle=True)
print("getting validation_generator_non_shuffle")
validation_generator_non_shuffle = get_validation_generator(shuffle=False)
print("getting validation_generator")
validation_generator = get_validation_generator(shuffle=True)
print("getting test_generator")

# Use the classes from any of the above DataFrameIterators for the
# the test set DataFrameIterator.
classes = list(train_generator_non_shuffle.class_indices.keys())
test_generator = get_test_generator(shuffle=False, classes=classes)

steps_per_epoch_training=train_generator_non_shuffle.n // train_generator_non_shuffle.batch_size
steps_per_epoch_validation=validation_generator_non_shuffle.n // validation_generator_non_shuffle.batch_size
steps_per_epoch_test=test_generator.n // test_generator.batch_size
print("steps_per_epoch_training: {}".format(steps_per_epoch_training))
print("steps_per_epoch_validation: {}".format(steps_per_epoch_validation))

getting train_generator_non_shuffle

Found 6115 images belonging to 196 classes.

getting train_generator

Found 6115 images belonging to 196 classes.

getting validation_generator_non_shuffle

Found 2029 images belonging to 196 classes.

getting validation_generator

Found 2029 images belonging to 196 classes.

getting test_generator

Found 8041 images belonging to 196 classes.

steps_per_epoch_training: 382

steps_per_epoch_validation: 126


### Define transfer learning model

In [69]:
# build the network
base_model_resnet152 = ResNet152(
    weights='imagenet', 
    input_shape=(img_width, img_height, 3), 
    include_top=False,
    backend=keras.backend,  # workaround keras issue: https://github.com/keras-team/keras-applications/issues/54#issuecomment-445097297
    layers=keras.layers, 
    models=keras.models, 
    utils=keras.utils,
)

base_model_resnet152.summary()

__________________________________________________________________________________________________

Layer (type)                    Output Shape         Param #     Connected to                     


input_4 (InputLayer)            (None, 227, 227, 3)  0                                            

__________________________________________________________________________________________________

conv1_pad (ZeroPadding2D)       (None, 233, 233, 3)  0           input_4[0][0]                    

__________________________________________________________________________________________________

conv1_conv (Conv2D)             (None, 114, 114, 64) 9472        conv1_pad[0][0]                  

__________________________________________________________________________________________________

conv1_bn (BatchNormalization)   (None, 114, 114, 64) 256         conv1_conv[0][0]                 

__________________________________________________________________________________________________


### Bottleneck features

In [70]:
if enable_resnet_152_bottleneck:
    training_y_preds_resnet152, training_y_labels_resnet152 = training_last_cnn_layer_with_labels(
        base_model_resnet152,
        train_generator_non_shuffle,
        steps_per_epoch_training
    )
    validation_y_preds_resnet152, validation_y_labels_resnet152 = training_last_cnn_layer_with_labels(
        base_model_resnet152,
        validation_generator_non_shuffle,
        steps_per_epoch_validation
    )
    

0/382

1/382

2/382

3/382

4/382

5/382

6/382

7/382

8/382

9/382

10/382

11/382

12/382

13/382

14/382

15/382

16/382

17/382

18/382

19/382

20/382

21/382

22/382

23/382

24/382

25/382

26/382

27/382

28/382

29/382

30/382

31/382

32/382

33/382

34/382

35/382

36/382

37/382

38/382

39/382

40/382

41/382

42/382

43/382

44/382

45/382

46/382

47/382

48/382

49/382

50/382

51/382

52/382

53/382

54/382

55/382

56/382

57/382

58/382

59/382

60/382

61/382

62/382

63/382

64/382

65/382

66/382

67/382

68/382

69/382

70/382

71/382

72/382

73/382

74/382

75/382

76/382

77/382

78/382

79/382

80/382

81/382

82/382

83/382

84/382

85/382

86/382

87/382

88/382

89/382

90/382

91/382

92/382

93/382

94/382

95/382

96/382

97/382

98/382

99/382

100/382

101/382

102/382

103/382

104/382

105/382

106/382

107/382

108/382

109/382

110/382

111/382

112/382

113/382

114/382

115/382

116/382

117/382

118/382

119/382

120/382

121/382

122/382

123

In [74]:
if enable_resnet_152_bottleneck:
  
    training_y_preds_flat_resnet152 = np.array(training_y_preds_resnet152).reshape(-1, 8, 8, 2048)
    print("training_y_preds_resnet152 {} -> {}".format(
        np.array(training_y_preds_resnet152).shape,
        training_y_preds_flat_resnet152.shape,
    ))    
    
    training_y_labels_flat_resnet152 = np.array(training_y_labels_resnet152).reshape(-1, 196)
    print("training_y_labels_resnet152 {} -> {}".format(
        np.array(training_y_labels_resnet152).shape,
        training_y_labels_flat_resnet152.shape,
    ))    
    
    validation_y_preds_flat_resnet152 = np.array(validation_y_preds_resnet152).reshape(-1, 8, 8, 2048)
    print("validation_y_preds_resnet152 {} -> {}".format(
        np.array(validation_y_preds_resnet152).shape,
        validation_y_preds_flat_resnet152.shape,
    ))    
    
    validation_y_labels_flat_resnet152 = np.array(validation_y_labels_resnet152).reshape(-1, 196)
    print("validation_y_labels_resnet152 {} -> {}".format(
        np.array(validation_y_labels_resnet152).shape,
        validation_y_labels_flat_resnet152.shape,
    ))    

training_y_preds_resnet152 (382, 16, 8, 8, 2048) -> (6112, 8, 8, 2048)

training_y_labels_resnet152 (382, 16, 196) -> (6112, 196)

validation_y_preds_resnet152 (126, 16, 8, 8, 2048) -> (2016, 8, 8, 2048)

validation_y_labels_resnet152 (126, 16, 196) -> (2016, 196)


In [75]:

def train_top_model_resnet(num_epochs, bottleneck_predictions_train, bottleneck_predictions_validation, train_labels, validation_labels):
    """
    Best params so far:
    
    SGD with
       - 2 4096 dense layers
       - Dropout 0.6
       - lr=0.0005, decay=1e-6, momentum=0.9
       result: loss: 0.1307 - acc: 0.9890 - val_loss: 1.7402 - val_acc: 0.5342
    """
    
    # From https://github.com/fchollet/deep-learning-models/issues/13
    #sgd = keras.optimizers.SGD(lr=0.0005, decay=1e-6, momentum=0.9)
    sgd = keras.optimizers.Adam(lr=0.0001)

    top_model = Sequential()
    top_model.add(AveragePooling2D(input_shape=(8,8,2048), pool_size=(4, 4), data_format='channels_last'))
    top_model.add(Flatten())
    top_model.add(Dense(4096, activation='sigmoid'))
    top_model.add(Dense(4096, activation='sigmoid'))
    top_model.add(Dropout(0.5))
    top_model.add(Dense(num_classes, activation='softmax'))
    
    top_model.compile(
        optimizer=sgd,
        loss='categorical_crossentropy', 
        metrics=['accuracy'],
    )
    
    #print(top_model.summary())

    top_model.fit(bottleneck_predictions_train, 
              train_labels,
              epochs=num_epochs,
              batch_size=99,  # was batch_size=batch_size, experimenting
              validation_data=(bottleneck_predictions_validation, validation_labels))
    
    return top_model

if enable_resnet_152_bottleneck:

    #top_model = train_top_model_resnet(
    #    num_epochs=500,
    #    bottleneck_predictions_train=training_y_preds_flat_resnet152,
    #    bottleneck_predictions_validation=validation_y_preds_flat_resnet152,
    #    train_labels=training_y_labels_flat_resnet152,
    #    validation_labels=validation_y_labels_flat_resnet152,
    #)
    
    top_model = train_top_model_resnet(
        num_epochs=500,
        bottleneck_predictions_train=np.array(training_y_preds_resnet152),
        bottleneck_predictions_validation=np.array(training_y_labels_resnet152),
        train_labels=np.array(validation_y_preds_resnet152),
        validation_labels=np.array(validation_y_labels_resnet152),
    )

ValueError: Error when checking input: expected average_pooling2d_3_input to have 4 dimensions, but got array with shape (382, 16, 8, 8, 2048)

### Fine tune resnet152 without bottleneck features

In [None]:

x = base_model_resnet152.output

#x = AveragePooling2D(pool_size=(4, 4), data_format='channels_last')(x)
x = AveragePooling2D(pool_size=(7, 7), data_format='channels_last')(x)

x = Flatten()(x)
        
x = Dropout(0.35)(x)
preds = Dense(num_classes, activation='softmax')(x)

combined_model_resnet152 = keras.Model(
    inputs=base_model_resnet152.input, 
    outputs=preds
)

combined_model_resnet152.summary()



In [None]:
# sgd = keras.optimizers.SGD(lr=0.0005, decay=1e-6, momentum=0.9)
sgd = keras.optimizers.SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)

combined_model_resnet152.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

combined_model_resnet152.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch_training,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=steps_per_epoch_validation
)

In [None]:
test_loss, test_accuracy = combined_model_resnet152.evaluate_generator(
    generator = test_generator,
    steps = steps_per_epoch_test,
    verbose = 1,
)
print("test_loss: {}, test_accuracy: {}".format(test_loss, test_accuracy))