In [1]:
import os
import argparse
import numpy as np

from PIL import Image
import tensorflow as tf
import keras

from keras.applications import VGG16
from keras.models import Model
from keras.layers import Dense, GlobalMaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from imagenet_utils import decode_predictions
from imagenet_utils import preprocess_input

from tqdm import tqdm
import glob
import matplotlib.pyplot as plt

Using TensorFlow backend.


# Data Prep

### Using ImageDataGenerator.

Eventually want to use args and argparse to take in train_dir and test_dir.

In [2]:
train_dir = 'train_CAL/'
test_dir = 'test_CAL/'

In [8]:
IM_HEIGHT = 224
IM_WIDTH = 224
NB_EPOCHS = 1
BAT_SIZE = 16
FC_SIZE = 500 # May need to train this parameter
nb_classes = 1

In [4]:
def get_nb_files(directory):
    """Get number of files by searching directory recursively"""
    cnt = 0
    for r, dirs, files in os.walk(directory):
        for dr in dirs:
            cnt += len(glob.glob(os.path.join(r, dr + "/*")))
    return cnt

In [5]:
nb_train_samples = get_nb_files(train_dir)
nb_classes = len(glob.glob(train_dir + "/*"))
nb_val_samples = get_nb_files(test_dir)

# VGG Model

In [6]:
def add_new_last_layer(base_model, nb_classes):
    """Add last layer to the convnet
    Args:
    base_model: keras model excluding top
    nb_classes: # of classes
    Returns:
    new keras model with last layer
    """
    x = base_model.output
    x = GlobalMaxPooling2D()(x)
    x = Dense(FC_SIZE, activation='relu')(x)
    predictions = Dense(nb_classes, activation='sigmoid')(x) 
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

In [7]:
def setup_to_transfer_learn(model, base_model):
    """Freeze all layers and compile the model"""
    for layer in base_model.layers:
        layer.trainable = False
        model.compile(optimizer='adam',    
                    loss='binary_crossentropy', 
                    metrics=['accuracy'])

In [None]:
# ignore this for now
def setup_to_finetune(model):
    """
    If we want to fine-tine
    """
    for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]: # need to declare num layers to train
        layer.trainable = False
    for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
        layer.trainable = True
    model.compile(optimizer=keras.optimizers.Adam(lr=0.0001, momentum=0.9),   
                 loss='binary_crossentropy')

Need to tune:
* architecture type (ResNet, Inception, VGG19)
* FC_SIZE
* Adam parameters (momentum, learning rate, etc)
* image data generator transforming parameters

In [10]:
def vgg_train(train_dir, val_dir):
    
    #train_datagen =  ImageDataGenerator(shear_range=0.2, zoom_range=0.2, horizontal_flip=True, rescale=1. / 255)
    train_datagen =  ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True)
    
    test_datagen = ImageDataGenerator(
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(IM_WIDTH, IM_HEIGHT),
        batch_size=BAT_SIZE,
        class_mode="binary"
    )
    validation_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(IM_WIDTH, IM_HEIGHT),
        batch_size=BAT_SIZE,
        class_mode="binary"
    )

    base_model = VGG16(weights='imagenet', include_top=False)
    model = add_new_last_layer(base_model, nb_classes)

    setup_to_transfer_learn(model, base_model)

    history = model.fit_generator(train_generator, epochs = NB_EPOCHS, steps_per_epoch = nb_train_samples, \
                                     validation_data=validation_generator, validation_steps = nb_val_samples)
    model.save("vgg_train.model")
    
    return history

h = vgg_train(train_dir, test_dir)

Found 720 images belonging to 2 classes.
Found 140 images belonging to 2 classes.
Epoch 1/1
 51/720 [=>............................] - ETA: 14913s - loss: 5.3019 - acc: 0.437 - ETA: 13803s - loss: 3.6583 - acc: 0.656 - ETA: 13270s - loss: 3.7821 - acc: 0.687 - ETA: 13093s - loss: 3.5921 - acc: 0.718 - ETA: 13195s - loss: 4.0825 - acc: 0.700 - ETA: 13113s - loss: 4.5774 - acc: 0.677 - ETA: 13125s - loss: 4.2113 - acc: 0.705 - ETA: 13093s - loss: 4.4404 - acc: 0.695 - ETA: 13044s - loss: 4.3948 - acc: 0.701 - ETA: 13042s - loss: 4.2575 - acc: 0.712 - ETA: 13137s - loss: 4.3284 - acc: 0.710 - ETA: 13146s - loss: 4.2195 - acc: 0.718 - ETA: 13101s - loss: 4.2049 - acc: 0.721 - ETA: 13045s - loss: 4.3363 - acc: 0.714 - ETA: 12990s - loss: 4.1143 - acc: 0.729 - ETA: 12941s - loss: 4.2350 - acc: 0.722 - ETA: 12864s - loss: 4.2821 - acc: 0.720 - ETA: 12797s - loss: 4.1562 - acc: 0.729 - ETA: 12844s - loss: 4.1495 - acc: 0.730 - ETA: 12767s - loss: 4.1435 - acc: 0.731 - ETA: 12694s - loss: 4.138

KeyboardInterrupt: 

In [None]:
def predict(test_dir):
    from keras.models import load_model
    test_datagen = ImageDataGenerator(vertical_flip=True)
    
    test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(IM_WIDTH, IM_HEIGHT),
        batch_size=BAT_SIZE,
        class_mode="binary"
    )
    
    model = load_model('vgg_train.model')
    pred = model.predict_generator(self, generator, steps, max_queue_size=10, workers=1, use_multiprocessing=False, verbose=0)
    
    return pred

In [70]:
# ignore
#preds = [model.predict(image) for image in tqdm(train_data2)]
#results = [decode_predictions(pred)[0] for pred in preds]
#results