In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import sklearn

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg19 import VGG19, preprocess_input
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from keras.preprocessing import image                  
from tqdm import tqdm
from sklearn.datasets import load_files
from keras.utils import np_utils
from glob import glob

%matplotlib inline

Using TensorFlow backend.


In [3]:
# --- Dataset location ---------------------------------------------------
# Root folder of the dataset; each split is a sub-directory of it.
data_dir = '../flower_data/'
TRAIN = 'train'
VAL = 'valid'
TEST = 'test'

train_in_file, valid_in_file = (
    os.path.join(data_dir, split_name) for split_name in (TRAIN, VAL)
)

# --- Input and training hyper-parameters --------------------------------
channels = 3  # RGB
input_size = (224, 224)  # image size is determined by the input size of VGG-19
batch_size = 64
epochs = 10

# Sample budgets for the training and validation splits.
num_train_samples = 4000
num_valid_samples = 40

In [5]:
# Data pipeline
#
# Training images are randomly augmented (flip, rotation, zoom, shifts) and
# then passed through the VGG-19 `preprocess_input` function.
#
# Validation images receive ONLY `preprocess_input`: random augmentation of
# the validation set makes the validation loss change from run to run, which
# makes any decision based on it (best-model selection, early stopping)
# unreliable.
#
# No `rescale` factor is used. Keras applies `rescale` in addition to
# `preprocessing_function`, so `rescale=0.5` would halve the already
# normalised pixel values and feed the pre-trained network inputs on a
# different scale from the one its ImageNet weights were trained with.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    rotation_range=25,
    zoom_range=0.4,
    width_shift_range=0.2,
    height_shift_range=0.2
)
valid_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

# Pass the configured batch size explicitly; otherwise the generators fall
# back to the Keras default (32) and `batch_size` from the config cell is
# silently ignored. `class_mode='categorical'` yields one-hot labels.
train_generator = train_datagen.flow_from_directory(
    train_in_file,
    target_size=input_size,
    batch_size=batch_size,
    class_mode='categorical'
)
valid_generator = valid_datagen.flow_from_directory(
    valid_in_file,
    target_size=input_size,
    batch_size=batch_size,
    class_mode='categorical'
)

Found 6552 images belonging to 102 classes.
Found 818 images belonging to 102 classes.


## Initializing the model

In [None]:
# Load VGG-19 with ImageNet weights and without its top classification
# layers; the input shape is (height, width, channels) from the config cell.
vgg19 = VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=tuple(input_size) + (channels,)
)
vgg19.summary()

In [None]:
# Transfer-learning set-up.
#
# Our data is similar to the images in the ImageNet database and the dataset
# is small (only a few thousand images in total), so we can expect the
# higher-level features of the pre-trained model to be relevant to our data.
# We therefore freeze every layer of the VGG-19 base and train only the
# fully-connected (FC) layers added below.
for base_layer in vgg19.layers:
    base_layer.trainable = False

# Custom head stacked on top of the frozen base, listed in the order in
# which the layers are applied.
head_layers = [
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(256, activation='relu'),
    # One output unit per class found by the training generator.
    Dense(train_generator.num_classes, activation='softmax'),
]

net = vgg19.output
for head_layer in head_layers:
    net = head_layer(net)

# Full model: VGG-19 input -> frozen base -> custom head.
predicted_model = Model(inputs=vgg19.input, outputs=net)
predicted_model.summary()

In [None]:
# Configure training: Adam optimizer (constructed with 0.001 as its first
# argument), categorical cross-entropy loss, accuracy reported as a metric.
predicted_model.compile(
    optimizer=optimizers.Adam(0.001),
    metrics=['accuracy'],
    loss='categorical_crossentropy'
)

## Saving the model (checkpointing) and early stopping

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Save the full model (architecture + weights) to 'vgg19.h5' whenever the
# validation loss improves.
#
# The checkpoint is evaluated after EVERY epoch (the default interval). The
# previous `period=10` meant the callback only ran at the end of epoch 10;
# because early stopping below can end training before that, no model file
# would have been written at all.
checkpoint = ModelCheckpoint(
    'vgg19.h5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    verbose=2
)

# Stop training once the validation loss stops improving.
# NOTE(review): min_delta=0.5 together with patience=1 is very aggressive --
# training stops after the first epoch whose val_loss does not drop by at
# least 0.5. Values kept as in the original; revisit when tuning.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0.5,  # smallest change in val_loss that counts as an improvement
    patience=1,  # number of epochs with no improvement after which we stop
    verbose=2
)

## Training model

In [None]:
# Keras 2 counts an epoch in BATCHES (steps), not in samples, so convert the
# sample budgets from the config cell into step counts using the batch size
# each generator actually yields. `max(1, ...)` guards against a budget that
# is smaller than one batch, which would otherwise give zero steps.
steps_per_epoch = max(1, num_train_samples // train_generator.batch_size)
validation_steps = max(1, num_valid_samples // valid_generator.batch_size)

# Train the model. The original call referenced the undefined names
# `train_samples` / `valid_samples` and used the legacy Keras 1 keywords
# `samples_per_epoch` / `nb_val_samples`; both are fixed here.
# The returned History object is kept so the training curves can be inspected.
history = predicted_model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=valid_generator,
    validation_steps=validation_steps,
    callbacks=[checkpoint, early_stopping]
)