In [1]:
# Imports
import keras
from keras.preprocessing.image import ImageDataGenerator
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import os, glob, random, sys, math, cv2
from sklearn.model_selection import train_test_split
from skimage import transform
from tqdm import tqdm

# Model
from model import Model

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
# Read image categories
ROOT_DIR = os.getcwd()
IMAGE_DIR = os.path.join(ROOT_DIR, 'images')
CATEGORIES = [folder for folder in os.listdir(IMAGE_DIR) 
               if os.path.isdir(os.path.join(IMAGE_DIR, folder))]
CATEGORIES.sort() # Alphabetical order
NUM_CLASSES = len(CATEGORIES)
print(CATEGORIES)

['aeschynanthus_parvifolius', 'bougainvillea_glabra', 'costus_woodsonii_maas', 'dillenia_excelsa', 'extra_dillenia_excelsa', 'extra_dillenia_indica', 'hedychium', 'hibiscus_rosa_sinensis', 'ixora_congesta', 'jasminum_sambac', 'lycoris', 'papilionanthe_miss_joaqium', 'plumeria']


In [5]:
IMAGE_DIM = (512, 512,)

# Read images and labels into arrays
images = []
labels = []
for label, category in enumerate(CATEGORIES):
    print("Processing {} images...".format(category))
    folder_path = os.path.join(IMAGE_DIR, category)
    # Read images in subfolder
    for image_file_path in tqdm(glob.glob(folder_path + "/*.jpg")):
        image_data = cv2.imread(image_file_path)
        if image_data is None:
            print("Faulty image {}; Ignoring...".format(image_file_path))
            continue
        # Resize to constant dimensions
        image_data = transform.resize(image_data, IMAGE_DIM)
        images.append(image_data)
        labels.append(label+1)

# Change labels to one-hot vector of length NUM_CLASSES instead of integers
labels = keras.utils.to_categorical(labels)

print("Read {} images and {} labels.".format(len(images), len(labels))) # Should be same number

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  2%|▏         | 2/100 [00:00<00:05, 18.41it/s]

Processing aeschynanthus_parvifolius images...


100%|██████████| 100/100 [00:05<00:00, 19.98it/s]
  3%|▎         | 3/100 [00:00<00:06, 15.32it/s]

Processing bougainvillea_glabra images...


100%|██████████| 100/100 [00:08<00:00, 11.65it/s]
  0%|          | 0/82 [00:00<?, ?it/s]

Processing costus_woodsonii_maas images...


100%|██████████| 82/82 [00:12<00:00,  6.57it/s]
  5%|▌         | 2/39 [00:00<00:02, 16.42it/s]

Processing dillenia_excelsa images...


100%|██████████| 39/39 [00:02<00:00, 15.26it/s]
  0%|          | 0/43 [00:00<?, ?it/s]

Processing extra_dillenia_excelsa images...


100%|██████████| 43/43 [00:04<00:00, 10.55it/s]
  0%|          | 0/48 [00:00<?, ?it/s]

Processing extra_dillenia_indica images...


100%|██████████| 48/48 [00:06<00:00,  7.75it/s]
  3%|▎         | 3/99 [00:00<00:05, 17.46it/s]

Processing hedychium images...


100%|██████████| 99/99 [00:08<00:00, 11.21it/s]
  2%|▏         | 2/98 [00:00<00:04, 19.92it/s]

Processing hibiscus_rosa_sinensis images...


100%|██████████| 98/98 [00:12<00:00,  8.00it/s]
  4%|▍         | 3/71 [00:00<00:02, 29.42it/s]

Processing ixora_congesta images...


100%|██████████| 71/71 [00:03<00:00, 21.65it/s]
  0%|          | 0/99 [00:00<?, ?it/s]

Processing jasminum_sambac images...


100%|██████████| 99/99 [00:08<00:00, 11.95it/s]
  3%|▎         | 2/76 [00:00<00:04, 17.44it/s]

Processing lycoris images...


100%|██████████| 76/76 [00:09<00:00,  7.98it/s]
  2%|▏         | 2/99 [00:00<00:04, 19.94it/s]

Processing papilionanthe_miss_joaqium images...


 22%|██▏       | 22/99 [00:01<00:03, 21.39it/s]

Faulty image /Users/qinghao1/Documents/CS/ML/Projects/cs3244/images/papilionanthe_miss_joaqium/170c8f5dd143438a8e291505b50cbd67.jpg; Ignoring...


100%|██████████| 99/99 [00:05<00:00, 16.65it/s]
  3%|▎         | 2/78 [00:00<00:04, 18.49it/s]

Processing plumeria images...


100%|██████████| 78/78 [00:06<00:00, 12.90it/s]

Read 1031 images and 1031 labels.





In [10]:
# Split into training and test sets
train_test_split_ratio = 0.9 # Proportion in train set
x_train, x_test, y_train, y_test = train_test_split(images, labels, train_size = train_test_split_ratio)
print("{} training images, {} testing images".format(len(x_train), len(x_test)))

# Convert to numpy arrays
x_train, x_test, y_train, y_test = np.array(x_train), np.array(x_test), np.array(y_train), np.array(y_test)



927 training images, 104 testing images


In [12]:
# Save arrays to file
if not os.path.exists('data'):
    os.makedirs('data')
np.save('data/train_images', x_train)
np.save('data/train_labels', y_train)
np.save('data/test_images', x_test)
np.save('data/test_labels', y_test)

In [13]:
# Image data augmentation to increase effective dataset size
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255, # Rescale to range 0..1
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='wrap' # I.e. tiling
)

In [25]:
# Model
base_model = keras.applications.densenet.DenseNet121(include_top=True, weights=None, input_shape=IMAGE_DIM+(3,), classes=NUM_CLASSES)
# base_model.summary()
model = keras.models.Model(inputs=base_model.input, outputs=base_model.get_layer(name='fc1000').output)
model.compile(optimizer = keras.optimizers.Nadam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Callback to save model weights
model_checkpoint = keras.callbacks.ModelCheckpoint('weights.{epoch:02d}.h5', verbose=1)

debugt


In [28]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_17 (ZeroPadding2 (None, 518, 518, 3)  0           input_9[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 256, 256, 64) 9408        zero_padding2d_17[0][0]          
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 256, 256, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

conv4_block16_1_conv (Conv2D)   (None, 32, 32, 128)  94208       conv4_block16_0_relu[0][0]       
__________________________________________________________________________________________________
conv4_block16_1_bn (BatchNormal (None, 32, 32, 128)  512         conv4_block16_1_conv[0][0]       
__________________________________________________________________________________________________
conv4_block16_1_relu (Activatio (None, 32, 32, 128)  0           conv4_block16_1_bn[0][0]         
__________________________________________________________________________________________________
conv4_block16_2_conv (Conv2D)   (None, 32, 32, 32)   36864       conv4_block16_1_relu[0][0]       
__________________________________________________________________________________________________
conv4_block16_concat (Concatena (None, 32, 32, 768)  0           conv4_block15_concat[0][0]       
                                                                 conv4_block16_2_conv[0][0]       
__________

In [26]:
# Train model

# Hyper parameters
BATCH_SIZE = 8
EPOCHS = 100
STEPS_PER_EPOCH = len(x_train) // BATCH_SIZE

model.fit_generator(
    datagen.flow(x_train, y_train, batch_size = BATCH_SIZE),
    steps_per_epoch = STEPS_PER_EPOCH,
    epochs = EPOCHS,
    callbacks = [model_checkpoint]
)

Epoch 1/100


ValueError: Error when checking target: expected fc1000 to have shape (13,) but got array with shape (14,)