In [1]:
# Imports
import keras
from keras.preprocessing.image import ImageDataGenerator
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import os, glob, random, sys, math, cv2
from sklearn.model_selection import train_test_split
from skimage import transform
from tqdm import tqdm

# Model
from model import Model

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Read image categories
ROOT_DIR = os.getcwd()
IMAGE_DIR = os.path.join(ROOT_DIR, 'images')
CATEGORIES = [folder for folder in os.listdir(IMAGE_DIR) 
               if os.path.isdir(os.path.join(IMAGE_DIR, folder))]
NUM_CLASSES = len(CATEGORIES)
print(CATEGORIES)

['dillenia_excelsa', 'aeschynanthus_parvifolius', 'ixora_congesta', 'plumeria', 'bougainvillea_glabra', 'hedychium', 'jasminum_sambac', 'lycoris', 'hibiscus_rosa_sinensis', 'papilionanthe_miss_joaqium']


In [10]:
IMAGE_DIM = (512, 512,)

# Read images and labels into arrays
images = []
labels = []
for label, category in enumerate(CATEGORIES):
    print("Processing {} images...".format(category))
    folder_path = os.path.join(IMAGE_DIR, category)
    # Read images in subfolder
    for image_file_path in tqdm(glob.glob(folder_path + "/*.jpg")):
        image_data = cv2.imread(image_file_path)
        if image_data is None:
            print("Faulty image {}; Ignoring...".format(image_file_path))
            continue
        # Resize to constant dimensions
        image_data = transform.resize(image_data, IMAGE_DIM)
        images.append(image_data)
        labels.append(label+1)

# Change labels to one-hot vector of length NUM_CLASSES instead of integers
labels = keras.utils.to_categorical(labels)

print("Read {} images and {} labels.".format(len(images), len(labels))) # Should be same number

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  3%|▎         | 1/38 [00:00<00:04,  8.09it/s]

0
Processing dillenia_excelsa images...


100%|██████████| 38/38 [00:02<00:00, 13.81it/s]
  4%|▍         | 3/77 [00:00<00:03, 18.53it/s]

1
Processing aeschynanthus_parvifolius images...


100%|██████████| 77/77 [00:04<00:00, 18.50it/s]
  6%|▌         | 4/71 [00:00<00:02, 29.26it/s]

2
Processing ixora_congesta images...


100%|██████████| 71/71 [00:03<00:00, 19.70it/s]
  3%|▎         | 2/78 [00:00<00:03, 19.95it/s]

3
Processing plumeria images...


100%|██████████| 78/78 [00:05<00:00, 13.23it/s]
  2%|▏         | 2/86 [00:00<00:06, 13.82it/s]

4
Processing bougainvillea_glabra images...


100%|██████████| 86/86 [00:07<00:00, 10.98it/s]
  2%|▏         | 2/93 [00:00<00:07, 12.85it/s]

5
Processing hedychium images...


100%|██████████| 93/93 [00:08<00:00, 11.24it/s]
  0%|          | 0/81 [00:00<?, ?it/s]

6
Processing jasminum_sambac images...


100%|██████████| 81/81 [00:06<00:00, 13.06it/s]
  3%|▎         | 2/76 [00:00<00:03, 19.97it/s]

7
Processing lycoris images...


100%|██████████| 76/76 [00:08<00:00,  8.59it/s]
  3%|▎         | 3/86 [00:00<00:03, 25.47it/s]

8
Processing hibiscus_rosa_sinensis images...


100%|██████████| 86/86 [00:11<00:00,  7.39it/s]
  3%|▎         | 2/79 [00:00<00:04, 16.06it/s]

9
Processing papilionanthe_miss_joaqium images...


100%|██████████| 79/79 [00:04<00:00, 17.68it/s]

Read 765 images and 765 labels.





In [11]:
# Split into training and test sets
train_test_split_ratio = 0.9 # Proportion in train set
x_train, x_test, y_train, y_test = train_test_split(images, labels, train_size = train_test_split_ratio)
print("{} training images, {} testing images".format(len(x_train), len(x_test)))

[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
688 training images, 77 testing images




In [None]:
# Save arrays to file
if not os.path.exists('data'):
    os.makedirs('data')
np.save(x_train, 'data/train_images')
np.save(y_train, 'data/train_labels')
np.save(x_test, 'data/test_images')
np.save(y_test, 'data/test_labels')

In [None]:
# Image data augmentation to increase effective dataset size
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255, # Rescale to range 0..1
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='wrap' # I.e. tiling
)

In [None]:
# Model
model = keras.keras.applications.densenet.DenseNet121(include_top=True, weights='imagenet', input_shape=IMAGE_DIM+(3,), pooling=None, classes=1000)
model_checkpoint = keras.ModelCheckpoint('weights.{epoch:02d}.h5', verbose=1)

# Possibly load trained weights here

In [None]:
# Train model

# Hyper parameters
BATCH_SIZE = 8
EPOCHS = 100
STEPS_PER_EPOCH = len(x_train) // BATCH_SIZE

model.fit_generator(
    datagen.flow(x_train, y_train, batch),
    steps_per_epoch = STEPS_PER_EPOCH,
    epochs = EPOCHS,
    callbacks = [model_checkpoint]
)