In [14]:
import tensorflow as tf
# Expose no GPU devices to TensorFlow, so everything below runs on the CPU.
# NOTE(review): presumably intentional for this machine - confirm before reusing elsewhere.
tf.config.set_visible_devices([], 'GPU')
from tensorflow.keras.preprocessing.image import (load_img,
                                                  img_to_array,
                                                  ImageDataGenerator)

from tensorflow.keras.applications.vgg16 import (preprocess_input,
                                                 decode_predictions,
                                                 VGG16)


from tensorflow.keras.layers import (Flatten, 
                                     Dense, 
                                     Dropout, 
                                     BatchNormalization)

from tensorflow.keras.models import Model

from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import SGD, Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os

Import image data, create lists of images and labels

In [2]:
# Dataset root (relative to the working directory); it is listed below to
# obtain one sub-folder per class.
path = os.path.join("..", "in", "Tobacco3482")

In [3]:
# Two separate, empty lists: one for image arrays, one for class indices.
data, labels = [], []

In [6]:
def process_images(root=None):
    """Load every image under the dataset root as a VGG16-ready array.

    Each entry of ``root`` (taken in sorted order) is treated as one class
    folder, and its position in that order is used as the integer label, so
    label ``i`` corresponds to ``sorted(os.listdir(root))[i]``.

    Samples are collected in local lists rather than in the module-level
    ``data`` / ``labels`` names. The function can therefore be called
    repeatedly (for example when its cell is re-run) without duplicating
    samples, and it keeps working after those names have been rebound to
    NumPy arrays.

    Parameters
    ----------
    root : str, optional
        Directory holding one sub-folder per class. Defaults to the
        module-level ``path``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, class_indices)``: the preprocessed images (loaded at
        224x224) and the integer label of each image, in matching order.
    """
    if root is None:
        root = path

    images = []
    class_indices = []
    for class_index, folder in enumerate(sorted(os.listdir(root))):
        label_path = os.path.join(root, folder)
        # Sorted so the sample order does not depend on the file system.
        for file_name in sorted(os.listdir(label_path)):
            # Skip .db files (presumably Windows thumbnail caches such as
            # Thumbs.db - they are not images).
            if file_name.endswith('.db'):
                continue
            image = load_img(os.path.join(label_path, file_name),
                             target_size=(224, 224))
            image = img_to_array(image)
            images.append(preprocess_input(image))
            class_indices.append(class_index)
    return np.array(images), np.array(class_indices)

In [7]:
# Read the whole dataset once: preprocessed image arrays plus one integer
# class index per image.
(data, labels) = process_images()

Split train/test data 80/20, binarize labels (and create list of label strings for later)

In [8]:
# Hold out 20% of the samples for testing. The fixed seed makes the split
# reproducible across kernel restarts, and stratifying on the labels keeps
# the class proportions the same in the training and test parts.
(X_train, X_test, y_train, y_test) = train_test_split(data,
                                                      labels,
                                                      test_size=0.2,
                                                      stratify=labels,
                                                      random_state=42)

In [9]:
# create one-hot encodings
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
y_test = lb.fit_transform(y_test)

In [10]:
# Class names in the same sorted order that is used when assigning the
# integer labels, so labelNames[i] is the name of class i.
labelNames = sorted(os.listdir(path))

Load the VGG16 model, freeze its convolutional layers, and replace the original classification layers with new ones

In [11]:
# Reset Keras' global state before building the model, so layers left over
# from earlier builds in this kernel do not linger.
tf.keras.backend.clear_session()

In [22]:
# Load VGG16 without its classifier layers. The base network keeps its own
# name so that `model` only ever refers to the final, trainable model.
base_model = VGG16(include_top=False,
                   pooling='avg',
                   input_shape=(224, 224, 3))

# Mark the pretrained layers as not trainable: only the new head is learned.
for layer in base_model.layers:
    layer.trainable = False

# New classifier head. The softmax width is taken from the one-hot label
# matrix instead of being hard-coded, so it always matches the targets
# passed to fit().
num_classes = y_train.shape[1]
# NOTE(review): with pooling='avg' the base output should already be 2D,
# which would make this Flatten a no-op - kept to leave the layer graph as is.
flat1 = Flatten()(base_model.output)
class1 = Dense(128, activation='relu')(flat1)
output = Dense(num_classes, activation='softmax')(class1)

# Define the new model: frozen VGG16 base + new head.
model = Model(inputs=base_model.inputs,
              outputs=output)

# Compile with SGD on an exponentially decaying learning rate.
# NOTE(review): decay_steps=10000 is far more optimizer steps than the short
# runs below perform (22 steps per epoch in the recorded output), so the rate
# stays close to 0.01 throughout - confirm this is intended.
lr_schedule = ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=10000,
    decay_rate=0.9)
sgd = SGD(learning_rate=lr_schedule)
model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# summarize
model.summary()

Data augmentation

In [23]:
# Augmentation settings: random left-right mirroring and random rotations
# (rotation_range=20). validation_split=0.1 reserves a tenth of the data
# given to flow() for the "validation" subset.
datagen = ImageDataGenerator(validation_split=0.1,
                             rotation_range=20,
                             horizontal_flip=True)

Train

In [20]:
# Baseline run on the raw training arrays (no augmentation); Keras holds out
# 10% of them for validation.
# NOTE(review): the ValueError recorded under this cell reports an expected
# input of 32x32x3, while the model cell builds a 224x224x3 input - the
# output looks stale; re-run the model cell before this one.
H = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/10


ValueError: Input 0 of layer "functional_9" is incompatible with the layer: expected shape=(None, 32, 32, 3), found shape=(None, 224, 224, 3)

In [24]:
# fit the data generator to our images
datagen.fit(X_train)
# fits the model on batches with real-time data augmentation:
H = model.fit(datagen.flow(X_train, y_train, 
                           batch_size=128), 
              validation_data = datagen.flow(X_train, y_train, 
                                             batch_size=128, 
                                             subset = "validation"),
              epochs=5)

Epoch 1/5


  self._warn_if_super_not_called()


 1/22 ━━━━━━━━━━━━━━━━━━━━ 16:43 48s/step - accuracy: 0.0859 - loss: 8.7412

KeyboardInterrupt: 

Evaluate


In [None]:
def plot_history(H, epochs):
    """Plot training and validation loss/accuracy curves side by side.

    Parameters
    ----------
    H : object
        The value returned by ``model.fit``; its ``history`` dict must hold
        the keys "loss", "val_loss", "accuracy" and "val_accuracy".
    epochs : int
        Maximum number of epochs to draw. If fewer epochs were recorded,
        only the recorded ones are drawn.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    for ax, metric in zip(axes, ("loss", "accuracy")):
        for key, line_style in ((metric, "-"), ("val_" + metric, ":")):
            values = H.history[key][:epochs]
            # The x axis follows the recorded values, so a shorter history
            # cannot cause a length mismatch.
            ax.plot(np.arange(len(values)), values,
                    label=key, linestyle=line_style)
        ax.set_title(metric.capitalize())
        ax.set_xlabel("Epoch")
        ax.set_ylabel(metric.capitalize())
        ax.legend()
    fig.tight_layout()
    plt.show()


# Draw the curves for the 5-epoch augmented run.
plot_history(H, 5)

In [None]:
# Predict class probabilities for the held-out images, reduce both the
# predictions and the one-hot targets to class indices, and print the
# per-class metrics.
predictions = model.predict(X_test, batch_size=128)
y_true = y_test.argmax(axis=1)
y_pred = predictions.argmax(axis=1)
print(classification_report(y_true, y_pred, target_names=labelNames))