In [4]:
import argparse
import os

import numpy as np
import pandas as pd

# tf tools
import tensorflow as tf

# scikit-learn
from sklearn.metrics import classification_report

# VGG16 model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

# layers
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout, Flatten

# generic model object
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD

# learning-rate schedule
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# image processing
from tensorflow.keras.preprocessing.image import (
    ImageDataGenerator,
    img_to_array,
    load_img,
)

In [5]:
# Three generator configurations. All of them apply the VGG16 preprocessing
# function and reserve 20 % of the images as a validation subset.

# 1) no augmentation
datagenerator = ImageDataGenerator(
    validation_split=0.2,
    preprocessing_function=preprocess_input,
)

# 2) light augmentation: random horizontal flips and rotations (rotation_range=20)
datagenerator_simple_aug = ImageDataGenerator(
    rotation_range=20,
    horizontal_flip=True,
    validation_split=0.2,
    preprocessing_function=preprocess_input,
)

# 3) heavy augmentation: additionally applies random zoom and width/height shifts
# NOTE(review): zoom_range=[0, 1.25] has a lower bound of 0, which looks
# unintended (a zoom factor near 0 is an extreme zoom) — confirm the intended
# interval, e.g. [0.75, 1.25].
datagenerator_very_aug = ImageDataGenerator(
    rotation_range=20,
    horizontal_flip=True,
    zoom_range=[0, 1.25],
    width_shift_range=0.2,
    height_shift_range=0.2,
    validation_split=0.2,
    preprocessing_function=preprocess_input,
)

In [13]:
# Iterator over the training subset with shuffling turned off; its `classes`
# attribute is what the class-weight computation further down reads.
gen = datagenerator_very_aug.flow_from_directory(
    directory=os.path.join("Warp-C", "train"),
    subset="training",
    target_size=(224, 224),
    color_mode="rgb",
    shuffle=False,
    save_to_dir="augmented_images",
    save_prefix="augmented",
)

Found 7060 images belonging to 4 classes.


array([0, 0, 0, ..., 3, 3, 3], dtype=int32)

In [14]:
from sklearn.utils import class_weight
def create_class_weights(traingenerator):
    """
    Compute balanced class weights from the labels stored on a generator.

    Args:
        traingenerator: object exposing a `classes` attribute holding one
            class index per sample (e.g. the iterator returned by
            `flow_from_directory`).

    Returns:
        dict mapping each distinct class index to the weight produced by
        scikit-learn's `compute_class_weight` with `class_weight="balanced"`.
    """
    labels = traingenerator.classes

    # distinct class indices, sorted ascending by np.unique
    distinct_labels = np.unique(labels)

    balanced = class_weight.compute_class_weight(
        class_weight="balanced",
        classes=distinct_labels,
        y=labels,
    )

    return dict(zip(distinct_labels, balanced))

In [15]:
# Balanced class weights for the training subset (class index -> weight).
weights = create_class_weights(gen)
print(weights)

{0: 3.394230769230769, 1: 4.9164345403899725, 2: 3.922222222222222, 3: 0.3079741755365556}


In [16]:
# Distinct class indices present in the training iterator's labels.
np.unique(gen.classes)

array([0, 1, 2, 3], dtype=int32)

In [28]:
def build_model(num_classes=4, input_shape=(224, 224, 3)):
    """
    Build a convolutional neural network using the pretrained VGG16 model as feature extractor.
    The model has two dense classification layers and a final softmax output layer.
    Code is adapted from the Session 9 notebook of the Visual Analytics course at AU, 2023.

    Args:
        num_classes: number of units in the softmax output layer. Defaults to 4,
            the value that was previously hard-coded.
        input_shape: shape of the input images passed to VGG16. Defaults to
            (224, 224, 3), the value that was previously hard-coded.

    Returns:
        A compiled model that can be fit and used for a classification task.

    """

    # load the convolutional base without its classifier layers;
    # pooling="avg" asks Keras for global average pooling on the last conv block
    base = VGG16(include_top=False, pooling="avg", input_shape=input_shape)

    # mark loaded layers as not trainable so only the new head is updated
    for layer in base.layers:
        layer.trainable = False

    # add new classifier layers on top of the base output
    # (Flatten is kept from the original architecture; with pooling="avg" the
    # base output should already be 2-D according to the Keras documentation)
    flat = Flatten()(base.output)
    normed = BatchNormalization()(flat)
    hidden1 = Dense(256, activation="relu")(normed)
    hidden2 = Dense(128, activation="relu")(hidden1)
    output = Dense(num_classes, activation="softmax")(hidden2)

    # define new model
    model = Model(inputs=base.inputs, outputs=output)

    # compile with SGD and an exponentially decaying learning rate
    lr_schedule = ExponentialDecay(
        initial_learning_rate=0.01, decay_steps=10000, decay_rate=0.9
    )
    sgd = SGD(learning_rate=lr_schedule)

    # categorical cross-entropy expects one-hot encoded labels
    model.compile(optimizer=sgd, loss="categorical_crossentropy", metrics=["accuracy"])

    return model

In [29]:
# Test images only go through the VGG16 preprocessing function:
# no augmentation and no validation split.
test_datagenerator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

# shuffling is disabled so batches are produced in a fixed order
test_gen = test_datagenerator.flow_from_directory(
    directory=os.path.join("Warp-C", "test"),
    shuffle=False,
    color_mode="rgb",
    target_size=(224, 224),
)

Found 1521 images belonging to 4 classes.


{'cardboard': 0, 'glass': 1, 'metal': 2, 'plastic': 3}

In [31]:
# Exploratory: list the attributes available on the test iterator.
dir(test_gen)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_filepaths',
 '_flow_index',
 '_get_batches_of_transformed_samples',
 '_keras_api_names',
 '_keras_api_names_v1',
 '_set_index_array',
 'allowed_class_modes',
 'batch_index',
 'batch_size',
 'class_indices',
 'class_mode',
 'classes',
 'color_mode',
 'data_format',
 'directory',
 'dtype',
 'filenames',
 'filepaths',
 'image_data_generator',
 'image_shape',
 'index_array',
 'index_generator',
 'interpolation',
 'keep_aspect_ratio',
 'labels',
 'lock',
 'n',
 'next',
 'num_classes',
 'on_epoch_end',
 'reset',
 'sample_weight',
 'samples',
 'save_format',
 'save_prefix',

In [45]:
# Peek at the first few test file paths only; displaying the full list
# (one entry per test image) floods the cell output.
test_gen.filepaths[:10]

['Warp-C/test/cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_11-28-21_01.jpg',
 'Warp-C/test/cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_11-39-30_01.jpg',
 'Warp-C/test/cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_11-45-00_01.jpg',
 'Warp-C/test/cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_12-06-41_01.jpg',
 'Warp-C/test/cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_12-07-45_01.jpg',
 'Warp-C/test/cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_12-09-18_01.jpg',
 'Warp-C/test/cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_12-13-05_01.jpg',
 'Warp-C/test/cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_12-27-49_01.jpg',
 'Warp-C/test/cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_12-27-49_02.jpg',
 'Warp-C/t

In [35]:
    # Class folders are listed explicitly and shared by both iterators so that
    # training and validation labels are guaranteed to use the same index order.
    class_names = ['cardboard', 'glass', 'metal', 'plastic']

    # Heavy augmentation, used for the training subset only.
    # NOTE(review): zoom_range=[0, 1.25] has a lower bound of 0, which looks
    # unintended — confirm the intended interval, e.g. [0.75, 1.25].
    datagenerator_very_aug = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        horizontal_flip=True,
        rotation_range=20,
        zoom_range=[0, 1.25],
        width_shift_range=0.2,
        height_shift_range=0.2,
        validation_split=0.2,
    )

    # Validation images get preprocessing only, so validation metrics are
    # measured on undistorted images. The same validation_split is used so the
    # hold-out fraction matches the one excluded from the training subset.
    val_datagenerator = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        validation_split=0.2,
    )

    train_gen = datagenerator_very_aug.flow_from_directory(
        directory=os.path.join("Warp-C", "train"),
        target_size=(224, 224),
        color_mode='rgb',
        classes=class_names,
        shuffle=True,
        save_to_dir='augmented_images',
        save_prefix='augmented',
        subset='training',
    )

    # No save_to_dir here: these images are not augmented, and writing them
    # with the 'augmented' prefix would mix them into the training samples
    # saved in 'augmented_images'.
    val_gen = val_datagenerator.flow_from_directory(
        directory=os.path.join("Warp-C", "train"),
        target_size=(224, 224),
        color_mode='rgb',
        classes=class_names,
        shuffle=True,
        subset='validation',
    )

Found 7060 images belonging to 4 classes.
Found 1763 images belonging to 4 classes.


In [74]:
# Reset the test iterator.
test_gen.reset()

In [75]:
# Number of test images per class index.
np.unique(test_gen.classes, return_counts = True)

(array([0, 1, 2, 3], dtype=int32), array([ 162,   86,   98, 1175]))

In [81]:
# Raw listing of the cardboard test folder; os.listdir does not guarantee any order.
test_card = os.listdir(os.path.join("Warp-C", "test", "cardboard"))

In [80]:
# 162 is the class-0 count reported by np.unique(test_gen.classes, return_counts=True).
# Presumably this slice covers exactly the cardboard files — verify that the
# iterator's filenames are grouped by class.
files = test_gen.filenames[:162]

len(files)

162

False

['test_crops_cardboard_milk-cardboard_Monitoring_photo_test_25-Mar_11-13-24_01.jpg',

'cardboard/test_crops_cardboard_juice-cardboard_Monitoring_photo_2_test_25-Mar_11-28-21_01.jpg'

In [79]:
# First filename held by the training iterator; entries carry the class-folder prefix.
train_gen.filenames[0]

'cardboard/train_crops_cardboard_juice-cardboard_POSAD_1_13-Sep_05-38-54_02.jpg'

train_crops_cardboard_juice-cardboard_Monitoring_photo_04-Mar_04-26-41_01.jpg

In [89]:
# Sorted listing of the cardboard test folder.
# NOTE: the listing is not filtered, so non-image entries are included — in the
# recorded run the first element is '.ipynb_checkpoints'.
test_card = sorted(os.listdir(os.path.join("Warp-C", "test", "cardboard")))

In [104]:
# First entry of the sorted directory listing.
test_card[0]

'.ipynb_checkpoints'

In [92]:
# Reset the test iterator.
test_gen.reset()

In [102]:
# Filename at position 100 of the test iterator (includes the class-folder prefix).
test_gen.filenames[100]

'cardboard/test_crops_cardboard_milk-cardboard_Monitoring_photo_test_25-Mar_11-16-54_01.jpg'

In [97]:
# Compare the iterator's first filename with the second entry of the sorted listing.
# NOTE(review): iterator filenames are prefixed with the class folder
# ('cardboard/...') while os.listdir returns bare names, so this equality is
# False regardless of ordering; compare os.path.basename(...) to check order.
test_gen.filenames[0] == test_card[1]

False

In [95]:
# Check the type of the entries stored in the iterator's filenames list.
type(test_gen.filenames[0])

str

In [101]:
# Entry 101 of the sorted listing; in the recorded run it is the same file as
# test_gen.filenames[100] without the folder prefix (presumably offset by one
# because of the '.ipynb_checkpoints' entry at index 0).
test_card[101]

'test_crops_cardboard_milk-cardboard_Monitoring_photo_test_25-Mar_11-16-54_01.jpg'