In [1]:
import numpy as np
import matplotlib.pyplot as plt
import re

# used to look at images
from skimage import io, color, filters, feature
from skimage.transform import resize, rotate
from PIL import Image, ImageOps

# to read in the .mat files
import scipy.io as sio 


# tensorflow - for CNN
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import categorical_crossentropy

# used to import the image folders
from tensorflow.keras.preprocessing import image, image_dataset_from_directory

from tensorflow.keras.models import Model
from keras.layers.convolutional import *
from keras.layers.normalization import BatchNormalization

from keras.applications.imagenet_utils import preprocess_input, decode_predictions

### Use the train_list.mat file to create a dictionary mapping label numbers to breed (class) names

In [2]:
# load the training list .mat file; the cells below read its 'file_list' and 'labels' entries
train_dict = sio.loadmat('extras/lists/train_list.mat')

In [3]:
# use regex to create list of breed names from train_dict
#
# The pattern removes two pieces from a file path and leaves the breed name:
#   n[0-9]+-                an "n", one or more digits, and a hyphen
#   \Wn[0-9]+_[0-9]+\.jpg   one non-word character (\W, here the forward slash), an "n",
#                           one or more digits, an underscore, one or more digits, and ".jpg"
# The dot is escaped so it only matches a literal ".", and the pattern is compiled
# once, outside the loop, because it is the same for every label.
pattern = re.compile(r'n[0-9]+-|\Wn[0-9]+_[0-9]+\.jpg')

breeds = []
for i in np.unique(train_dict['labels']):
    # get one file - first 0 is just first file with the label
    # second 0 is just the file path
    file = train_dict['file_list'][train_dict['labels'] == i][0][0]
    # replace the pattern with an empty string and append what is left to the breeds list
    breeds.append(pattern.sub('', file))

In [4]:
# build the label -> breed name lookup
# labels start at 1 while python indexing starts at 0, hence the "- 1"
breed_key = {label: breeds[label - 1] for label in np.unique(train_dict['labels'])}
    
# breed_key

### Load in the images and place them into the appropriate batches

In [5]:
# sanity check: one breed name was extracted per unique label
len(breeds)

120

In [6]:
# the training images are spread over five directories; each one gets its own iterator below
train_path1 = 'extras/Images/train1'
train_path2 = 'extras/Images/train2'
train_path3 = 'extras/Images/train3'
train_path4 = 'extras/Images/train4'
train_path5 = 'extras/Images/train5'

# directories holding the validation and test images
valid_path = 'extras/Images/valid'
test_path = 'extras/Images/test'

In [14]:
# Generator shared by the training, validation, and test data.
# No augmentation is applied. The only transformation is VGG16's own input
# preprocessing, which the pretrained weights expect; without it the frozen
# layers receive raw pixel values they were never trained on.
datagen = image.ImageDataGenerator(
    preprocessing_function = tf.keras.applications.vgg16.preprocess_input)


def make_batches(directory, batch_size = 5):
    """Return an iterator over the images found in `directory`.

    Images are resized to 224 x 224 and the class order is taken from
    `breeds`, so every split uses the same label indices.

    directory  -- path to a folder containing one sub-folder per breed
    batch_size -- number of images yielded per batch
    """
    return datagen.flow_from_directory(directory, target_size = (224, 224),
                                       classes = breeds, batch_size = batch_size)


train_batch1 = make_batches(train_path1)
train_batch2 = make_batches(train_path2)
train_batch3 = make_batches(train_path3)
train_batch4 = make_batches(train_path4)
train_batch5 = make_batches(train_path5)

valid_batch = make_batches(valid_path)
# NOTE(review): flow_from_directory shuffles by default; if predictions are later
# compared against test_batch.classes, build this iterator with shuffle = False.
test_batch = make_batches(test_path)

Found 2469 images belonging to 120 classes.
Found 2469 images belonging to 120 classes.
Found 2469 images belonging to 120 classes.
Found 2469 images belonging to 120 classes.
Found 2469 images belonging to 120 classes.
Found 4113 images belonging to 120 classes.
Found 4122 images belonging to 120 classes.


## Creating VGG16 model

In [8]:
# base model for transfer learning: VGG16 with its ImageNet weights and
# its fully connected top layers (both are the constructor defaults)
vgg16_model = tf.keras.applications.VGG16(weights = 'imagenet', include_top = True)

In [9]:
# Build a Sequential model from every VGG16 layer except the last one,
# so a new prediction layer can be attached in its place.
analyzer_model = Sequential(vgg16_model.layers[:-1])

In [10]:
# Freeze the pretrained layers so training does not update their weights.
# NOTE(review): the slice stops one short of the end, so whichever layer is last
# when this cell runs stays trainable -- confirm that is intended.
for pretrained_layer in analyzer_model.layers[:-1]:
    pretrained_layer.trainable = False

In [11]:
# add final prediction layer (dense) with one unit per breed (120 classes)
# softmax turns the outputs into a probability distribution over the classes,
# which is what the categorical cross-entropy loss expects; relu does not
analyzer_model.add(Dense(len(breeds), activation = 'softmax'))

In [12]:
# show each layer with its output shape and parameter count
analyzer_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       2

To do:
- Resize images to be 224 x 224
- Add predictions layer

In [13]:
# compile model
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
optimizer = Adam(learning_rate = 0.0001)
# categorical cross-entropy matches the one-hot labels yielded by the directory iterators
analyzer_model.compile(optimizer = optimizer,
                      loss = categorical_crossentropy,
                      metrics = ['accuracy'])

In [None]:
# The batch size is set where the iterator is created (flow_from_directory);
# fit's own batch_size argument is not meant for generator input, so it is not passed.
# steps_per_epoch = training_size / batch_size (inferred from the iterator's length)
analyzer_model.fit(train_batch1, epochs = 8)

In [None]:
# continue training on the second split; the batch size comes from the iterator
analyzer_model.fit(train_batch2, epochs = 8)

In [None]:
# continue training on the third split; the batch size comes from the iterator
analyzer_model.fit(train_batch3, epochs = 8)

In [None]:
# continue training on the fourth split; the batch size comes from the iterator
analyzer_model.fit(train_batch4, epochs = 8)

In [None]:
# Train on the last split and evaluate on the validation iterator after each epoch.
# Model.fit takes the validation set through `validation_data`; there is no
# `validation_batch` argument. The batch size comes from the iterators.
analyzer_model.fit(train_batch5, validation_data = valid_batch, epochs = 8)