In [None]:
import numpy as np
import pandas as pd
import os
import csv
import cv2
import random
import tarfile
import multiprocessing as mp
import seaborn as sns
import tqdm
import requests
import sklearn.model_selection as skms
import torch
import torch.utils.data as td
import torch.nn.functional as F
import torchvision as tv
import torchvision.transforms.functional as TF
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from pandas import DataFrame, Series
from PIL import Image
import imageio
#Import data from drive
from google.colab import drive
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
import numpy as np
import pandas as pd 
import os
#from keras.layers.normalization import BatchNormalization
#from tensorflow.keras.layers import BatchNormalization

# define constants
#DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
#OUT_DIR = 'results'
#RANDOM_SEED = 42

In [None]:
import os

# Mount Google Drive if it is not available yet, so that the notebook also
# works after a runtime restart followed by "Run all".
if not os.path.isdir('/content/drive/MyDrive'):
    from google.colab import drive
    drive.mount('/content/drive')

# One sub-folder per breed. Keep directories only (stray files would break the
# label parsing and the per-folder listing later on) and sort them, because
# os.listdir returns entries in arbitrary order.
cropped_dir = '/content/drive/MyDrive/Project/Cropped/'
data = sorted(entry for entry in os.listdir(cropped_dir)
              if os.path.isdir(os.path.join(cropped_dir, entry)))
print (data)
print("Number of Dog Breeds in Dataset is :", (len(data)))


**There are 120 folders in total, one for each of the 120 dog breeds.**

In [None]:
# Derive the breed label of every folder: the folder name is cut once at its
# first dash and the part after that dash is used as the label.
breeds = [
    folder_name.split('-', 1)[1]
    for folder_name in data
]
breeds[:15] # preview the first few labels

**Then, for each image, we store its full path in `X` and its associated label (class/breed) in `y`. This allows us to load the images easily.**

In [None]:
from itertools import chain

X = []  # full path of every image file
y = []  # breed label of the image at the same position in X

# One directory per breed folder listed in `data`
fullpaths = ['/content/drive/MyDrive/Project/Cropped/{}'.format(dog_class) for dog_class in data]

for fullpath, breed in zip(fullpaths, breeds):
    # sorted() gives a deterministic file order; os.listdir order is arbitrary
    for imgname in sorted(os.listdir(fullpath)):
        # append the path directly instead of nesting it in a list and
        # flattening everything afterwards
        X.append(os.path.join(fullpath, imgname))
        y.append(breed)

len(X) # number of pictures

In [None]:
# Random shuffle the images for learning
import random

# Shuffle X and y together so that every image path keeps its label.
# A dedicated generator with a fixed seed makes the resulting order the same
# on every fresh run (given the same input order), which in turn makes the
# subset and the train/validation/test splits derived from it repeatable.
combined = list(zip(X, y))
random.Random(42).shuffle(combined)
# slice assignment keeps X and y as the same list objects
X[:], y[:] = zip(*combined)

In [None]:
# Keep only the first 4500 samples (paths and labels alike) to test the code
X, y = X[:4500], y[:4500]

In [None]:
# Convert labels to one-hot encoded labels
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Fit the encoder on the complete breed list instead of the labels in y:
# y may be a subsample, and if a breed happened to be missing from it the
# integer index of every later breed would shift while the one-hot width
# below stays len(breeds). Fitting on `breeds` keeps the mapping stable.
le = LabelEncoder()
le.fit(breeds)

# One row per sample, one column per breed; to_categorical already returns a
# NumPy array, so no extra conversion is needed.
y_ohe = to_categorical(le.transform(y), len(breeds))

In [None]:
from sklearn.model_selection import train_test_split

from keras.preprocessing.image import img_to_array, load_img

# Load every image resized to 224x224 straight into one preallocated array.
# Collecting the images in a Python list and converting it with np.array
# afterwards would briefly hold two copies of the whole dataset in memory.
# NOTE: float32 is assumed to match img_to_array's output (Keras default floatx).
img_data = np.empty((len(X), 224, 224, 3), dtype='float32')
for idx, img in enumerate(X):
    img_data[idx] = img_to_array(load_img(img, target_size = (224,224)))

# Hold out 20% of the samples for testing
x_train, x_test, y_train, y_test = train_test_split(img_data, y_ohe,
                                                   test_size = 0.2,
                                                   stratify=np.array(y), # stratify makes sure that proportion of each class in the output is same as the input
                                                   random_state = 2)

# Split 20% of the remaining training samples off as validation set,
# stratified on the one-hot label rows
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train,
                                                 test_size = 0.2,
                                                 stratify=y_train,
                                                 random_state = 2)
print('Training Dataset Size: ', x_train.shape)
print('Validation Dataset Size: ', x_val.shape)
print('Testing Dataset Size: ', x_test.shape)
print('Training Label Size: ', y_train.shape)
print('Validation Label Size: ', y_val.shape)
print('Testing Label Size: ', y_test.shape)

# clear some space from memory: the splits are separate arrays, so the full
# image array is no longer needed
import gc
del img_data
gc.collect()

In [None]:
# Data Augmentation

from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator

batch_size = 64

# Create train generator: InceptionV3 preprocessing plus random rotations,
# shifts and horizontal flips
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, # only use rescale=1./255 if training from scratch
                                  rotation_range = 30,
                                  width_shift_range = 0.2,
                                  height_shift_range = 0.2,
                                  horizontal_flip = True)

# shuffle=True so that the batches are composed differently in every epoch;
# with shuffle=False the model would see the same batches in the same order
# each epoch. The seed keeps the shuffling and augmentation repeatable.
train_generator = train_datagen.flow(x_train, y_train,
                                     shuffle = True, batch_size = batch_size, seed = 1)

# Create validation generator
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input) # do not augment validation data

# Validation data is only evaluated, so its order is left untouched
val_generator = val_datagen.flow(x_val, y_val,
                                shuffle = False, batch_size = batch_size, seed = 1)

In [None]:
img_id = 16

# Feed one training image through the augmenting generator several times to
# see the random transformations it produces. The flow holds a single image,
# so a batch size of 1 is all that is needed.
dog_generator = train_datagen.flow(x_train[img_id:img_id+1], y_train[img_id:img_id+1],
                                     shuffle = False, batch_size = 1, seed = 1)

plt.figure(figsize=(30,20))
dogs = [next(dog_generator) for i in range(0,5)]
for counter, dog in enumerate(dogs): 
    plt.subplot(1, 5, counter+1)
    # train_datagen applies the InceptionV3 preprocess_input, which scales
    # pixels to [-1, 1]; imshow expects floats in [0, 1], so map them back
    # (clip guards against tiny numerical overshoot).
    plt.imshow(np.clip((dog[0][0] + 1) / 2, 0, 1))
    
plt.show()

In [None]:
# Build Model Using Pre-trained Model

from keras import models
from keras import layers
from tensorflow.keras.optimizers import Adam
from keras.layers import GlobalAveragePooling2D, Dense, Flatten, Dropout
from keras.applications.inception_v3 import InceptionV3
# Import the utilities from the public tensorflow.keras.utils path; the private
# modules keras.utils.np_utils / keras.utils.vis_utils are not available in
# newer Keras releases.
from tensorflow.keras.utils import to_categorical, plot_model

# load InceptionV3 pre-trained model (ImageNet weights, without its top
# classification layers)
base_model = InceptionV3(weights = 'imagenet', include_top = False, input_shape = (224,224,3))

# Pre-trained base followed by a new classification head with one softmax
# output per breed
model = models.Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(512, activation = 'relu'),
    Dense(512, activation = 'relu'),
    Dense(len(breeds), activation = 'softmax'),
])


# Freeze pre-trained layers (layer 0 is the InceptionV3 base) so that only the
# new head is trained; the two counts show the effect of freezing
print('Number of trainable weights before freezing the base layer:', len(model.trainable_weights))
model.layers[0].trainable = False
print('Number of trainable weights after freezing the base layer:', len(model.trainable_weights))


In [None]:
# Compile the Model
# Adam optimizer, categorical cross-entropy on the one-hot labels, and
# accuracy as the reported metric.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Draw the layer graph, including shapes and layer names, into model.png
plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
# Train Model

# Use the number of batches each generator provides per pass over its data.
# Flooring n / batch_size instead would leave out the last, smaller batch, so
# part of the training and validation samples would be skipped.
train_steps_per_epoch = len(train_generator)

val_steps_per_epoch = len(val_generator)

epochs = 20

# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(train_generator,
                    steps_per_epoch = train_steps_per_epoch,
                    validation_data = val_generator,
                    validation_steps = val_steps_per_epoch,
                    epochs = epochs, verbose = 1)

In [None]:
# Plot Accuracy and Loss 
# Two panels side by side: accuracy on the left, loss on the right, each with
# the training and the validation curve over the epochs.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
t = f.suptitle('Transfer Learning Performance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)

epoch_list = list(range(1,epochs+1))
tick_positions = np.arange(0, epochs+1, 5)

# (axes, history key of the training curve, history key of the validation curve, display name)
panels = [
    (ax1, 'accuracy', 'val_accuracy', 'Accuracy'),
    (ax2, 'loss', 'val_loss', 'Loss'),
]

legends = []
for panel_ax, train_key, val_key, metric_name in panels:
    panel_ax.plot(epoch_list, history.history[train_key], label='Train ' + metric_name)
    panel_ax.plot(epoch_list, history.history[val_key], label='Validation ' + metric_name)
    panel_ax.set_xticks(tick_positions)
    panel_ax.set_ylabel(metric_name + ' Value')
    panel_ax.set_xlabel('Epoch')
    panel_ax.set_title(metric_name)
    legends.append(panel_ax.legend(loc="best"))

l1, l2 = legends