<a href="https://colab.research.google.com/github/marwankashef/Deep_Learning_Logo_Recognition/blob/master/Logo_Recognition_Using_Deep_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Check Core Library Versions

In [0]:
# Report the installed standalone Keras version and the active backend.
# The package is imported by name and its version read as an attribute, so the
# notebook's own `__version__` global is not overwritten.
import keras
from tensorflow.keras import backend as K

print('Using Keras version:', keras.__version__, 'backend:', K.backend())

In [0]:
# Report the installed TensorFlow version and the active backend.
# The version is read from the module attribute rather than imported as a bare
# `__version__`, which would clobber the value imported by the Keras check above.
import tensorflow as tf
from tensorflow.keras import backend as K

print('Using Tensorflow version:', tf.__version__, 'backend:', K.backend())

### New Library Installations

In [0]:
!pip install hyperas

In [0]:
!pip install h5py pyyaml

In [0]:
pip install split-folders tqdm

In [0]:
pip install python-resize-image

### Library Imports


In [0]:
# UX

import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)

from tqdm import tqdm

# Numpy
import numpy as np
from numpy import mean

# Pandas
import pandas as pd

# Matplotlib
import matplotlib.pyplot as plt

# Directory-based

from glob import glob
import os
import pathlib
import signal

# General Pythonic Operations

import IPython.display as display
from PIL import Image
from google.colab import files

import re
import json
import shutil
import math


# Sci-Kit Learn

from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder

# Tensorflow

import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import backend as K

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from keras.models import Sequential
from keras.models import load_model
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras import backend as K

# Splitting Folders into Train/Validation/Test

import split_folders

# Transfer Learning

from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import VGG19
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import Xception

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer

# Hyperas

from hyperas import optim
from hyperopt import Trials, STATUS_OK, tpe
from hyperas.distributions import choice, uniform
import h5py

# Google Colab-specific

from tensorboardcolab import *

# For Computing Image Similarity

from skimage.measure import compare_ssim as ssim 
import cv2
import random
from matplotlib.image import imread
from scipy.spatial import distance
from PIL import Image
from resizeimage import resizeimage
from tqdm import tnrange, tqdm_notebook

## **1. Import and Extract Data**

In [0]:
# Mount Google Drive so the dataset archive stored there is readable from this Colab session

from google.colab import drive

# force_remount=True is passed so the mount is redone even when Drive is already mounted
drive.mount('/content/drive',force_remount=True)

In [0]:
# Unzip the full Logo Dataset archive from Google Drive into the current working directory
# (-o overwrites existing files without prompting, so the cell can be re-run)

!unzip -o "/content/drive/My Drive/Logo_Dataset.zip"

## **2. Explore the Image Dataset**

In [0]:
# List the top level of the extracted Logo Dataset to check that the folders came through as expected

path_of_data = '/content/Logo_Dataset'  # dataset root used by the exploration and split cells
data_directory = pathlib.Path(path_of_data)
!ls {path_of_data}

In [0]:
# Print one logo folder per line so the listing is easier to inspect.
# The working directory is switched to the dataset root so the bare entry names resolve.

os.chdir(path_of_data)
logo_list = [entry for entry in os.listdir() if os.path.isdir(entry)]
for logo in logo_list:
  print(logo)

In [0]:
# Count the logo folders in the dataset, and the files they contain.
# Only directories are counted as folders, and the inner count lists each
# folder's own contents rather than re-listing the dataset root.

folder_counter = 0
image_counter = 0

for folder in os.listdir(path_of_data):
  folder_path = os.path.join(path_of_data, folder)
  if not os.path.isdir(folder_path):
    continue  # stray files at the top level are not logo folders
  folder_counter = folder_counter + 1
  image_counter = image_counter + len(os.listdir(folder_path))

print("There are " + str(folder_counter) + " folders")

In [0]:
# Count every .jpg file that sits exactly one folder below the dataset root
# (files with any other extension are not part of this total)

data_dir = pathlib.Path(path_of_data)
total_image_count = sum(1 for _jpg_path in data_dir.glob('*/*.jpg'))
print("There are",total_image_count,"total images in this folder")

In [0]:
# Walk the dataset tree and print the number of files found in every directory,
# which helps to spot corrupted or temporary files that slipped in.

images_per_folder = 0  # running total across all directories

for walk_root, _walk_dirs, walk_files in os.walk(path_of_data):
    image_count = len(walk_files)
    images_per_folder = images_per_folder + image_count
    # Drop the leading path components so only the part below /content is shown
    name_output = str(walk_root).split('/')[3:]
    print("Files in ", str(name_output), ":", image_count)

## **3. Create Training/Validation/Test Sets from the Data**

In [0]:
# Return to the /content working directory (an earlier cell changed into the dataset folder)

os.chdir(r"/content/")

In [0]:
# Double-checking that you're in the right directory

os.getcwd()

In [0]:
# Split the image data into training, validation, and test sets

output_dir = '/content/'

train_ratio = 0.6
validation_ratio = 0.2
test_ratio = 0.2

# Three-way split: the ratio tuple is (train, validation, test).
# To only split into training and validation sets, pass a two-element tuple as 'ratio'.
split_folders.ratio(path_of_data, output = output_dir, seed=1337, ratio=(train_ratio, validation_ratio, test_ratio)) # explicit seed and a 60/20/20 ratio

## **4. Creating CNN Model From Scratch**

In [0]:
# Locations of the training, validation, and test splits consumed by the ImageDataGenerator flows

train_dir, validation_dir, test_dir = (
    '/content/train',
    '/content/val',
    '/content/test',
)

In [0]:
# Batch size, input image geometry, and number of output classes

BATCH_SIZE = 32
IMG_HEIGHT, IMG_WIDTH, CHANNELS = 256, 256, 3
num_classes = 270

In [0]:
# Augmenting generator, used for the training images only
image_generator = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.20,        # displaces fixed points of images
    zoom_range=0.2,          # zooming factor into images
    rotation_range=40,       # randomly rotate images in the range
    width_shift_range=0.2,   # randomly shifts images horizontally
    height_shift_range=0.2,  # randomly shifts images vertically
    horizontal_flip=True)    # randomly flips images

# Plain rescaling generator, shared by the validation and test images
test_datagen = ImageDataGenerator(rescale=1./255)

# Arguments that are identical for all three directory flows
common_flow_args = dict(target_size=(IMG_HEIGHT, IMG_WIDTH), color_mode="rgb", seed=42)

# Setup training generator
train_generator = image_generator.flow_from_directory(
    train_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup validation generator
validation_generator = test_datagen.flow_from_directory(
    validation_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup test generator: unshuffled, one image per batch, no labels
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, batch_size=1, class_mode=None, **common_flow_args)

### Model Architecture

In [0]:
# CNN built from scratch: three convolutional stages followed by a dense classifier head
model = models.Sequential([
    # Stage 1: two 32-filter 5x5 convolutions
    layers.Conv2D(filters=32, kernel_size=(5,5), padding="same", activation='relu',
                  input_shape=(IMG_HEIGHT,IMG_WIDTH,CHANNELS)),
    layers.Conv2D(filters=32, kernel_size=(5,5), padding="same", activation='relu'),
    layers.Dropout(0.2),
    layers.MaxPooling2D((2,2)),

    # Stage 2: two 32-filter 5x5 convolutions (the second one uses the default padding)
    layers.Conv2D(filters=32, kernel_size=(5,5), padding="same", activation='relu'),
    layers.Conv2D(filters=32, kernel_size=(5,5), activation='relu'),
    layers.Dropout(0.2),
    layers.MaxPooling2D((2,2)),

    # Stage 3: two 64-filter 5x5 convolutions (the second one uses the default padding)
    layers.Conv2D(filters=64, kernel_size=(5,5), padding="same", activation='relu'),
    layers.Conv2D(filters=64, kernel_size=(5,5), activation='relu'),
    layers.Dropout(0.2),
    layers.MaxPooling2D((2,2)),

    # Classifier head (no dropout is applied after the dense layer)
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

In [0]:
# Uncomment the following code if you'd like to load the weights of a prior run

#os.chdir('/content/drive/My Drive')

#model.load_weights("weights-improvement-58-0.44.hdf5")

In [0]:
# Preview what the architecture looks like

model.summary()

In [0]:
# Compile the model - must be run before training can commence

rmsprop_optimizer = optimizers.RMSprop(lr=1e-4)
model.compile(optimizer=rmsprop_optimizer,
              loss='categorical_crossentropy',
              metrics=['acc'])

In [0]:
# Create the folder that stores checkpoints and configure checkpointing.
# A weights file is written only when validation accuracy improves.

model_path = './model'

# Pure-Python replacement for the shell test-and-mkdir; a no-op when the folder already exists
os.makedirs(model_path, exist_ok=True)

filepath = model_path + "/weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.
# `period=1` is omitted: checking after every epoch is the default.
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='auto')

In [0]:
# Early stopping: end training once validation accuracy has not improved for 15 epochs.
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.

early = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=15, verbose=1, mode='auto')


In [0]:
# Collect the early-stopping and checkpoint callbacks into the list handed to training

callbacks_list = [early, checkpoint]

In [0]:
# Train the network, validating after every epoch

n_epoch = 120
# Whole batches per pass over each split; a trailing partial batch is not counted
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

fit_arguments = dict(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=STEP_SIZE_VALID,
    callbacks=callbacks_list,
    epochs=n_epoch,
)
history_object = model.fit_generator(**fit_arguments)


In [0]:
# Plot every metric recorded during training, one line per metric, against the epoch number

history_axes = pd.DataFrame(history_object.history).plot(figsize=(8, 5))
history_axes.grid(True)
history_axes.set_ylim(0, 1)  # values above 1 (e.g. a large loss) are clipped from view
plt.show()

In [0]:
# Displays performance on the validation set (not the test set) as [loss, acc]

model.evaluate_generator(validation_generator, steps=STEP_SIZE_VALID)

In [0]:
# Run the model over the test generator to obtain one row of class scores per test image

STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

# Reset the generator first so that prediction starts from its first batch
test_generator.reset()
predictions = model.predict_generator(test_generator,steps=STEP_SIZE_TEST, verbose=1)

In [0]:
# Index of the highest-scoring class in each prediction row
predicted_class_indices=np.argmax(predictions,axis=1)

In [0]:
# Translate the predicted class indices back into class names.
# class_indices maps name -> index, so the mapping is inverted before the lookup.

labels = {class_index: class_name for class_name, class_index in train_generator.class_indices.items()}
predictions = [labels[class_index] for class_index in predicted_class_indices]

In [0]:
# One row per test file: the path reported by the generator plus the predicted label
filenames = test_generator.filenames
results = pd.DataFrame(dict(Filename=filenames, Predictions=predictions))

In [0]:
# Split each "Filename" on "/" into an 'Actual' label column and the remaining file name
# (assumes every path contains exactly one "/", i.e. "<class folder>/<file>")

results[['Actual','Filename']] = results.Filename.str.split("/",expand=True)

In [0]:
# Tag each row with the string 'True' when predicted == actual and 'False' when predicted != actual

results['Correct Prediction?'] = np.where(results['Predictions'] == results['Actual'], 'True','False')

In [0]:
# Export the comparison table to CSV, then display it to check that it looks as expected
# (only the last expression of a cell is rendered, so `results` comes last)

results.to_csv("results.csv",index=False)
results

In [0]:
# Count how many rows are tagged 'True' and how many 'False'

create_prediction_count = results['Correct Prediction?'].value_counts()

In [0]:
# Number of correctly classified test images.
# .get() yields 0 instead of raising KeyError when no prediction was correct.

correct_predictions = create_prediction_count.get('True', 0)

In [0]:
# Test accuracy = correct predictions / number of test images that were predicted.
# len(results) is the exact test-set size; total_image_count*test_ratio only estimates it.
test_accuracy = correct_predictions / len(results)

In [0]:
print(test_accuracy)

In [0]:
files.download("results.csv")

In [0]:
model.save('model3.h5')

In [0]:
files.download("model3.h5")

In [0]:
# Sends SIGKILL to the notebook's own kernel process, terminating it immediately.
# NOTE(review): every variable and import defined above is lost afterwards, so the
# setup cells have to be re-run before the cells below can execute.
os.kill(os.getpid(), signal.SIGKILL)

## **5. Transfer Learning**


### VGG16

In [0]:
# Locations of the training, validation, and test splits consumed by the ImageDataGenerator flows

train_dir, validation_dir, test_dir = (
    '/content/train',
    '/content/val',
    '/content/test',
)

In [0]:
# Batch size, input image geometry, and number of output classes

BATCH_SIZE = 32
IMG_HEIGHT, IMG_WIDTH, CHANNELS = 256, 256, 3
num_classes = 270

In [0]:
# Augmenting generator, used for the training images only
image_generator = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.20,        # displaces fixed points of images
    zoom_range=0.2,          # zooming factor into images
    rotation_range=40,       # randomly rotate images in the range
    width_shift_range=0.2,   # randomly shifts images horizontally
    height_shift_range=0.2,  # randomly shifts images vertically
    horizontal_flip=True)    # randomly flips images

# Plain rescaling generator, shared by the validation and test images
test_datagen = ImageDataGenerator(rescale=1./255)

# Arguments that are identical for all three directory flows
common_flow_args = dict(target_size=(IMG_HEIGHT, IMG_WIDTH), color_mode="rgb", seed=42)

# Setup training generator
train_generator = image_generator.flow_from_directory(
    train_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup validation generator
validation_generator = test_datagen.flow_from_directory(
    validation_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup test generator: unshuffled, one image per batch, no labels
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, batch_size=1, class_mode=None, **common_flow_args)

In [0]:
# Build the VGG16 base network with ImageNet weights.
# include_top=False leaves out the network's original fully connected classifier layers.

VGG16_conv_base = VGG16(weights = 'imagenet',
                  include_top = False,
                  input_shape = (IMG_HEIGHT,IMG_WIDTH,CHANNELS))

In [0]:
# Preview what the architecture looks like

VGG16_conv_base.summary()

In [0]:
# Transfer-learning network: VGG16 base -> flatten -> 256-unit dense layer -> softmax over the classes
# NOTE(review): the base is never marked non-trainable in the visible cells, so its weights are trained too - confirm this is intended

VGG16_model = models.Sequential([
    VGG16_conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

In [0]:
# Preview what the architecture looks like

VGG16_model.summary()

In [0]:
# Compile the model - must be run before training can commence

VGG16_optimizer = optimizers.RMSprop(lr=1e-4)
VGG16_model.compile(optimizer=VGG16_optimizer,
                    loss='categorical_crossentropy',
                    metrics=['acc'])

In [0]:
# Create the folder that stores the VGG16 checkpoints and configure checkpointing.
# A weights file is written only when validation accuracy improves.

model_path = './vgg16_model'

# Pure-Python replacement for the shell test-and-mkdir; a no-op when the folder already exists
os.makedirs(model_path, exist_ok=True)

VGG16_filepath = model_path + "/VGG16_weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.
# `period=1` is omitted: checking after every epoch is the default.
VGG16_checkpoint = tf.keras.callbacks.ModelCheckpoint(VGG16_filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='auto')

In [0]:
# Early stopping: end training once validation accuracy has not improved for 10 epochs.
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.

VGG16_early = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='auto')

In [0]:
# Collect the early-stopping and checkpoint callbacks into the list handed to training

VGG16_callbacks_list = [VGG16_early, VGG16_checkpoint]

In [0]:
# Train the VGG16-based network, validating after every epoch

VGG16_n_epoch = 1
# Whole batches per pass over each split; a trailing partial batch is not counted
VGG16_STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
VGG16_STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

VGG16_fit_arguments = dict(
    generator=train_generator,
    steps_per_epoch=VGG16_STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=VGG16_STEP_SIZE_VALID,
    callbacks=VGG16_callbacks_list,
    epochs=VGG16_n_epoch,
)
VGG16_history = VGG16_model.fit_generator(**VGG16_fit_arguments)

In [0]:
# Plot every metric recorded during training, one line per metric, against the epoch number

VGG16_history_axes = pd.DataFrame(VGG16_history.history).plot(figsize=(8, 5))
VGG16_history_axes.grid(True)
VGG16_history_axes.set_ylim(0, 1)  # values above 1 (e.g. a large loss) are clipped from view
plt.show()

In [0]:
# Displays performance on the validation set (not the test set) as [loss, acc]

VGG16_model.evaluate_generator(validation_generator, steps=VGG16_STEP_SIZE_VALID)

In [0]:
# Run the model over the test generator to obtain one row of class scores per test image

VGG16_STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

# Reset the generator first so that prediction starts from its first batch
test_generator.reset()
VGG16_predictions = VGG16_model.predict_generator(test_generator,steps=VGG16_STEP_SIZE_TEST, verbose=1)

In [0]:
# Index of the highest-scoring class in each prediction row
VGG16_predicted_class_indices=np.argmax(VGG16_predictions,axis=1)

In [0]:
# Translate the predicted class indices back into class names.
# class_indices maps name -> index, so the mapping is inverted before the lookup.

VGG16_labels = {class_index: class_name for class_name, class_index in train_generator.class_indices.items()}
VGG16_predictions = [VGG16_labels[class_index] for class_index in VGG16_predicted_class_indices]

In [0]:
# One row per test file: the path reported by the generator plus the predicted label
VGG16_filenames = test_generator.filenames
VGG16_results = pd.DataFrame(dict(Filename=VGG16_filenames, Predictions=VGG16_predictions))

In [0]:
# Split each "Filename" on "/" into an 'Actual' label column and the remaining file name
# (assumes every path contains exactly one "/", i.e. "<class folder>/<file>")

VGG16_results[['Actual','Filename']] = VGG16_results.Filename.str.split("/",expand=True)

In [0]:
# Tag each row with the string 'True' when predicted == actual and 'False' when predicted != actual

VGG16_results['Correct Prediction?'] = np.where(VGG16_results['Predictions'] == VGG16_results['Actual'], 'True','False')

In [0]:
# Double check that DataFrame looks as expected

VGG16_results

In [0]:
# Count how many rows are tagged 'True' and how many 'False'

VGG16_create_prediction_count = VGG16_results['Correct Prediction?'].value_counts()

In [0]:
# Number of correctly classified test images.
# .get() yields 0 instead of raising KeyError when no prediction was correct.

VGG16_correct_predictions = VGG16_create_prediction_count.get('True', 0)

In [0]:
# Test accuracy = correct predictions / number of test images that were predicted.
# len(VGG16_results) is the exact test-set size; total_image_count*test_ratio only estimates it.
VGG16_test_accuracy = VGG16_correct_predictions / len(VGG16_results)

In [0]:
print(VGG16_test_accuracy)

In [0]:
# Export the VGG16 actual-versus-prediction table and download it.
# The CSV is written here because no earlier cell creates "results_VGG16.csv".

VGG16_results.to_csv("results_VGG16.csv",index=False)
files.download("results_VGG16.csv")

### VGG19

In [0]:
# Locations of the training, validation, and test splits consumed by the ImageDataGenerator flows

train_dir, validation_dir, test_dir = (
    '/content/train',
    '/content/val',
    '/content/test',
)

In [0]:
# Batch size, input image geometry, and number of output classes

BATCH_SIZE = 32
IMG_HEIGHT, IMG_WIDTH, CHANNELS = 256, 256, 3
num_classes = 270

In [0]:
# Augmenting generator, used for the training images only
image_generator = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.20,        # displaces fixed points of images
    zoom_range=0.2,          # zooming factor into images
    rotation_range=40,       # randomly rotate images in the range
    width_shift_range=0.2,   # randomly shifts images horizontally
    height_shift_range=0.2,  # randomly shifts images vertically
    horizontal_flip=True)    # randomly flips images

# Plain rescaling generator, shared by the validation and test images
test_datagen = ImageDataGenerator(rescale=1./255)

# Arguments that are identical for all three directory flows
common_flow_args = dict(target_size=(IMG_HEIGHT, IMG_WIDTH), color_mode="rgb", seed=42)

# Setup training generator
train_generator = image_generator.flow_from_directory(
    train_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup validation generator
validation_generator = test_datagen.flow_from_directory(
    validation_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup test generator: unshuffled, one image per batch, no labels
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, batch_size=1, class_mode=None, **common_flow_args)

In [0]:
# Build the VGG19 base network with ImageNet weights.
# include_top=False leaves out the network's original fully connected classifier layers.
# The input shape comes from the shared size constants so it always matches the generators.

VGG19_conv_base = VGG19(weights = 'imagenet',
                  include_top = False,
                  input_shape = (IMG_HEIGHT,IMG_WIDTH,CHANNELS))

In [0]:
# Preview what the architecture looks like

VGG19_conv_base.summary()

In [0]:
# Transfer-learning network: VGG19 base -> flatten -> 256-unit dense layer -> softmax over the classes
# NOTE(review): the base is never marked non-trainable in the visible cells, so its weights are trained too - confirm this is intended

VGG19_model = models.Sequential([
    VGG19_conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

In [0]:
# Preview what the architecture looks like

VGG19_model.summary()

In [0]:
# Compile the model - must be run before training can commence

VGG19_optimizer = optimizers.RMSprop(lr=1e-4)
VGG19_model.compile(optimizer=VGG19_optimizer,
                    loss='categorical_crossentropy',
                    metrics=['acc'])

In [0]:
# Create the folder that stores the VGG19 checkpoints and configure checkpointing.
# A weights file is written only when validation accuracy improves.

model_path = './vgg19_model'

# Pure-Python replacement for the shell test-and-mkdir; a no-op when the folder already exists
os.makedirs(model_path, exist_ok=True)

VGG19_filepath = model_path + "/VGG19_weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.
# `period=1` is omitted: checking after every epoch is the default.
VGG19_checkpoint = tf.keras.callbacks.ModelCheckpoint(VGG19_filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='auto')

In [0]:
# Early stopping: end training once validation accuracy has not improved for 10 epochs.
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.

VGG19_early = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='auto')

In [0]:
# Loading Early Stopping and Checkpoint into list for generator

VGG19_callbacks_list = [VGG19_early, VGG19_checkpoint]

In [0]:
# Train the VGG19-based network, validating after every epoch

VGG19_n_epoch = 50
# Whole batches per pass over each split; a trailing partial batch is not counted
VGG19_STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
VGG19_STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

VGG19_fit_arguments = dict(
    generator=train_generator,
    steps_per_epoch=VGG19_STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=VGG19_STEP_SIZE_VALID,
    callbacks=VGG19_callbacks_list,
    epochs=VGG19_n_epoch,
)
VGG19_history = VGG19_model.fit_generator(**VGG19_fit_arguments)

In [0]:
# Plot the training and validation accuracy

pd.DataFrame(VGG19_history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0,1)
plt.show()

In [0]:
# Displays performance on the validation set (not the test set) as [loss, acc]

VGG19_model.evaluate_generator(validation_generator, steps=VGG19_STEP_SIZE_VALID)

In [0]:
# Setup prediction generator

VGG19_STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

test_generator.reset()
VGG19_predictions = VGG19_model.predict_generator(test_generator,steps=VGG19_STEP_SIZE_TEST, verbose=1)

In [0]:
VGG19_predicted_class_indices=np.argmax(VGG19_predictions,axis=1)

In [0]:
# Since the test generator outputs an array of probability, the following code splits these arrays to display what the network guessed vs ground truth

VGG19_labels = (train_generator.class_indices)
VGG19_labels = dict((v,k) for k,v in VGG19_labels.items())
VGG19_predictions = [VGG19_labels[k] for k in VGG19_predicted_class_indices]

In [0]:
VGG19_filenames=test_generator.filenames
VGG19_results=pd.DataFrame({"Filename":VGG19_filenames,
                      "Predictions":VGG19_predictions})

In [0]:
# Separate DataFrame into Actual vs Predicted Columns for Comparison

VGG19_results[['Actual','Filename']] = VGG19_results.Filename.str.split("/",expand=True)

In [0]:
# Tag cases where columns are equal/non-equal (where the actual = predicted or actual =! predicted)

VGG19_results['Correct Prediction?'] = np.where(VGG19_results['Predictions'] == VGG19_results['Actual'], 'True','False')

In [0]:
# Export the VGG19 comparison table to CSV, then display it to check that it looks as expected.
# The VGG19 table itself is written (not the scratch-CNN `results` table), and the
# DataFrame comes last because only the last expression of a cell is rendered.

VGG19_results.to_csv("results_VGG19.csv",index=False)
VGG19_results

In [0]:
# Convert Boolean Values to Numeric Total Counts

VGG19_create_prediction_count = VGG19_results['Correct Prediction?'].value_counts()

In [0]:
# Number of correctly classified test images.
# .get() yields 0 instead of raising KeyError when no prediction was correct.

VGG19_correct_predictions = VGG19_create_prediction_count.get('True', 0)

In [0]:
# Test accuracy = correct predictions / number of test images that were predicted.
# len(VGG19_results) is the exact test-set size; total_image_count*test_ratio only estimates it.
VGG19_test_accuracy = VGG19_correct_predictions / len(VGG19_results)

In [0]:
print(VGG19_test_accuracy)

In [0]:
# Download the VGG19 actual-versus-prediction results (comment this cell out if the download is not wanted)

files.download("results_VGG19.csv")

### Inception V3

In [0]:
# Locations of the training, validation, and test splits consumed by the ImageDataGenerator flows

train_dir, validation_dir, test_dir = (
    '/content/train',
    '/content/val',
    '/content/test',
)

In [0]:
# Batch size, input image geometry, and number of output classes

BATCH_SIZE = 32
IMG_HEIGHT, IMG_WIDTH, CHANNELS = 256, 256, 3
num_classes = 270

In [0]:
# Augmenting generator, used for the training images only
image_generator = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.20,        # displaces fixed points of images
    zoom_range=0.2,          # zooming factor into images
    rotation_range=40,       # randomly rotate images in the range
    width_shift_range=0.2,   # randomly shifts images horizontally
    height_shift_range=0.2,  # randomly shifts images vertically
    horizontal_flip=True)    # randomly flips images

# Plain rescaling generator, shared by the validation and test images
test_datagen = ImageDataGenerator(rescale=1./255)

# Arguments that are identical for all three directory flows
common_flow_args = dict(target_size=(IMG_HEIGHT, IMG_WIDTH), color_mode="rgb", seed=42)

# Setup training generator
train_generator = image_generator.flow_from_directory(
    train_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup validation generator
validation_generator = test_datagen.flow_from_directory(
    validation_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup test generator: unshuffled, one image per batch, no labels
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, batch_size=1, class_mode=None, **common_flow_args)

In [0]:
# Build the InceptionV3 base network with ImageNet weights.
# include_top=False leaves out the network's original fully connected classifier layers.
# The input shape comes from the shared size constants so it always matches the generators.

InceptionV3_conv_base = InceptionV3(weights = 'imagenet',
                  include_top = False,
                  input_shape = (IMG_HEIGHT,IMG_WIDTH,CHANNELS))

In [0]:
# Preview what the architecture looks like

InceptionV3_conv_base.summary()

In [0]:
# Transfer-learning network: InceptionV3 base -> flatten -> 256-unit dense layer -> softmax over the classes
# NOTE(review): the base is never marked non-trainable in the visible cells, so its weights are trained too - confirm this is intended

InceptionV3_model = models.Sequential([
    InceptionV3_conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

In [0]:
# Preview what the architecture looks like

InceptionV3_model.summary()

In [0]:
# Compile the model - must be run before training can commence

InceptionV3_optimizer = optimizers.RMSprop(lr=1e-4)
InceptionV3_model.compile(optimizer=InceptionV3_optimizer,
                          loss='categorical_crossentropy',
                          metrics=['acc'])

In [0]:
# Create the folder that stores the InceptionV3 checkpoints and configure checkpointing.
# A weights file is written only when validation accuracy improves.

model_path = './InceptionV3_model'

# Pure-Python replacement for the shell test-and-mkdir; a no-op when the folder already exists
os.makedirs(model_path, exist_ok=True)

InceptionV3_filepath = model_path + "/InceptionV3_weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.
# `period=1` is omitted: checking after every epoch is the default.
InceptionV3_checkpoint = tf.keras.callbacks.ModelCheckpoint(InceptionV3_filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='auto')

In [0]:
# Early stopping: end training once validation accuracy has not improved for 10 epochs.
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.

InceptionV3_early = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='auto')

In [0]:
# Loading Early Stopping and Checkpoint into list for generator

InceptionV3_callbacks_list = [InceptionV3_early, InceptionV3_checkpoint]

In [0]:
# Train the InceptionV3-based network, validating after every epoch

InceptionV3_n_epoch = 50
# Whole batches per pass over each split; a trailing partial batch is not counted
InceptionV3_STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
InceptionV3_STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

InceptionV3_fit_arguments = dict(
    generator=train_generator,
    steps_per_epoch=InceptionV3_STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=InceptionV3_STEP_SIZE_VALID,
    callbacks=InceptionV3_callbacks_list,
    epochs=InceptionV3_n_epoch,
)
InceptionV3_history = InceptionV3_model.fit_generator(**InceptionV3_fit_arguments)

In [0]:
# Plot the training and validation accuracy

pd.DataFrame(InceptionV3_history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0,1)
plt.show()

In [0]:
# Displays performance on the validation set (not the test set) as [loss, acc]

InceptionV3_model.evaluate_generator(validation_generator, steps=InceptionV3_STEP_SIZE_VALID)

In [0]:
# Setup prediction generator

InceptionV3_STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

test_generator.reset()
InceptionV3_predictions = InceptionV3_model.predict_generator(test_generator,steps=InceptionV3_STEP_SIZE_TEST, verbose=1)

In [0]:
InceptionV3_predicted_class_indices=np.argmax(InceptionV3_predictions,axis=1)

In [0]:
# Since the test generator outputs an array of probability, the following code splits these arrays to display what the network guessed vs ground truth

InceptionV3_labels = (train_generator.class_indices)
InceptionV3_labels = dict((v,k) for k,v in InceptionV3_labels.items())
InceptionV3_predictions = [InceptionV3_labels[k] for k in InceptionV3_predicted_class_indices]

In [0]:
InceptionV3_filenames=test_generator.filenames
InceptionV3_results=pd.DataFrame({"Filename":InceptionV3_filenames,
                      "Predictions":InceptionV3_predictions})

In [0]:
# Separate DataFrame into Actual vs Predicted Columns for Comparison

InceptionV3_results[['Actual','Filename']] = InceptionV3_results.Filename.str.split("/",expand=True)

In [0]:
# Tag cases where columns are equal/non-equal (where the actual = predicted or actual =! predicted)

InceptionV3_results['Correct Prediction?'] = np.where(InceptionV3_results['Predictions'] == InceptionV3_results['Actual'], 'True','False')

In [0]:
# Export the InceptionV3 comparison table to CSV, then display it to check that it looks as expected
# (only the last expression of a cell is rendered, so the DataFrame comes last)

InceptionV3_results.to_csv("InceptionV3_results.csv",index=False)
InceptionV3_results

In [0]:
# Convert Boolean Values to Numeric Total Counts

InceptionV3_create_prediction_count = InceptionV3_results['Correct Prediction?'].value_counts()

In [0]:
# Number of correctly classified test images.
# .get() yields 0 instead of raising KeyError when no prediction was correct.

InceptionV3_correct_predictions = InceptionV3_create_prediction_count.get('True', 0)

In [0]:
# Test accuracy = correct predictions / number of test images that were predicted.
# len(InceptionV3_results) is the exact test-set size; total_image_count*test_ratio only estimates it.
InceptionV3_test_accuracy = InceptionV3_correct_predictions / len(InceptionV3_results)

In [0]:
print(InceptionV3_test_accuracy)

In [0]:
# Download the InceptionV3 actual-versus-prediction results (comment this cell out if the download is not wanted)

files.download("InceptionV3_results.csv")

### ResNet

In [0]:
# Locations of the training, validation, and test splits consumed by the ImageDataGenerator flows

train_dir, validation_dir, test_dir = (
    '/content/train',
    '/content/val',
    '/content/test',
)

In [0]:
# Batch size, input image geometry, and number of output classes

BATCH_SIZE = 32
IMG_HEIGHT, IMG_WIDTH, CHANNELS = 256, 256, 3
num_classes = 270

In [0]:
# Augmenting generator, used for the training images only
image_generator = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.20,        # displaces fixed points of images
    zoom_range=0.2,          # zooming factor into images
    rotation_range=40,       # randomly rotate images in the range
    width_shift_range=0.2,   # randomly shifts images horizontally
    height_shift_range=0.2,  # randomly shifts images vertically
    horizontal_flip=True)    # randomly flips images

# Plain rescaling generator, shared by the validation and test images
test_datagen = ImageDataGenerator(rescale=1./255)

# Arguments that are identical for all three directory flows
common_flow_args = dict(target_size=(IMG_HEIGHT, IMG_WIDTH), color_mode="rgb", seed=42)

# Setup training generator
train_generator = image_generator.flow_from_directory(
    train_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup validation generator
validation_generator = test_datagen.flow_from_directory(
    validation_dir, shuffle=True, batch_size=BATCH_SIZE, class_mode='categorical', **common_flow_args)

# Setup test generator: unshuffled, one image per batch, no labels
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, batch_size=1, class_mode=None, **common_flow_args)

In [0]:
# Build the ResNet50 base network with ImageNet weights.
# include_top=False leaves out the network's original fully connected classifier layers.

ResNet50_conv_base = ResNet50(weights = 'imagenet',
                  include_top = False,
                  input_shape=(IMG_HEIGHT,IMG_WIDTH,CHANNELS))

In [0]:
# Preview what the architecture looks like

ResNet50_conv_base.summary()

In [0]:
# Transfer-learning network: ResNet50 base -> flatten -> 256-unit dense layer -> softmax over the classes
# NOTE(review): the base is never marked non-trainable in the visible cells, so its weights are trained too - confirm this is intended

ResNet50_model = models.Sequential([
    ResNet50_conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

In [0]:
# Preview what the architecture looks like

ResNet50_model.summary()

In [0]:
# Compile the model - must be run before training can commence

ResNet50_optimizer = optimizers.RMSprop(lr=1e-4)
ResNet50_model.compile(optimizer=ResNet50_optimizer,
                       loss='categorical_crossentropy',
                       metrics=['acc'])

In [0]:
# Create the folder that stores the ResNet50 checkpoints and configure checkpointing.
# A weights file is written only when validation accuracy improves.

model_path = './ResNet50_model'

# Pure-Python replacement for the shell test-and-mkdir; a no-op when the folder already exists
os.makedirs(model_path, exist_ok=True)

ResNet50_filepath = model_path + "/ResNet50_weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.
# `period=1` is omitted: checking after every epoch is the default.
ResNet50_checkpoint = tf.keras.callbacks.ModelCheckpoint(ResNet50_filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='auto')

In [0]:
# Early stopping: end training once validation accuracy has not improved for 10 epochs.
# The callback is taken from tf.keras so that it matches the tf.keras model it is attached to.

ResNet50_early = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='auto')

In [0]:
# Loading Early Stopping and Checkpoint into list for generator

ResNet50_callbacks_list = [ResNet50_early, ResNet50_checkpoint]

In [0]:
# Train the ResNet50-based network, validating after every epoch

ResNet50_n_epoch = 50
# Whole batches per pass over each split; a trailing partial batch is not counted
ResNet50_STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
ResNet50_STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

ResNet50_fit_arguments = dict(
    generator=train_generator,
    steps_per_epoch=ResNet50_STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=ResNet50_STEP_SIZE_VALID,
    callbacks=ResNet50_callbacks_list,
    epochs=ResNet50_n_epoch,
)
ResNet50_history = ResNet50_model.fit_generator(**ResNet50_fit_arguments)

In [0]:
# Plot the training and validation accuracy

pd.DataFrame(ResNet50_history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0,1)
plt.show()

In [0]:
# Displays performance on the validation set (not the test set) as [loss, acc]

ResNet50_model.evaluate_generator(validation_generator, steps=ResNet50_STEP_SIZE_VALID)

In [0]:
# Setup prediction generator

ResNet50_STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

test_generator.reset()
ResNet50_predictions = ResNet50_model.predict_generator(test_generator,steps=ResNet50_STEP_SIZE_TEST, verbose=1)

In [0]:
ResNet50_predicted_class_indices=np.argmax(ResNet50_predictions,axis=1)

In [0]:
# Since the test generator outputs an array of probability, the following code splits these arrays to display what the network guessed vs ground truth

ResNet50_labels = (train_generator.class_indices)
ResNet50_labels = dict((v,k) for k,v in ResNet50_labels.items())
ResNet50_predictions = [ResNet50_labels[k] for k in ResNet50_predicted_class_indices]

In [0]:
ResNet50_filenames=test_generator.filenames
ResNet50_results=pd.DataFrame({"Filename":ResNet50_filenames,
                      "Predictions":ResNet50_predictions})

In [0]:
# Separate DataFrame into Actual vs Predicted Columns for Comparison

ResNet50_results[['Actual','Filename']] = ResNet50_results.Filename.str.split("/",expand=True)

In [0]:
# Tag cases where columns are equal/non-equal (where the actual = predicted or actual =! predicted)

ResNet50_results['Correct Prediction?'] = np.where(ResNet50_results['Predictions'] == ResNet50_results['Actual'], 'True','False')

In [0]:
# Export the ResNet50 comparison table to CSV, then display it to check that it looks as expected.
# The ResNet50 table itself is written (not the scratch-CNN `results` table), and the
# DataFrame comes last because only the last expression of a cell is rendered.

ResNet50_results.to_csv("results_ResNet50.csv",index=False)
ResNet50_results

In [0]:
# Count how many rows carry each 'Correct Prediction?' label (the strings 'True' / 'False')

ResNet50_create_prediction_count = ResNet50_results['Correct Prediction?'].value_counts()

In [0]:
# Isolate the number of correct predictions. .get() returns 0 instead of raising a
# KeyError when the 'True' label is absent (i.e. no prediction was correct).

ResNet50_correct_predictions = ResNet50_create_prediction_count.get('True', 0)

In [0]:
# Test accuracy = correct predictions / number of test images actually scored.
# len(ResNet50_results) is the exact number of scored images, whereas
# total_image_count*test_ratio is only an estimate when the split does not divide evenly.
ResNet50_test_accuracy = ResNet50_correct_predictions / len(ResNet50_results)

In [0]:
# Show the ResNet50 test accuracy computed above
print(ResNet50_test_accuracy)

In [0]:
# Downloads the actual-versus-prediction results CSV through google.colab's `files` helper.
# Comment this line out if the download is not wanted.

files.download("results_ResNet50.csv")

### Xception

In [0]:
# Paths of the training, validation and test image folders consumed by the ImageDataGenerator flows

train_dir, validation_dir, test_dir = '/content/train', '/content/val', '/content/test'

In [0]:
# Batch size, input image geometry (square, 3 channels) and number of output classes

BATCH_SIZE = 32
IMG_HEIGHT = IMG_WIDTH = 256
CHANNELS = 3
num_classes = 270

In [0]:
# Setting up the ImageDataGenerators and the three directory flows

# Augmenting generator, used for the training images only
augmentation_options = dict(
    rescale=1./255,
    shear_range=0.20,        # displaces fixed points of images
    zoom_range=0.2,          # zooming factor into images
    rotation_range=40,       # randomly rotate images in the range
    width_shift_range=0.2,   # randomly shifts images horizontally
    height_shift_range=0.2,  # randomly shifts images vertically
    horizontal_flip=True,    # randomly flips images
)
image_generator = ImageDataGenerator(**augmentation_options)

# Generalized generator (rescaling only), shared by the validation and test flows
test_datagen = ImageDataGenerator(rescale=1./255)

# Arguments that are identical for all three flows
shared_flow_options = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode="rgb",
    seed=42,
)

# Setup training generator (augmented images, shuffled)
train_generator = image_generator.flow_from_directory(
    train_dir,
    shuffle=True,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    **shared_flow_options
)

# Setup validation generator
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    shuffle=True,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    **shared_flow_options
)

# Setup test generator (unshuffled, one image per batch, class_mode=None)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    batch_size=1,
    class_mode=None,
    **shared_flow_options
)

In [0]:
# Insert Xception Base Network: ImageNet weights, built without its top (include_top=False)

Xception_input_shape = (IMG_HEIGHT, IMG_WIDTH, CHANNELS)
Xception_conv_base = Xception(
    weights='imagenet',
    include_top=False,
    input_shape=Xception_input_shape,
)

In [0]:
# Preview the architecture of the Xception base network on its own

Xception_conv_base.summary()

In [0]:
# Transfer-learning network: the Xception base followed by a small dense classifier
# (Flatten -> Dense(256, relu) -> Dense(num_classes, softmax))

Xception_model = models.Sequential([
    Xception_conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

In [0]:
# Preview the architecture of the complete model (Xception base plus the dense layers added above)

Xception_model.summary()

In [0]:
# Compiling the model above - must be run in order for training to commence.
# The accuracy metric is registered as 'acc', which is why the callbacks monitor 'val_acc'.
# The optimizer takes `learning_rate`; `lr` is only a deprecated alias for it.

Xception_model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(learning_rate=1e-4),
              metrics=['acc'])

In [0]:
# Creating Folder to Store Checkpoints

model_path = './Xception_model'

# Done in Python rather than through a shell command so the cell does not depend on
# a POSIX shell or on unquoted variable interpolation.
if os.path.isdir(model_path):
    print('Directory Exists')
else:
    os.mkdir(model_path)

# One checkpoint file per improving epoch; the file name embeds the epoch number and val_acc.
Xception_filepath = model_path + "/Xception_weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
Xception_checkpoint = ModelCheckpoint(Xception_filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='auto', period=1)

In [0]:
# Early Stopping callback: monitors 'val_acc' with a patience of 10

Xception_early = EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='auto')

In [0]:
# Collect the early-stopping and checkpoint callbacks into the list handed to fit_generator

Xception_callbacks_list = [Xception_early, Xception_checkpoint]

In [0]:
# Train the Xception model from the generators and keep the returned history for plotting

Xception_n_epoch = 50

# Whole batches per pass (floor division, so a trailing partial batch is not counted)
Xception_STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
Xception_STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

Xception_history = Xception_model.fit_generator(
    train_generator,
    epochs=Xception_n_epoch,
    steps_per_epoch=Xception_STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=Xception_STEP_SIZE_VALID,
    callbacks=Xception_callbacks_list,
)

In [0]:
# Plot every metric recorded in the Xception training history (y-axis limited to [0, 1])

Xception_history_frame = pd.DataFrame(Xception_history.history)
Xception_history_axes = Xception_history_frame.plot(figsize=(8, 5))
Xception_history_axes.grid(True)
Xception_history_axes.set_ylim(0, 1)
plt.show()

In [0]:
# Evaluates the model on the validation generator (not on the test generator) and
# displays [loss, acc], 'acc' being the metric the model was compiled with.

Xception_model.evaluate_generator(validation_generator, steps=Xception_STEP_SIZE_VALID)

In [0]:
# Run the trained Xception model over the test generator to obtain class probabilities

test_generator.reset()  # rewind the generator before predicting
Xception_STEP_SIZE_TEST = test_generator.n // test_generator.batch_size
Xception_predictions = Xception_model.predict_generator(
    test_generator,
    steps=Xception_STEP_SIZE_TEST,
    verbose=1,
)

In [0]:
# Index of the highest-scoring class for every prediction row
Xception_model_predicted_class_indices = Xception_predictions.argmax(axis=1)

In [0]:
# Translate each predicted class index back into its class name.
# train_generator.class_indices maps name -> index, so it is inverted first.
# Note: Xception_predictions is rebound here from the probability array to a list of names.

Xception_labels = {index: name for name, index in train_generator.class_indices.items()}
Xception_predictions = [
    Xception_labels[class_index] for class_index in Xception_model_predicted_class_indices
]

In [0]:
# One row per test file: the path reported by the generator and the predicted class name
Xception_model_filenames = test_generator.filenames
Xception_results = pd.DataFrame(
    {"Filename": Xception_model_filenames, "Predictions": Xception_predictions}
)

In [0]:
# Separate "<class folder>/<file name>" into Actual (ground truth) and Filename columns.
# The split is computed from Xception_model_filenames rather than from the Filename
# column that this cell overwrites, so the cell can be re-run safely; n=1 splits only at
# the first "/" so exactly two columns are produced even for nested paths.

Xception_results[['Actual','Filename']] = pd.Series(
    Xception_model_filenames, index=Xception_results.index
).str.split("/", n=1, expand=True)

In [0]:
# Tag each row with the string 'True' when predicted == actual and 'False' when they differ

Xception_is_match = Xception_results['Predictions'].eq(Xception_results['Actual'])
Xception_results['Correct Prediction?'] = np.where(Xception_is_match, 'True', 'False')

In [0]:
# Export the Xception results to CSV (the files.download cell below expects this file).
# The frame written is Xception_results, i.e. the table built in the cells above.
Xception_results.to_csv("results_Xception.csv",index=False)

# Double check that DataFrame looks as expected. Only the last expression of a cell is
# rendered, so the frame is placed at the end.
Xception_results

In [0]:
# Count how many rows carry each 'Correct Prediction?' label (the strings 'True' / 'False')

Xception_create_prediction_count = Xception_results['Correct Prediction?'].value_counts()

In [0]:
# Isolate the number of correct predictions. .get() returns 0 instead of raising a
# KeyError when the 'True' label is absent (i.e. no prediction was correct).

Xception_correct_predictions = Xception_create_prediction_count.get('True', 0)

In [0]:
# Test accuracy = correct predictions / number of test images actually scored.
# len(Xception_results) is the exact number of scored images, whereas
# total_image_count*test_ratio is only an estimate when the split does not divide evenly.
Xception_test_accuracy = Xception_correct_predictions / len(Xception_results)

In [0]:
# Show the Xception test accuracy computed above
print(Xception_test_accuracy)

In [0]:
# Downloads the actual-versus-prediction results CSV through google.colab's `files` helper.
# Comment this line out if the download is not wanted.

files.download("results_Xception.csv")

## **Computing Image Similarity**

### Image Similarity Through MSE & SSIM

In [0]:
def mse(imageA, imageB):
    """Return the squared-error score between two images.

    Both arrays are converted to float, their element-wise differences are squared
    and summed, and the total is divided by height * width of ``imageA`` (its first
    two dimensions). A lower value means more similar images; 0 means identical.
    """
    difference = imageA.astype("float") - imageB.astype("float")
    pixel_count = float(imageA.shape[0] * imageA.shape[1])
    return np.sum(difference ** 2) / pixel_count

In [0]:
def compare_image(imageA, imageB, title):
    """Display two images side by side, headed by their MSE and SSIM scores.

    ``title`` names the matplotlib figure; the scores are shown as the figure's
    super-title. Both images are drawn with the gray colormap and hidden axes.
    """
    mse_score = mse(imageA, imageB)
    ssim_score = ssim(imageA, imageB)

    fig = plt.figure(title)
    plt.suptitle("MSE: %.2f, SSIM: %.2f" % (mse_score, ssim_score))

    # first image in the left panel, second image in the right panel
    for panel, image in enumerate((imageA, imageB), start=1):
        ax = fig.add_subplot(1, 2, panel)
        plt.imshow(image, cmap = plt.cm.gray)
        plt.axis("off")

    plt.show()

In [0]:
# Loads images from subset logo dataset

def _read_logo(path):
    """Read an image with cv2.imread and fail loudly if it could not be loaded.

    cv2.imread does not raise for a missing or unreadable file; it returns None,
    which would otherwise only surface later as an obscure error in cv2.cvtColor.

    Raises:
        FileNotFoundError: if cv2.imread returned None for ``path``.
    """
    image = cv2.imread(path)
    if image is None:
        raise FileNotFoundError("Could not read image: " + path)
    return image

logo_1= _read_logo('/content/drive/My Drive/Similarity/Logos_21.png')
logo_2= _read_logo('/content/drive/My Drive/Similarity/Logos_1.png')
logo_3= _read_logo('/content/drive/My Drive/Similarity/Logos_196.png')

logo_compare_1= _read_logo('/content/drive/My Drive/Similarity/Logos_68.png')
logo_compare_2= _read_logo('/content/drive/My Drive/Similarity/Logos_103.png')
logo_compare_3= _read_logo('/content/drive/My Drive/Similarity/Logos_46.png')

In [0]:
# Converts the images to grayscale to better apply the SSIM

def _to_grayscale(image):
    """Return a single-channel version of a BGR image.

    An image that is already 2-D (single channel) is returned unchanged. Because
    this cell overwrites its own inputs, that pass-through keeps the cell safe to
    re-run: cv2.cvtColor with COLOR_BGR2GRAY rejects single-channel input.
    """
    if image.ndim == 2:
        return image
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

logo_1 = _to_grayscale(logo_1)
logo_2 = _to_grayscale(logo_2)
logo_3 = _to_grayscale(logo_3)

logo_compare_1 = _to_grayscale(logo_compare_1)
logo_compare_2 = _to_grayscale(logo_compare_2)
logo_compare_3 = _to_grayscale(logo_compare_3)

In [0]:
# Show the three logos and their three comparison logos in a single row of six panels

fig = plt.figure("Logo Comparison")
images = (
    ("First Logo", logo_1),
    ("Second Logo", logo_2),
    ("Third Logo", logo_3),
    ('First Comparison Logo', logo_compare_1),
    ('Second Comparison Logo', logo_compare_2),
    ('Third Comparison Logo', logo_compare_3),
)

for i, (name, image) in enumerate(images):
    ax = fig.add_subplot(1, 6, i + 1)  # subplot positions are 1-based
    plt.imshow(image, cmap = plt.cm.gray)
    plt.axis("off")

plt.show();

In [0]:
# Baseline: compare a logo with itself (the MSE of identical images is 0)
compare_image(logo_1, logo_1, 'same logo')

In [0]:
# Compare each logo with its comparison counterpart, one figure per pair
for comparison_title, logo, comparison_logo in (
    ('Logo 1 Comparison', logo_1, logo_compare_1),
    ('Logo 2 Comparison', logo_2, logo_compare_2),
    ('Logo 3 Comparison', logo_3, logo_compare_3),
):
    compare_image(logo, comparison_logo, comparison_title)

### Computing Image Similarity by using Euclidean Distance

In [0]:
# Paths of the dataset logos Logos_1.png ... Logos_247.png, in numeric order
logo_dataset_png_path = [
    '/content/drive/My Drive/Similarity/Logos_' + str(logo_number) + '.png'
    for logo_number in range(1, 248)
]

In [0]:
# Load every dataset logo, in path order, so logo_pics[k-1] holds Logos_k.png.
# NOTE(review): the import that provides `imread` is not visible in this part of the notebook - confirm which library it comes from.
logo_pics = [imread(path) for path in logo_dataset_png_path]

In [0]:
# resizes input image

def resize(new_image, size=(256, 256)):
  """Load an image file and return it resized, as a NumPy array.

  Args:
    new_image: path of the image file to load.
    size: target (width, height) in pixels. Defaults to 256 x 256.

  Returns:
    numpy.ndarray holding the pixels of the resized image.
  """
  # Read-only binary mode: the file is only read, so write permission is not required.
  with open(new_image, 'rb') as f:
    with Image.open(f) as original_image:
      # The array is built before the file is closed on leaving the with-blocks.
      return np.array(original_image.resize(tuple(size)))

In [0]:
# displays the input image and dataset match side-by-side

def show_two_logos(input_logo, logo_from_dataset):
  """Plot the input logo next to a dataset logo.

  Args:
    input_logo: image array shown in the left panel ('Input Image').
    logo_from_dataset: 1-based position in ``logo_pics`` of the logo shown in the
      right panel ('Closest Match').
  """
  fig,(ax1, ax2) = plt.subplots(1,2, figsize = (10, 5))

  # Each image is drawn exactly once; only the titles are set in the loop.
  ax1.imshow(input_logo)
  ax2.imshow(logo_pics[logo_from_dataset-1])
  for ax, title in zip([ax1,ax2],['Input Image','Closest Match']):
      ax.set_title(title)
  plt.show()


In [0]:
# computes euclidean distance

def euclidean_distance(input_logo, logo_from_dataset):
  """Sum of per-pixel Euclidean distances between an input logo and a dataset logo.

  Args:
    input_logo: image array of shape (height, width) or (height, width, channels).
    logo_from_dataset: 1-based position of the dataset logo in ``logo_pics``.

  Returns:
    The Euclidean distance between corresponding pixel vectors, summed over every
    pixel of ``input_logo``.

  Raises:
    ValueError: if the dataset logo, cropped to the input logo's height and width,
      does not have the same shape as the input logo.
  """
  # Work in float so that unsigned-integer pixel data cannot wrap around on subtraction.
  first = np.asarray(input_logo, dtype=float)
  second = np.asarray(logo_pics[logo_from_dataset-1], dtype=float)

  # Compare over the input logo's extent; a larger dataset image is cropped to it.
  height, width = first.shape[:2]
  second = second[:height, :width]
  if first.shape != second.shape:
    raise ValueError(
        "Cannot compare images of shapes %s and %s" % (first.shape, second.shape))

  # Vectorised over all pixels at once; a trailing axis of length 1 is added for
  # single-channel images so both layouts share one code path.
  difference = (first - second).reshape(height, width, -1)
  per_pixel_distance = np.sqrt(np.sum(difference ** 2, axis=-1))
  return per_pixel_distance.sum()

In [0]:
# loops through images to compute euclidean distances between element-wise pixels

def iterate_through_logos(input_logo, limit=10):
  """Compute the distance from ``input_logo`` to the leading dataset logos.

  Args:
    input_logo: image array to compare against the dataset.
    limit: how many dataset logos, counted from the first, to compare against.
      Defaults to 10. ``None`` means every logo in ``logo_pics``; values larger
      than the dataset are capped at ``len(logo_pics)``.

  Returns:
    A list of ``(distance, logo_number)`` tuples, where ``logo_number`` is the
    1-based position of the logo in ``logo_pics``.
  """
  available = len(logo_pics)
  count = available if limit is None else min(limit, available)
  return [
      (euclidean_distance(input_logo, logo_number), logo_number)
      for logo_number in range(1, count + 1)
  ]

In [0]:
# composite function that will find the closest image to the input image

def find_best_matching_logo(logo_image):
  """Find the dataset logo closest to an image file and show both side by side.

  Args:
    logo_image: path of the image file to match.

  Returns:
    Whatever ``show_two_logos`` returns.

  Raises:
    ValueError: if there are no dataset logos to compare against.
  """
  input_logo = resize(logo_image)
  distances = iterate_through_logos(input_logo)
  if not distances:
    raise ValueError("No dataset logos to compare against")

  ranked = sorted(distances)  # ascending by distance, ties broken by logo number
  # Take the nearest candidate. A zero-distance entry is an exact copy of the input
  # (e.g. the input itself is part of the dataset) and is skipped in favour of the
  # nearest different logo; if every candidate is an exact copy, the first is used.
  best = next((entry for entry in ranked if entry[0] > 0), ranked[0])
  return show_two_logos(input_logo, best[1])

In [0]:
# Look up the dataset logo that best matches the given image file and display both
find_best_matching_logo('/content/drive/My Drive/Deloitte.png')