In [None]:
# Imports

import numpy as np
import pandas as pd
import tensorflow as tf
from glob import glob # finds all the pathnames matching a specified pattern according to the rules used by the Unix shell
import cv2 # computer vision library for reading images
from IPython.display import Image
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec

# Tensorflow
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

#Keras
from keras import applications
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.applications import InceptionResNetV2
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input
from tensorflow.keras.preprocessing import image

from sklearn.model_selection import train_test_split

# Reproducibility
def set_seed(seed=31415):
    """Seed the random number generators used by this notebook.

    Seeds NumPy's global RNG and TensorFlow's global RNG with `seed`, and
    sets the `PYTHONHASHSEED` and `TF_DETERMINISTIC_OPS` environment
    variables.

    Args:
        seed: Integer seed applied to every generator (default 31415).
    """
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })
    np.random.seed(seed)
    tf.random.set_seed(seed)


set_seed()

# Notebook-wide Matplotlib styling, applied in a single rcParams update
plt.rcParams.update({
    'figure.autolayout': True,
    'axes.labelweight': 'bold',
    'axes.labelsize': 'large',
    'axes.titleweight': 'bold',
    'axes.titlesize': 18,
    'axes.titlepad': 10,
    'image.cmap': 'magma',
})
# Silence library warnings so the output cells stay readable
warnings.filterwarnings("ignore")

In [None]:
# Input locations; the entries ending in `*` are glob patterns
PATH = "../input/dog-breed-identification"
LABELS_PATH = os.path.join(PATH, 'labels.csv')
TRAIN_PATH = os.path.join(PATH, 'train/*')
TEST_PATH = os.path.join(PATH, 'test/*')
DOG_IMAGES_PATH = '../input/dog-images/*'

# Model / training parameters
SIZE = 299         # images are resized to SIZE x SIZE before entering the network
NUM_CLASSES = 120  # length of the one-hot label vectors
BATCH_SIZE = 128   # images per batch in the tf.data pipeline
EPOCHS = 50        # upper bound on training epochs

In [None]:
def build_model():
    '''
    Build the breed classifier on top of a pretrained InceptionResNetV2.

    The backbone is loaded with ImageNet weights, without its original
    classification head, and is frozen (not trainable). A small head is
    stacked on top of it: global average pooling, dropout (rate 0.2) and a
    softmax layer with NUM_CLASSES outputs.

    Input shape:  (SIZE, SIZE, 3)
    Output shape: (NUM_CLASSES,) class probabilities per image

    Returns:
        An uncompiled keras Sequential model.
    '''
    # Take the backbone from the same `tensorflow.keras` namespace as the
    # layers below, so the model is not assembled from two different Keras
    # packages.
    backbone = keras.applications.InceptionResNetV2(
        include_top=False,
        weights="imagenet",
        input_tensor=None,
        input_shape=(SIZE, SIZE, 3),
        pooling=None,  # pooling is added explicitly in the head below
    )
    backbone.trainable = False

    return keras.models.Sequential([
        backbone,
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dropout(0.2),
        # Sized from NUM_CLASSES so the output always matches the one-hot labels
        keras.layers.Dense(NUM_CLASSES, activation='softmax'),
    ])

In [None]:
def read_image(path):
    '''
    Load a single image file as a model-ready array.

    The file is opened and resized to (SIZE, SIZE), converted to an array
    and passed through the InceptionResNetV2 `preprocess_input` function.

    Args:
        path: Location of the image file.

    Returns:
        The preprocessed 3D image array.
    '''
    resized = image.load_img(path, target_size=(SIZE, SIZE))
    return preprocess_input(image.img_to_array(resized))


def parse_data(x, y):
    '''
    Turn one (encoded path, class id) pair into (image array, one-hot label).

    Args:
        x: Image path as a bytes object (it is decoded to str here).
        y: Integer class id, used as the index of the hot entry.

    Returns:
        Tuple of the preprocessed image array and an int32 vector of
        length NUM_CLASSES holding 1 at position `y` and 0 elsewhere.
    '''
    pixels = read_image(x.decode())
    one_hot = np.zeros(NUM_CLASSES, dtype=np.int32)
    one_hot[y] = 1
    return pixels, one_hot

def tf_parse(x, y):
    '''
    TensorFlow wrapper around `parse_data`, suitable for `Dataset.map`.

    Runs `parse_data` through `tf.numpy_function` and then declares the
    static shapes of the two outputs.

    Args:
        x: Tensor holding one image path.
        y: Tensor holding the integer class id of that image.

    Returns:
        Tuple (image, label): a float32 tensor of shape (SIZE, SIZE, 3) and
        an int32 tensor of shape (NUM_CLASSES,).
    '''
    x, y = tf.numpy_function(parse_data, [x, y], [tf.float32, tf.int32])
    x.set_shape((SIZE, SIZE, 3))
    # Written as a real 1-tuple: `(NUM_CLASSES)` is just a parenthesised int
    y.set_shape((NUM_CLASSES,))
    return x, y

def tf_dataset(x, y):
    '''
    Build the input pipeline for a list of image paths and class ids.

    Each (path, id) pair is mapped through `tf_parse`, the results are
    grouped into batches of BATCH_SIZE, and the batched dataset repeats
    indefinitely, so callers must bound each epoch with a step count.

    Args:
        x: Sequence of image paths.
        y: Sequence of integer class ids, aligned with `x`.

    Returns:
        A repeating `tf.data.Dataset` of (image batch, one-hot label batch).
    '''
    autotune = tf.data.experimental.AUTOTUNE
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    # Decode several images concurrently; element order is kept as-is
    dataset = dataset.map(tf_parse, num_parallel_calls=autotune)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.repeat()
    # Prepare upcoming batches while the current one is being consumed
    return dataset.prefetch(autotune)

In [None]:
# Load the label table and list the distinct breeds it contains
labels_df = pd.read_csv(LABELS_PATH)
breed = pd.unique(labels_df['breed'])
print('Number of breeds: ', len(breed))

In [None]:
# Two-way mapping between breed names and integer class ids
# (ids follow the order of the `breed` array)
id_to_breed = dict(enumerate(breed))
breed_to_id = {name: idx for idx, name in id_to_breed.items()}

In [None]:
# Collect the training image paths and the class id of each image.
# The paths are sorted because glob gives no ordering guarantee; a stable
# order is what makes the seeded train/validation split repeatable.
ids = sorted(glob(TRAIN_PATH))

# Build the image-id -> breed lookup once instead of filtering the whole
# DataFrame for every image. drop_duplicates keeps the first row per id,
# matching a "take the first match" lookup.
breed_by_image = (
    labels_df.drop_duplicates(subset='id')
    .set_index('id')['breed']
    .to_dict()
)

labels = []
for image_path in ids:
    # File name up to its first dot is the image id used in labels.csv
    image_id = os.path.basename(image_path).split('.')[0]
    labels.append(breed_to_id[breed_by_image[image_id]])

In [None]:
# Hold out 20% of the images for validation (fixed random_state for a repeatable split)
x_train, x_valid, y_train, y_valid = train_test_split(
    ids,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Wrap each split in its own tf.data pipeline
train_dataset, valid_dataset = (
    tf_dataset(x_train, y_train),
    tf_dataset(x_valid, y_valid),
)

In [None]:
# Instantiate the model and configure it for one-hot, multi-class training
model = build_model()
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['acc'],  # reported as 'acc' / 'val_acc' in the training history
)
model.summary()

In [None]:
# Set callbacks
callbacks = [
    # Stop after 5 epochs without an improvement of at least min_delta and
    # roll the model back to its best weights.
    EarlyStopping(patience=5, min_delta=0.001, restore_best_weights=True),
    # Keep only the best model seen so far on disk.
    ModelCheckpoint('model.h5', verbose=1, save_best_only=True),
    # Patience is deliberately shorter than EarlyStopping's: with equal
    # patience the learning-rate cut would only fire on the epoch training
    # stops and could never help.
    ReduceLROnPlateau(factor=0.1, patience=2, min_lr=1e-6),
]

# Ceiling division: one step per batch, including a final partial batch.
# (`n // BATCH_SIZE + 1` would add a spurious extra step whenever n is an
# exact multiple of BATCH_SIZE, pulling a batch from the next pass.)
train_steps = (len(x_train) + BATCH_SIZE - 1) // BATCH_SIZE
valid_steps = (len(x_valid) + BATCH_SIZE - 1) // BATCH_SIZE

# Training; the datasets repeat forever, so both step counts are required
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    steps_per_epoch=train_steps,
    validation_steps=valid_steps,
    epochs=EPOCHS,
    callbacks=callbacks,
)

In [None]:
# Training curves: loss first, then accuracy (one figure each)
history_frame = pd.DataFrame(history.history)
history_frame[['loss', 'val_loss']].plot()
history_frame[['acc', 'val_acc']].plot();

In [None]:
def decode_predictions(preds, top=5, class_list_path=None):
    '''
    Return the `top` highest-scoring breeds for the first row of `preds`.

    Args:
        preds: 2D array of class scores; only row 0 is decoded.
        top: Number of (breed, score) pairs to return.
        class_list_path: Unused; accepted for call compatibility.

    Returns:
        List of (breed name, score) tuples ordered from highest to lowest
        score. The breed name is None when the class id is not a key of
        `id_to_breed`.
    '''
    ranked = (-preds).argsort()[0]  # class ids of row 0, best score first
    return [
        (id_to_breed.get(ranked[rank]), preds[0][ranked[rank]])
        for rank in range(top)
    ]

## Testing with friends and family dogs

In [None]:
# Show every image matched by DOG_IMAGES_PATH along with its top-3 predictions
images = glob(DOG_IMAGES_PATH)

for img_path in images:
    display(Image(filename=img_path, width=SIZE, height=SIZE))
    # Add a leading batch axis of size 1 before calling predict
    x = np.expand_dims(read_image(img_path), axis=0)

    preds = model.predict(x)
    print('Predicted:', decode_predictions(preds, top=3))