In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import tensorflow_hub as hub

In [None]:
labels_csv = pd.read_csv("../input/dog-breed-identification/labels.csv")
print(labels_csv.describe())
print(labels_csv.head())

In [None]:
# How many images are there of each breed?
labels_csv["breed"].value_counts().plot.bar(figsize = (25,10));

## Getting images and their labels

Since we've got the image ID's and their labels in a DataFrame (labels_csv), we'll use it to create:

* A list of filepaths to training images
* An array of all labels
* An array of all unique labels

We'll only create a list of filepaths to images rather than importing them all to begin with. This is because working with filepaths (strings) is much more efficient than working with images.

In [None]:
from IPython.display import display,Image
#Image("../input/dog-breed-identification/train/000bec180eb18c7604dcecc8fe0dba07.jpg")

In [None]:
# Create pathnames from image ID's
filenames = ["../input/dog-breed-identification/train/"+fname+".jpg" for fname in labels_csv["id"]]

# Check the first 10 filenames
filenames[:10]

In [None]:
import os
os.listdir("../input/dog-breed-identification/train/")[:5]

In [None]:
# Sanity check: the number of filepaths built from labels.csv should equal
# the number of entries in the training image directory.
import os
print(
    "Filenames match actual amount of files!"
    if len(os.listdir("../input/dog-breed-identification/train/")) == len(filenames)
    else "Filenames do not match actual amount of files, check the target directory."
)

In [None]:
# Check an image directly from a filepath
Image(filenames[9000])

In [None]:
# Now we've got our image filepaths together, let's get the labels.
# Take them from labels_csv and turn them into a NumPy array.

labels = np.array(labels_csv["breed"])
labels[:10]

We should have the same number of images and labels.
Finally, since a machine learning model can't take strings as input (what labels currently is), we'll have to convert our labels to numbers.
To begin with, we'll find all of the unique dog breed names.
Then we'll go through the list of labels and compare them to unique breeds and create a list of booleans indicating which one is the real label (True) and which ones aren't (False).

In [None]:
# Find the unique label values
unique_breeds = np.unique(labels_csv["breed"])
len(unique_breeds)

In [None]:
# Turn every label into a boolean array

boolean_labels = [label == np.array(unique_breeds) for label in labels]
boolean_labels[:2]

In [None]:
# Example: Turning a boolean array into integers
print(labels[0])
print(np.where(unique_breeds == labels[0])[0][0]) # index where label occurs 
print(boolean_labels[0].argmax()) # index where label occurs in boolean array
print(boolean_labels[0].astype(int))

## Creating our own validation set

In [None]:
X = filenames
Y = boolean_labels

In [None]:
# Let's start experimenting with 1000 and increase it as we need.
# Set number of images to use for experimenting
NUM_IMAGES = 1000 

In [None]:
# Import train_test_split from Scikit-Learn
from sklearn.model_selection import train_test_split

# Split them into training and validation using NUM_IMAGES 
X_train,X_val,y_train,y_val = train_test_split(X[:NUM_IMAGES],Y[:NUM_IMAGES],test_size=0.2,random_state=42)
len(X_train) , len(X_val) , len(y_train) , len(y_val)

In [None]:
# Check out the training data (image file paths and labels)
X_train[:2],y_train[:2]

## Preprocessing images (turning images into Tensors)
To preprocess our images into Tensors , write a function which does a few things:

1. Takes an image filename as input.
1. Uses TensorFlow to read the file and save it to a variable, image.
1. Turn our image (a jpeg file) into Tensors.
1. Resize the image to be of shape (224, 224).
1. Return the modified image.

In [None]:
# Convert image to NumPy array
from matplotlib.pyplot import imread
image = imread(filenames[42]) # read in an image
image.shape

In [None]:
# convert it to a Tensor using tf.constant().
tf.constant(image)[:2]

In [None]:
# Define image size
IMG_SIZE = 224

def process_image(image_path):
    """
    Reads the image file at `image_path` and returns it as a float32 Tensor.

    The file is decoded as a 3-channel (RGB) JPEG, its colour values are
    converted from 0-255 integers to 0-1 floats, and the result is resized
    to (IMG_SIZE, IMG_SIZE).
    """
    # Raw file contents
    raw_bytes = tf.io.read_file(image_path)
    # JPEG bytes -> numerical Tensor with Red, Green and Blue channels
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    # 0-255 integer values -> 0-1 float values
    scaled = tf.image.convert_image_dtype(decoded, tf.float32)
    # Resize to the square size the model expects
    return tf.image.resize(scaled, size=[IMG_SIZE, IMG_SIZE])

## Creating data batches

In [None]:
# Create a simple function to return a tuple (image, label)
def get_image_label(image_path,label):
    """
    Turns an (image file path, label) pair into an (image Tensor, label) pair.

    The image is loaded and preprocessed with process_image(); the label is
    passed through untouched.
    """
    return process_image(image_path), label

In [None]:
# Define the batch size
BATCH_SIZE=32

# Create a function to turn data into batches
def create_data_batches(x,y=None,batch_size=BATCH_SIZE,valid_data=False,test_data=False):
    """
    Creates batches of data out of image (x) and label (y) pairs.

    Args:
        x: sequence of image file paths.
        y: sequence of labels matching `x`. Required unless `test_data` is True.
        batch_size: number of samples per batch (defaults to BATCH_SIZE).
        valid_data: if True, build validation batches (no shuffling).
        test_data: if True, build unlabelled batches from file paths only
            (no shuffling). Takes precedence over `valid_data`.

    Returns:
        A batched tf.data.Dataset of images (test data) or of
        (image, label) pairs (training / validation data).

    Raises:
        ValueError: if labels are needed (`test_data` is False) but `y` is None.
    """
    if test_data:
        print("Creating test data batches.....")
        data = tf.data.Dataset.from_tensor_slices(tf.constant(x)) # only filepaths
        return data.map(process_image).batch(batch_size)

    if y is None:
        raise ValueError("y (labels) must be provided unless test_data=True")

    if valid_data:
        print("Creating valid data batches.....")
        data = tf.data.Dataset.from_tensor_slices((tf.constant(x), # filepaths
                                                   tf.constant(y))) # labels
    else:
        print("Creating train data batches")
        data = tf.data.Dataset.from_tensor_slices((tf.constant(x),tf.constant(y)))
        # Shuffling pathnames and labels before mapping the image processor
        # function is faster than shuffling decoded images
        data = data.shuffle(buffer_size=len(x))

    # Honour the batch_size argument rather than the global BATCH_SIZE
    return data.map(get_image_label).batch(batch_size)

In [None]:
# Create training and validation data batches
train_data = create_data_batches(X_train,y_train)
val_data = create_data_batches(X_val,y_val,valid_data=True)

In [None]:
# Check out the different attributes of our data batches
train_data.element_spec,val_data.element_spec

## Visualizing data batches

In [None]:
import matplotlib.pyplot as plt

# Create a function for viewing images in a data batch

def show_25_images(images,labels):
    """
    Displays up to 25 images from a data batch in a 5x5 grid.

    Args:
        images: indexable collection of images accepted by plt.imshow.
        labels: indexable collection of label arrays; each label's argmax()
            is used as an index into unique_breeds for the subplot title.

    If fewer than 25 images (or labels) are supplied, only the available
    ones are drawn instead of raising an IndexError.
    """
    plt.figure(figsize=(10,10))
    # Never index past the end of a batch smaller than 25
    num_to_show = min(25, len(images), len(labels))
    for i in range(num_to_show):
        # Create subplots (5 rows, 5 columns)
        plt.subplot(5,5,i+1)
        # Display an image
        plt.imshow(images[i])
        # Add the image label as the title
        plt.title(unique_breeds[labels[i].argmax()])
        # Turn grid lines off
        plt.axis("off")

So to view data in a batch, we've got to unwind it.

We can do so by calling the `as_numpy_iterator()` method on a data batch.

This will turn our data batch into something which can be iterated over.

Passing an iterable to `next()` will return the next item in the iterator.

In [None]:
# Visualize training images from the training data batch
train_images,train_labels = next(train_data.as_numpy_iterator())
show_25_images(train_images,train_labels)

In [None]:
# Visualize validation images from the validation data batch
val_images, val_labels = next(val_data.as_numpy_iterator())
show_25_images(val_images, val_labels)

## Creating and training a model

* We'll use an existing model from TensorFlow Hub.
* Using a pretrained machine learning model is often referred to as `transfer learning`.
* We'll use the `mobilenet_v2_130_224` model, which takes input images of shape (224, 224) and has been trained in the domain of image classification.

## Building a model

Before we build a model, there are a few things we need to define:

* The input shape (images, in the form of Tensors) to our model.
* The output shape (image labels, in the form of Tensors) of our model.
* The URL of the model we want to use.

In [None]:
# Setup input shape to the model
INPUT_SHAPE = [None,IMG_SIZE,IMG_SIZE,3] #batch,height,width,channels

# Setup output shape of the model
OUTPUT_SHAPE = len(unique_breeds) # number of unique labels

# Setup model URL from TensorFlow Hub
MODEL_URL = "https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/5"

Let's create a function which:

* Takes the input shape, output shape and the model we've chosen's URL as parameters.
* Defines the layers in a Keras model in a sequential fashion (do this first, then this, then that).
* Compiles the model (says how it should be evaluated and improved).
* Builds the model (tells it what kind of input shape it'll be getting).
* Returns the model.

In [None]:
# Create a function which builds a Keras model
def create_model(input_shape=INPUT_SHAPE,output_shape=OUTPUT_SHAPE,model_url=MODEL_URL):
    """
    Builds and compiles a Keras Sequential model on top of a TensorFlow Hub layer.

    Args:
        input_shape: shape passed to model.build(), e.g.
            [None, IMG_SIZE, IMG_SIZE, 3] (batch, height, width, channels).
        output_shape: number of units in the final softmax Dense layer
            (one per class).
        model_url: TensorFlow Hub URL of the pretrained layer to use.

    Returns:
        The compiled and built tf.keras.Sequential model.
    """
    print("Building a model with:",model_url)

    # Setup the model layers (use the arguments, not the module-level constants)
    model = tf.keras.Sequential([
        hub.KerasLayer(model_url), # Layer 1 (input layer)
        tf.keras.layers.Dense(units=output_shape,activation="softmax") # Layer 2 (output layer)
    ])

    # Compile the model
    model.compile(
        loss = tf.keras.losses.CategoricalCrossentropy(),
        optimizer = tf.keras.optimizers.Adam(),
        metrics=["accuracy"]
    )

    # Build the model so it knows what input shape to expect
    model.build(input_shape)

    return model

What's happening here?
### Setting up the model layers

The first layer we use is the model from TensorFlow Hub `hub.KerasLayer(MODEL_URL)`. So our first layer is actually an entire model (many more layers). This **input layer** takes in our images and finds patterns in them based on the patterns `mobilenet_v2_130_224` has found.

The next layer (`tf.keras.layers.Dense()`) is the **output layer** of our model. It brings all of the information discovered in the input layer together and outputs it in the shape we're after, 120 (the number of unique labels we have).

The `activation="softmax"` parameter tells the output layer, we'd like to assign a probability value to each of the 120 labels somewhere between 0 & 1. The higher the value, the more the model believes the input image should have that label. If we were working on a binary classification problem, we'd use `activation="sigmoid"`.

### Compiling the model

* **loss** - Getting to 0 means the model is learning perfectly.
* **optimizer** - Adam is the optimizer, the one telling the model how to lower the loss function. Other optimizers include RMSprop and Stochastic Gradient Descent.
* **metrics** - Giving the accuracy of how well our model is predicting the correct image label.

### Building the model

We use `model.build()` whenever we're using a layer from TensorFlow Hub to tell our model what input shape it can expect.

In this case, the input shape is **[None, IMG_SIZE, IMG_SIZE, 3] or [None, 224, 224, 3] or [batch_size, img_height, img_width, color_channels]**.

Batch size is left as `None` as this is inferred from the data we pass the model. In our case, it'll be 32.

We can call `summary()` on our model to get an idea of what our model looks like.

The non-trainable parameters are the patterns learned by `mobilenet_v2_130_224` and the trainable parameters are the ones in the dense layer.

In [None]:
# Create a model and check its details
model = create_model()
model.summary()

## Creating callbacks

Callbacks are helper functions a model can use during training to do things such as save a model's progress, check a model's progress or stop training early if a model stops improving.

### Early Stopping Callback

Early stopping helps prevent overfitting by stopping a model when a certain evaluation metric stops improving. If a model trains for too long, it can do so well at finding patterns in a certain dataset that it's not able to use those patterns on another dataset it hasn't seen before (doesn't generalize).

In [None]:
# Create early stopping (once our model stops improving, stop training)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy",
                                                  patience=3) # stops after 3 rounds of no improvements

## Training a model(subset of 1000 images)

In [None]:
# How many rounds should we get the model to look through the data?
NUM_EPOCHS = 100

In [None]:
# Build a function to train and return a trained model
def train_model():
    """
    Creates a fresh model with create_model(), fits it and returns it.

    Training uses train_data for up to NUM_EPOCHS epochs, validates against
    val_data after every epoch and passes the early_stopping callback.
    """
    trained = create_model()

    trained.fit(
        x=train_data,
        epochs=NUM_EPOCHS,
        validation_data=val_data,
        validation_freq=1,  # check validation metrics every epoch
        callbacks=[early_stopping],
    )

    return trained

In [None]:
# Fit the model to the data
model = train_model()

## Making and evaluating predictions using a trained model

In [None]:
# Make predictions on the validation data

predictions = model.predict(val_data,verbose=1) # verbose shows us how long there is to go
predictions

In [None]:
# Check the shape of predictions
predictions.shape

In this case, making predictions on the validation data (200 images) returns an array (predictions) of arrays, each containing 120 different values (one for each unique dog breed).

These different values are the probabilities or the likelihood the model has predicted a certain image being a certain breed of dog. 

In [None]:
# First prediction
print(predictions[0])
print(f",max value (probability of predictions): {np.max(predictions[0])}") # the max probability value predicted by the model
print(f"Sum:{np.sum(predictions[0])}") #because we used softmax activation in our model, this will be close to 1
print(f"Max Index: {np.argmax(predictions[0])}") # the index of where the max value in predictions[0] occurs
print(f"Predicted label : {unique_breeds[np.argmax(predictions[0])]}")

In [None]:
# Turn prediction probabilities into their respective label (easier to understand)
def get_pred_label(prediction_probabilities):
    """
    Returns the entry of unique_breeds at the position of the highest value
    in `prediction_probabilities`.
    """
    best_index = np.argmax(prediction_probabilities)
    return unique_breeds[best_index]

# Get a predicted label based on an array of prediction probabilities

pred_label = get_pred_label(predictions[0])
pred_label

Since our validation data (`val_data`) is in batch form, to get a list of validation images and labels, we'll have to unbatch it (using `unbatch()`) and then turn it into an iterator using `as_numpy_iterator()`.

In [None]:
# Create a function to unbatch a batched dataset
def unbatchify(data):
    """
    Splits a batched dataset of (image, label) pairs into two lists.

    Returns:
        (images, labels): a list of the individual images and a list of the
        matching unique_breeds entries, each found via the argmax of the label.
    """
    # Materialise every (image, label) pair once, then split into two lists
    pairs = list(data.unbatch().as_numpy_iterator())
    unbatched_images = [picture for picture, _ in pairs]
    breed_names = [unique_breeds[np.argmax(encoded)] for _, encoded in pairs]
    return unbatched_images, breed_names
                      
# Unbatchify the validation data
    
val_images,val_labels = unbatchify(val_data)
val_images[0],val_labels[0]                      

The first function will:

* Take an array of prediction probabilities, an array of truth labels, an array of images and an integer.
* Convert the prediction probabilities to a predicted label.
* Plot the predicted label, its predicted probability, the truth label and target image on a single plot.

In [None]:
def plot_pred(prediction_probabilities, labels, images, n=8):
    """
    Shows the image of sample n, titled with the predicted label, the highest
    predicted probability as a percentage and the truth label.

    The title is green when the prediction matches the truth label, red otherwise.
    """
    probs, actual, picture = prediction_probabilities[n], labels[n], images[n]
    predicted = get_pred_label(probs)

    # Draw the image without axis ticks
    plt.imshow(picture)
    plt.xticks([])
    plt.yticks([])

    # Right prediction -> green title, wrong prediction -> red title
    title_color = "green" if predicted == actual else "red"
    title_text = "{} {:2.0f}% ({})".format(predicted, np.max(probs)*100, actual)
    plt.title(title_text, color=title_color)

In [None]:
# View an example prediction, original image and truth label
plot_pred(prediction_probabilities=predictions,labels=val_labels,images=val_images)

The function will:

* Take an input of a prediction probabilities array, a ground truth labels array and an integer.
* Find the predicted label using get_pred_label().
* Find the top 10:
  * Prediction probabilities indexes
  * Prediction probabilities values
  * Prediction labels
* Plot the top 10 prediction probability values and labels, coloring the true label green.

In [None]:
def plot_pred_conf(prediction_probabilities,labels,n=1):
    """
    Plots the top 10 highest prediction confidences for sample n as a bar
    chart, colouring the bar of the truth label green when it is among them.

    Args:
        prediction_probabilities: indexable collection of per-sample
            probability arrays (one value per entry of unique_breeds).
        labels: indexable collection of truth labels.
        n: index of the sample to plot.
    """
    pred_prob,true_label = prediction_probabilities[n],labels[n]

    # Indexes of the 10 highest confidences, highest first
    top_10_pred_indexes = pred_prob.argsort()[-10:][::-1]
    # Matching confidence values and labels
    top_10_pred_values = pred_prob[top_10_pred_indexes]
    top_10_pred_labels = unique_breeds[top_10_pred_indexes]

    # Setup plot
    positions = np.arange(len(top_10_pred_labels))
    top_plot = plt.bar(positions,
                      top_10_pred_values,
                      color="grey")

    plt.xticks(positions,
              labels=top_10_pred_labels,
              rotation="vertical")

    # Highlight the truth label if it made the top 10
    if np.isin(true_label,top_10_pred_labels):
        top_plot[np.argmax(top_10_pred_labels == true_label)].set_color('green')
    

In [None]:
plot_pred_conf(prediction_probabilities = predictions,labels=val_labels,n=9)

In [None]:
# Let's check a few predictions and their different values
# Offset added to every sample index; change it to view a different set of samples
i_multiplier = 0
num_rows = 3
num_cols = 2
num_images = num_rows*num_cols
# Each sample takes two subplot columns: the image and its confidence bar chart
plt.figure(figsize=(5*2*num_cols,5*num_rows))
for i in range(num_images):
    # Odd subplot positions: the image with its predicted / truth label title
    plt.subplot(num_rows,2*num_cols,2*i+1)
    plot_pred(prediction_probabilities=predictions,
            labels=val_labels,
            images=val_images,
            n=i+i_multiplier)
    
    # Even subplot positions: top 10 prediction confidences for the same sample
    plt.subplot(num_rows,2*num_cols,2*i+2)
    plot_pred_conf(prediction_probabilities=predictions,
                labels=val_labels,
                n=i+i_multiplier)
plt.tight_layout(h_pad=1.0)
plt.show()    

## Saving and reloading a model

In [None]:
def save_model(model,suffix=None):
    """
    Saves `model` to an .h5 file in the current working directory.

    Args:
        model: object exposing a save(path) method (e.g. a Keras model).
        suffix: optional text appended to the file name. With a suffix the
            file is "model_<suffix>.h5"; without one it is "model.h5".
    """
    print("Saving model...")
    # The default suffix=None used to raise TypeError on string concatenation
    if suffix is None:
        model_path = "model.h5"
    else:
        model_path = "model_"+str(suffix)+".h5"
    model.save(model_path)

In [None]:
def load_model(model_path):
    """
    Loads and returns the saved Keras model stored at `model_path`.

    hub.KerasLayer is registered as a custom object under the name
    "KerasLayer" for the load.
    """
    print("Loading saved model......")
    custom_layers = {"KerasLayer":hub.KerasLayer}
    return tf.keras.models.load_model(model_path, custom_objects=custom_layers)

In [None]:
# Save our model trained on 1000 images
save_model(model,suffix="1000-images-Adam")

In [None]:
# Load our model trained on 1000 images
model_1000_images = load_model("./model_1000-images-Adam.h5")

In [None]:
# Evaluate the pre-saved model
model.evaluate(val_data)

In [None]:
# Evaluate the loaded model
model_1000_images.evaluate(val_data)

## Training a model (on the full data)

In [None]:
len(X), len(Y)

In [None]:
# Turn full training data in a data batch
full_data=create_data_batches(X,Y)

In [None]:
# Instantiate a new model for training on the full dataset
full_model = create_model()

In [None]:
# Create full model callbacks
# Early stopping callback
# Note: No validation set when training on all the data, therefore we can't monitor validation accuracy
full_model_early_stopping = tf.keras.callbacks.EarlyStopping(monitor="accuracy",patience=3)

In [None]:
# Fit the full model to the full training data
full_model.fit(x=full_data,
              epochs=NUM_EPOCHS,
              callbacks=[full_model_early_stopping])

## Saving and reloading the full model

In [None]:
# Save model to file
save_model(full_model,suffix="all-image-Adam")

In [None]:
# Load in the full model
loaded_full_model = load_model("./model_all-image-Adam.h5")

## Making predictions on the test dataset

To make predictions on the test data, we'll:

* Get the test image filenames.
* Convert the filenames into test data batches using `create_data_batches()` and setting the `test_data` parameter to True (since there are no labels with the test images).
* Make a predictions array by passing the test data batches to the `predict()` function.

In [None]:
test_filenames = ["../input/dog-breed-identification/test/"+fname for fname in os.listdir("../input/dog-breed-identification/test")]
test_filenames[:10]

In [None]:
# How many test images are there?
len(test_filenames)

In [None]:
# Create test data batch
test_data = create_data_batches(test_filenames, test_data=True)

In [None]:
# Make predictions on test data batch using the loaded full model
test_predictions = loaded_full_model.predict(test_data,
                                             verbose=1)

In [None]:
# Check out the test predictions
test_predictions[:1]

## Making predictions on custom images

If we want to make predictions on our own custom images, we have to pass them to the model in the same format the model was trained on.

To do so, we'll:

* Get the filepaths of our own images.
* Turn the filepaths into data batches using `create_data_batches()`. And since our custom images won't have labels, we set the `test_data` parameter to `True`.
* Pass the custom image data batch to our model's `predict()` method.
* Convert the prediction output probabilities to prediction labels.
* Compare the predicted labels to the custom images.

In [None]:
# Get custom image filepaths
custom_filenames = ["../input/dogimages2/"+fname for fname in os.listdir("../input/dogimages2")]
custom_filenames[:3]

In [None]:
# Turn custom image into batch (set to test data because there are no labels)
custom_data = create_data_batches(custom_filenames, test_data=True)

In [None]:
# Make predictions on the custom data
custom_preds = loaded_full_model.predict(custom_data)

In [None]:
# Get custom image prediction labels
custom_preds_labels = [get_pred_label(custom_preds[i]) for i in range(len(custom_preds))] 

In [None]:
# Get custom images
custom_images=[]
# Loop through unbatched data
for images in custom_data.unbatch().as_numpy_iterator():
    custom_images.append(images)

In [None]:
# Check custom image predictions
# Show every custom image in a single row, titled with its predicted label.
plt.figure(figsize=(10,10))
for i,image in enumerate(custom_images):
    # Size the grid from the number of images instead of a hard-coded 3,
    # so more than three custom images no longer raise a ValueError
    plt.subplot(1,len(custom_images),i+1)
    plt.xticks([])
    plt.yticks([])
    plt.title(custom_preds_labels[i])
    plt.imshow(image)