In [1]:
%load_ext autoreload
%autoreload 2
    
import so_ml_tools as soml

import tensorflow as tf
import matplotlib.pyplot as plt

from keras import Sequential, Model
from keras.optimizers import Adam
from keras.layers import Dense, RandomFlip, RandomZoom, RandomHeight, \
    RandomWidth, RandomRotation, Input, GlobalAveragePooling2D
from keras.losses import categorical_crossentropy
from keras.utils import image_dataset_from_directory

2024-01-30 16:20:36.823088: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-30 16:20:36.823109: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-30 16:20:36.823127: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Transfer learning with TensorFlow Part 3: Scaling up (Food vision mini)

We've seen the power of transfer learning feature extraction and fine-tuning, now it's time to scale up to all of the classes in Food101 (101 total classes of food).

Our goal is to beat the original Food101 paper with 10% of the training data (leveraging the power of deep learning).

## 101 Food Classes: working with less data

The data we're downloading comes from the original Food101 dataset but has been preprocessed using the image_data_modification notebook - https://github.com/mrdbourke/tensorflow-deep-learning/blob/main/extras/image_data_modification.ipynb

In [2]:
# Fetch the 10% Food101 archive (the helper skips the download when the file
# already exists, see the cell output) and unpack it under ./data.
soml.util.io.download_file(
    source='https://storage.googleapis.com/ztm_tf_course/food_vision/101_food_classes_10_percent.zip',
    filepath='./101_food_classes_10_percent.zip',
)
soml.util.io.extract_zip(
    filepath='./101_food_classes_10_percent.zip',
    folder='./data',
)

# Directory layout inside the archive: one sub-folder per class under train/ and test/.
train_dir = './data/101_food_classes_10_percent/train'
test_dir = './data/101_food_classes_10_percent/test'

Download of https://storage.googleapis.com/ztm_tf_course/food_vision/101_food_classes_10_percent.zip skipped, file ./101_food_classes_10_percent.zip exists.
Extraction of ./101_food_classes_10_percent.zip started.
Extraction of ./101_food_classes_10_percent.zip completed.


In [3]:
# Print the number of directories and files found at each level of the dataset.
soml.util.io.list_dir_summary(
    folder='./data/101_food_classes_10_percent',
)

There are 2 directories, and 0 in ./data/101_food_classes_10_percent
There are 101 directories, and 0 in ./data/101_food_classes_10_percent/test
There are 0 directories, and 250 in ./data/101_food_classes_10_percent/test/fish_and_chips
There are 0 directories, and 250 in ./data/101_food_classes_10_percent/test/poutine
There are 0 directories, and 250 in ./data/101_food_classes_10_percent/test/caprese_salad
There are 0 directories, and 250 in ./data/101_food_classes_10_percent/test/gnocchi
There are 0 directories, and 250 in ./data/101_food_classes_10_percent/test/omelette
There are 0 directories, and 250 in ./data/101_food_classes_10_percent/test/bread_pudding
There are 0 directories, and 250 in ./data/101_food_classes_10_percent/test/shrimp_and_grits
There are 0 directories, and 250 in ./data/101_food_classes_10_percent/test/carrot_cake
There are 0 directories, and 250 in ./data/101_food_classes_10_percent/test/pancakes
There are 0 directories, and 250 in ./data/101_food_classes_10_pe

In [4]:
# Input pipeline: images are resized to IMG_SIZE and labels are one-hot
# encoded ('categorical') to match the categorical cross-entropy loss.
IMG_SIZE = (224, 224)

train_data_all_10_percent = image_dataset_from_directory(
    directory=train_dir,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=16,
)

# The test set keeps its on-disk order (shuffle=False) so predictions can be
# lined up with labels and file paths later in the notebook.
test_data_all_10_percent = image_dataset_from_directory(
    directory=test_dir,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=16,
    shuffle=False,
)

Found 7575 files belonging to 101 classes.


2024-01-30 16:20:44.448168: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW
2024-01-30 16:20:44.448261: E tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:312] kernel version 535.146.2 does not match DSO version 535.154.5 -- cannot find working devices in this configuration


Found 25250 files belonging to 101 classes.


# Train a big dog model with transfer learning on 10% of 101 Food classes.

Here are the steps we are going to do first.
* Create a ModelCheckpoint callback
* Create a data augmentation layer to build data augmentation right into the model
* Build a headless (no top layers) Functional EfficientNetB0 backboned-model (we'll create our own output layer)
* Compile our model
* Feature extract for 5 full passes (5 epochs on the train dataset and validate on 15% of the test data, to save epoch time)

In [5]:
# Create the checkpoint callback: stores weights only, and only when the
# monitored validation accuracy improves.
model_checkpoint_callback = soml.tf.fit.callback.model_checkpoint_callback(
    experiment_name="101_classes_10_percent_data_model_checkpoint",
    metric='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
)

In [6]:
# Data augmentation is built into the model as a Sequential block of random
# image transformations.
# Note: no Rescaling(1/255.) layer is added here. It is only necessary for some
# models (e.g. ResNet50V2), not for EfficientNetB0.
_augmentation_steps = [
    RandomFlip("horizontal"),
    RandomRotation(.2),
    RandomHeight(.2),
    RandomWidth(.2),
    RandomZoom(.2),
]
data_augementation_layer = Sequential(_augmentation_steps, name="data_augmentation")

In [7]:
# Feature-extraction model: a frozen, headless EfficientNetV2B0 backbone with
# our own pooling + softmax classification head on top.
base_model = tf.keras.applications.efficientnet_v2.EfficientNetV2B0(include_top=False)
base_model.trainable = False  # Freeze every backbone layer

input_layer = Input(shape=(*IMG_SIZE, 3), name="input_layer")
augmented = data_augementation_layer(input_layer)  # Augment images
# training=False runs the backbone in inference mode
features = base_model(augmented, training=False)
pooled = GlobalAveragePooling2D(name="global_average_pooling_2d")(features)

# One output unit per class found in the training directory
output_layer = Dense(
    len(train_data_all_10_percent.class_names),
    name="output_layer",
    activation="softmax",
)(pooled)

model = Model(inputs=input_layer, outputs=output_layer)

In [8]:
# Print the layer-by-layer overview (output shapes and parameter counts) of the model.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 224, 224, 3)]     0         
                                                                 
 data_augmentation (Sequent  (None, None, None, 3)     0         
 ial)                                                            
                                                                 
 efficientnetv2-b0 (Functio  (None, None, None, 1280   5919312   
 nal)                        )                                   
                                                                 
 global_average_pooling_2d   (None, 1280)              0         
 (GlobalAveragePooling2D)                                        
                                                                 
 output_layer (Dense)        (None, 101)               129381    
                                                             

In [9]:
# Compile for multi-class classification with one-hot labels, using Adam with
# its default settings and tracking accuracy.
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=["accuracy"],
)

In [10]:
# Feature-extraction training: 5 epochs over the full training set.
# To keep epochs short, validation only runs on the first 15% of the test batches.
history_all_classes_10_percent = model.fit(
    train_data_all_10_percent,
    epochs=5,
    steps_per_epoch=len(train_data_all_10_percent),
    validation_data=test_data_all_10_percent,
    validation_steps=int(0.15 * len(test_data_all_10_percent)),
    callbacks=[model_checkpoint_callback],
)

Epoch 1/5
 84/474 [====>.........................] - ETA: 25s - loss: 4.3695 - accuracy: 0.0818

KeyboardInterrupt: 

In [None]:
# Evaluate on the complete test set. The result is kept for the comparison
# with the mixed-precision run at the end of the notebook.
fine_tuning_results = model.evaluate(x=test_data_all_10_percent)
fine_tuning_results

In [None]:
# Plot the training curves recorded during the feature-extraction run.
soml.tf.model.plot.plot_history(history=history_all_classes_10_percent)

Seems to be overfitting, the validation accuracy is flattening but the training accuracy is still increasing.

## Fine-tuning

In [None]:
# Unfreeze only the last 5 layers of the base model for fine-tuning; every
# other backbone layer stays frozen.
#
# The container itself must be trainable: in tf.keras a model whose own
# `trainable` flag is False reports no trainable weights, even when individual
# sub-layers are switched back on afterwards. So we enable the container first
# and freeze the layers one by one instead of using `base_model.trainable = False`.
base_model.trainable = True
for layer in base_model.layers:
    layer.trainable = False

soml.tf.model.layer.set_trainable_on_last_n_layers(model=base_model, n=5, trainable=True)

# List the layers to verify which ones are trainable now.
soml.tf.model.layer.list_model(model=base_model)

In [None]:
# Changes to `trainable` only take effect after compiling again. A lower
# learning rate (1e-4) is used for the fine-tuning phase.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss=categorical_crossentropy,
    metrics=["accuracy"],
)

In [None]:
# Fine-tune for 5 more epochs.
# `epochs` is the TOTAL epoch count we are after: 5 feature-extraction epochs
# already done + 5 fine-tuning epochs = 10.
fine_tune_epochs = 10

# Continue counting from the epoch after the last feature-extraction epoch.
history_all_classes_10_percent_fine_tune = model.fit(
    train_data_all_10_percent,
    epochs=fine_tune_epochs,
    initial_epoch=history_all_classes_10_percent.epoch[-1] + 1,
    steps_per_epoch=len(train_data_all_10_percent),
    validation_data=test_data_all_10_percent,
    validation_steps=int(0.15 * len(test_data_all_10_percent)),
)

In [None]:
# Evaluate the fine-tuned model on the complete test set.
all_classes_10_percent_fine_tune_results = model.evaluate(x=test_data_all_10_percent)
all_classes_10_percent_fine_tune_results

In [None]:
# Plot the feature-extraction and fine-tuning histories back to back.
soml.tf.model.plot.plot_consecutive_histories(
    histories=[
        history_all_classes_10_percent,
        history_all_classes_10_percent_fine_tune,
    ],
    labels=["base", "fine-tune"],
)

## Saving and loading our model

To use our model in an external application we'll need to save it and export it somewhere.

In [None]:
# Persist the fine-tuned model to disk in HDF5 format.
model.save(
    "./models/101_food_classes_10_percent_saved_big_dog_model",
    save_format="h5",
)

# This will fail, due to a bug that is still present in TF version 2.12.0

In [None]:
# Load the model back from the path it was saved to.
loaded_model = tf.keras.models.load_model(
    "./models/101_food_classes_10_percent_saved_big_dog_model"
)

In [None]:
# Evaluate the re-loaded model on the full test set so it can be compared
# with the results from before saving.
loaded_model_result = loaded_model.evaluate(x=test_data_all_10_percent)
loaded_model_result

In [None]:
# Reference values from before saving: the results of the loaded model (above)
# should be very similar to these.
all_classes_10_percent_fine_tune_results

In [None]:
# Download a pre-trained copy of the model, so the evaluation below does not
# depend on the training run in this notebook.
soml.util.io.download_file(
    source='https://storage.googleapis.com/ztm_tf_course/food_vision/06_101_food_class_10_percent_saved_big_dog_model.zip',
    filepath='./06_101_food_class_10_percent_saved_big_dog_model.zip',
)
# Extract explicitly into ./data (as done for the dataset archive) because the
# model is loaded from ./data below.
soml.util.io.extract_zip(
    filepath='./06_101_food_class_10_percent_saved_big_dog_model.zip',
    folder='./data',
)
# NOTE: this rebinds `model`; all following cells use the downloaded model.
model = tf.keras.models.load_model('./data/06_101_food_class_10_percent_saved_big_dog_model')

In [None]:
# Evaluate the downloaded model on the full test set. The result is compared
# with the scikit-learn accuracy further down.
loaded_accuracy = model.evaluate(x=test_data_all_10_percent)

In [None]:
# Display the evaluation result of the downloaded model.
loaded_accuracy

In [None]:
# Predict on the whole test set; verbose=1 shows a progress bar so we can see
# how long it takes.
pred_probs = model.predict(
    x=test_data_all_10_percent,
    verbose=1,
)

In [None]:
# How many predictions are there? (length of the first axis of pred_probs)
len(pred_probs)

In [None]:
# What's the shape of our predictions array?
pred_probs.shape

In [None]:
# How do they look? Show only the first 10 rows to keep the output short.
pred_probs[:10]

In [None]:
# We get one prediction probability per class for every sample.
# Inspect sample 0: number of probabilities, the raw values, and the index of
# the largest probability (argmax), which is the predicted class index.
print(f"Number of prediction probabilities for sample 0: {len(pred_probs[0])}")
print(f"What prediction probability sample 0 looks like:\n {pred_probs[0]}")
print(f"The class with the highest predicted probability by the model for sample 0: {pred_probs[0].argmax()}")

In [None]:
# Collect the ground-truth labels of the test dataset.
# Note: this might take a minute or so, because every batch of the test
# dataset has to be unravelled.
y_labels = soml.tf.dataset.get_labels(
    dataset=test_data_all_10_percent,
)

In [None]:
# Predicted class index per sample: position of the highest probability in each row.
pred_classes = pred_probs.argmax(axis=1)
pred_classes

In [None]:
# Number of ground-truth labels collected from the test dataset.
len(y_labels)

In [None]:
# Accuracy computed with scikit-learn from ground-truth labels and predicted classes.
from sklearn.metrics import accuracy_score

sklearn_accuracy = accuracy_score(y_true=y_labels, y_pred=pred_classes)
sklearn_accuracy

In [None]:
# Does the accuracy reported by model.evaluate() match the Scikit-Learn measured accuracy?
import numpy as np

# Remember: loaded_accuracy consists of two values, the second one is the accuracy.
# np.isclose is used because the two floats may differ by rounding.
print(f"Close? {np.isclose(loaded_accuracy[1], sklearn_accuracy)} | Difference: {loaded_accuracy[1] - sklearn_accuracy}")

In [None]:
# Get the class names from the test dataset; these are used below to translate
# class indices into readable labels.
class_names = test_data_all_10_percent.class_names
class_names

In [None]:
# Plot the confusion matrix for all classes. The very large figure size keeps
# the many class cells readable.
soml.multiclass.plot.confusion_matrix(
    y_true=y_labels,
    y_pred=pred_probs,
    class_names=class_names,
    figsize=(100, 100),
    text_size=20,
)

## Let's keep the evaluation train going, time for a classification report

Scikit-learn has a helpful function for acquiring many different classification metrics per class (e.g. precision, recall and F1) called `classification_report`.

https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html

In [None]:
# Per-class precision, recall and F1, labelled with the class names.
from sklearn.metrics import classification_report

print(
    classification_report(
        y_true=y_labels,
        y_pred=pred_classes,
        target_names=class_names,
    )
)

The numbers above give a great class-by-class evaluation of our model's predictions, but with so many classes they're quite hard to understand.

Let's visualize the data.

## Let's plot all of our classes F1-scores...

In [None]:
# Chart the F1-score of every class. The tall figure leaves room for one entry
# per class.
soml.multiclass.plot.f1_score_chart(
    y_true=y_labels,
    y_pred=pred_probs,
    class_names=class_names,
    figsize=(8, 40),
)

## Visualizing predictions on custom images

Now, this is the real test: how does our model go on food images not even in our test dataset (images of our own)?

To visualize our model's predictions on our own images we'll need a function to load and preprocess images. Specifically, it will need to:
* Read in a target image filepath using tf.io.read_file()
* Turn the image into a Tensor using tf.io.decode_image()
* Resize the image tensor to be the same size as the images our model has trained on using tf.image.resize()
* Scale the image to get all of the pixel values between 0 & 1 (if necessary)

In [None]:
# See function definition in soml.util.io.load_image_as_tensor

Now we've got our function to load and prepare target images, let's write some code to visualize images, their target label and our model's predictions.

Specifically, we'll write some code to:
1. Load a few random images from the test dataset
2. Make predictions on the loaded images
3. Plot the original image(s) along with the model's predictions, prediction probability and truth label

In [None]:
# Make predictions on a few random test images and plot each image with its
# true label, predicted label and prediction probability.
import os
import random

# Grid layout of the figure; one random image is drawn per grid cell.
rows = 1
cols = 3

plt.figure(figsize=(17, 10))
for i in range(rows * cols):
  # Choose a random image from a random class
  class_name = random.choice(class_names)
  filename = random.choice(os.listdir(os.path.join(test_dir, class_name)))
  filepath = os.path.join(test_dir, class_name, filename)

  # Load the image (unscaled) and make a prediction on a batch of one
  img = soml.util.io.load_image_as_tensor(filename=filepath, scale=False)
  pred_prob = model.predict(tf.expand_dims(img, axis=0), verbose=0) # Prediction probabilities array
  pred_class = class_names[pred_prob.argmax()] # Class name of the highest probability

  # Plot the image
  plt.subplot(rows, cols, i + 1)
  # The image is loaded with scale=False, so pixel values are presumably in the
  # 0-255 range (TODO confirm against load_image_as_tensor). imshow expects
  # RGB floats in [0, 1] or integers in [0, 255], so cast to uint8 for display.
  plt.imshow(tf.cast(img, tf.uint8))
  # Green title when the prediction matches the true class, red otherwise
  title_color = "g" if class_name == pred_class else "r"
  plt.title(f"actual: {class_name}, pred: {pred_class}, prob: {pred_prob.max():.2f}", c=title_color)
  plt.axis(False)


## Finding the most wrong predictions

To find out where our model is most wrong, let's write some code to find out the following:
1. Get all of the image file paths in the test dataset using list_files() method.
2. Create a pandas DataFrame of the image filepaths, ground truth labels, predicted classes (from our model), max prediction probabilities, and ground truth class names.
3. Use our DataFrame to find all the wrong predictions (where the ground truth label doesn't match the prediction).
4. Sort the DataFrame based on wrong predictions (have the highest prediction probability predictions at the top).
5. Visualize the images with the highest prediction probabilities but the wrong prediction.

In [None]:
# 1. Collect the path of every .jpg in the test directory (one sub-folder per
# class), unshuffled so the order is deterministic.
import os

test_jpg_pattern = os.path.join(test_dir, '*', '*.jpg')
filepaths = [
    path_tensor.numpy()
    for path_tensor in test_data_all_10_percent.list_files(test_jpg_pattern, shuffle=False)
]

filepaths[:10]

In [None]:
# 2. One row per test image: path, true/predicted class index, confidence of
# the prediction, and the readable class names.
import pandas as pd

true_class_names = [class_names[label] for label in y_labels]
pred_class_names = [class_names[label] for label in pred_classes]

pred_pd = pd.DataFrame({
    "image_path": filepaths,
    "y_true": y_labels,
    "y_pred": pred_classes,
    "pred_conf": pred_probs.max(axis=1),  # highest probability per sample
    "y_true_classname": true_class_names,
    "y_pred_classn": pred_class_names,
})

pred_pd

In [None]:
# 3. Flag every row whose predicted class equals the true class.
pred_pd["pred_correct"] = pred_pd["y_true"].eq(pred_pd["y_pred"])
pred_pd.head()

In [None]:
# 4. Keep the wrong predictions only and take the 100 the model was most
# confident about ("most wrong" first).
wrong_predictions = pred_pd[~pred_pd["pred_correct"]]
pred_pd_top_100 = wrong_predictions.sort_values("pred_conf", ascending=False).head(100)
pred_pd_top_100

In [None]:
# 5. Visualize the test samples that have a wrong prediction but the highest
# prediction probability.
images_to_view = 9   # matches the 3x3 subplot grid below
start_index = 0      # increase to page through the top-100 table
plt.figure(figsize=(20, 10))
rows_to_plot = pred_pd_top_100[start_index:start_index + images_to_view]
for i, row in enumerate(rows_to_plot.itertuples()):
  plt.subplot(3, 3, i + 1)
  # Access the columns by name instead of by position, so adding or
  # reordering DataFrame columns cannot silently shift the values.
  img = soml.util.io.load_image_as_tensor(row.image_path, scale=False)
  # The image is loaded with scale=False, so pixel values are presumably in the
  # 0-255 range (TODO confirm against load_image_as_tensor). imshow expects
  # RGB floats in [0, 1] or integers in [0, 255], so cast to uint8 for display.
  plt.imshow(tf.cast(img, tf.uint8))
  plt.title(f"actual: {row.y_true_classname}, pred: {row.y_pred_classn}, prob: {row.pred_conf:.2f}")
  plt.axis(False)

## Test out the big dog model on our own custom images

In [None]:
# Download and unpack our own food images, then summarise what was extracted.
soml.util.io.download_file(
    source='https://storage.googleapis.com/ztm_tf_course/food_vision/custom_food_images.zip',
    filepath='custom_food_images.zip',
)
soml.util.io.extract_zip(filepath='custom_food_images.zip')
soml.util.io.list_dir_summary(folder='custom_food_images')

In [None]:
# Get the file paths of the custom food images.
# The directory is listed relative to the working directory (the same location
# summarised after extraction) instead of a hard-coded absolute Colab path, so
# the notebook also runs outside of Colab. Sorting keeps the order deterministic.
custom_food_dir = "custom_food_images"
custom_food_images = [os.path.join(custom_food_dir, img_path)
                      for img_path in sorted(os.listdir(custom_food_dir))]
custom_food_images

In [None]:
# Predict on every custom food image and plot it with the predicted class and
# its probability (one figure per image).
for filename in custom_food_images:
  img = soml.util.io.load_image_as_tensor(filename, scale=False)
  # The model expects a batch, so add a leading batch dimension
  pred_prob = model.predict(tf.expand_dims(img, axis=0), verbose=0)
  pred_class = class_names[pred_prob.argmax()] # Class name of the highest probability
  # Plot the appropriate information
  plt.figure()
  # The image is loaded with scale=False, so pixel values are presumably in the
  # 0-255 range (TODO confirm against load_image_as_tensor). imshow expects
  # RGB floats in [0, 1] or integers in [0, 255], so cast to uint8 for display.
  plt.imshow(tf.cast(img, tf.uint8))
  plt.title(f"pred: {pred_class}, probs: {pred_prob.max():.2f}")
  plt.axis(False)




## Assignment: Perform first model but with mixed_precision enabled

In [None]:
from keras import mixed_precision
from keras.layers import Activation

# Enable mixed precision globally for the layers created from here on.
policy = mixed_precision.Policy("mixed_float16")
mixed_precision.set_global_policy(policy)

# Same architecture as the first model: frozen, headless EfficientNetV2B0
# backbone used as a feature extractor.
base_model = tf.keras.applications.efficientnet_v2.EfficientNetV2B0(include_top=False)
base_model.trainable = False

input_layer = Input(shape=(*IMG_SIZE, 3), name="input_layer")
augmented = data_augementation_layer(input_layer)  # Augment images
# training=False runs the backbone in inference mode
features = base_model(augmented, training=False)
pooled = GlobalAveragePooling2D(name="global_average_pooling_2d")(features)

# Very important with mixed precision: apply the final activation in a
# separate layer that is forced to float32, instead of inside the Dense layer.
#
# see: https://www.tensorflow.org/guide/mixed_precision
logits = Dense(len(train_data_all_10_percent.class_names), name="output_layer")(pooled)
output_layer = Activation('softmax', dtype='float32')(logits)

model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model with the same settings as the first model
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=["accuracy"],
)

model.summary()

In [None]:
# Train the mixed-precision model with the same settings as the first model:
# 5 epochs, validating on the first 15% of the test batches only.
# NOTE: this overwrites `history_all_classes_10_percent` from the first run.
history_all_classes_10_percent = model.fit(
    train_data_all_10_percent,
    epochs=5,
    steps_per_epoch=len(train_data_all_10_percent),
    validation_data=test_data_all_10_percent,
    validation_steps=int(0.15 * len(test_data_all_10_percent)),
    callbacks=[model_checkpoint_callback],
)

In [None]:
# Evaluate the mixed-precision model on the complete test set.
mixed_precision_results = model.evaluate(x=test_data_all_10_percent)
mixed_precision_results

In [None]:
# Compare with the evaluation results of the first (float32) model, which were
# stored in `fine_tuning_results` earlier in the notebook.
fine_tuning_results

In [None]:
# Plot the training curves of the mixed-precision run (the history variable
# was overwritten by the fit above).
soml.tf.model.plot.plot_history(history=history_all_classes_10_percent)