# Imports and data batching

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pathlib
import random
import os
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers.experimental import preprocessing

In [None]:
# Raise the log threshold of TensorFlow's native (C++) backend to reduce console noise.
# NOTE(review): tensorflow is already imported in the first cell; this variable is
# usually read when the library is loaded, so it may have to be set before
# `import tensorflow` to have any effect — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

In [None]:
# Root folder of the image dataset; it is handed to the dataset loader and to
# view_random_image, which looks for one sub-folder per class inside it.
dataset_dir = "../input/pistachio-image-dataset/Pistachio_Image_Dataset/Pistachio_Image_Dataset/"

In [None]:
# For replicable results: seed every random number generator this notebook uses
SEED = 0
# Python's own generator picks the sample picture in view_random_image
random.seed(SEED)
tf.random.set_seed(SEED)
np.random.seed(SEED)

In [None]:
# Size of the images is (600,600). This is the default input size for EfficientNetB7
IMAGE_SIZE = (600, 600)
# Default batch size
BATCH_SIZE = 32
# Load the images with three colour channels; the model inputs use shape IMAGE_SIZE + (3,)
COLOR_MODE = "rgb"
# Fraction of the images held out for the validation/test split (20%)
VAL_SPLIT = 0.2
# Number of batches for a smaller train dataset
SMALL_DATASET_BATCHES = 5

In [None]:
# Training split: the (1 - VAL_SPLIT) share of the images, one-hot labelled.
train_data_all = image_dataset_from_directory(
    dataset_dir,
    subset="training",
    validation_split=VAL_SPLIT,
    seed=SEED,
    label_mode='categorical',
    color_mode=COLOR_MODE,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)
# Held-out split: same directory, same seed and same fraction, so it is carved
# out with exactly the same settings as the training split.
test_data = image_dataset_from_directory(
    dataset_dir,
    subset="validation",
    validation_split=VAL_SPLIT,
    seed=SEED,
    label_mode='categorical',
    color_mode=COLOR_MODE,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

In [None]:
# small train dataset with BATCH_SIZE*SMALL_DATASET_BATCHES images
# NOTE(review): train_data_all is created with the loader's default shuffling, so
# the batches returned by take() may not be the same ones on every epoch —
# confirm whether a fixed subset is intended.
train_data_small = train_data_all.take(SMALL_DATASET_BATCHES)

In [None]:
# Class labels exposed by the training dataset; the bare expression shows them as cell output
class_names = train_data_all.class_names
class_names

# Explore Random images

In [None]:
def view_random_image(target_dir, target_class):
    """Display one randomly chosen image of a class and return its pixel data.

    Args:
        target_dir: Directory that contains one sub-folder per class.
        target_class: Name of the class sub-folder to pick an image from.

    Returns:
        The image as loaded by ``matplotlib.image.imread``.
    """
    # os.path.join works whether or not target_dir ends with a path separator
    target_folder = os.path.join(target_dir, target_class)
    random_image = random.choice(os.listdir(target_folder))
    img = mpimg.imread(os.path.join(target_folder, random_image))
    plt.imshow(img)
    plt.title(target_class)
    plt.axis("off")
    print(f"Image shape: {img.shape}")
    return img

In [None]:
# Preview a random picture of the first class
img = view_random_image(dataset_dir, class_names[0])

In [None]:
# Preview a random picture of the second class
img = view_random_image(dataset_dir, class_names[1])

# Baseline Model

In [None]:
# Baseline: frozen EfficientNetB7 feature extractor + pooling + 2-way softmax head
base_model = tf.keras.applications.EfficientNetB7(include_top=False)
base_model.trainable = False
inputs = tf.keras.layers.Input(shape=(IMAGE_SIZE + (3,)), name="input_layer")
# Efficient net model has the normalizing layer builtin
# training=False keeps the frozen backbone in inference mode during fit(), the
# same way model_1 and the final model call it, so the experiments are comparable.
x = base_model(inputs, training=False)
x = tf.keras.layers.GlobalAveragePooling2D(name="global_average_pooling_layer")(x)
outputs = tf.keras.layers.Dense(2, activation="softmax", name="output_layer")(x)

In [None]:
# Wrap the input/output tensors built in the previous cell into the baseline model
model_0 = tf.keras.Model(inputs, outputs)

In [None]:
# Baseline training setup: Adam with its default settings, categorical
# cross-entropy for the one-hot labels, accuracy as the tracked metric.
model_0.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Number of passes over the training data shared by the experiments
EPOCHS = 10
# Fit the baseline on the small subset; each epoch validates on only a tenth of
# the test batches to keep the validation pass short.
history_0 = model_0.fit(
    train_data_small,
    validation_data=test_data,
    validation_steps=int(0.1 * len(test_data)),
    epochs=EPOCHS,
    verbose=True,
)

## Evaluation

In [None]:
# Plot the per-epoch values recorded in history_0 (trailing ';' hides the text output)
pd.DataFrame(history_0.history).plot(figsize=(10, 7));

In [None]:
# Score the baseline on the whole test split; both values feed the summary table
loss_0, acc_0 = model_0.evaluate(test_data)

# Tentative model #1: Introducing data augmentation

In this test model we'll add a data augmentation layer before feeding the model with training images; this will provide the model with more, and more varied, data to learn from.

## Model creation

In [None]:
# Augmentation block placed in front of the backbone: random flips on both
# axes, random rotations and a random zoom of up to 10%.
data_augmentation = tf.keras.Sequential(
    [
        preprocessing.RandomFlip("horizontal_and_vertical"),
        preprocessing.RandomRotation(1),
        preprocessing.RandomZoom(0.1),
    ],
    name="data_augmentation",
)

In [None]:
# Same frozen EfficientNetB7 backbone as the baseline, with the augmentation block in front
base_model = tf.keras.applications.EfficientNetB7(include_top=False)
base_model.trainable = False
inputs = tf.keras.layers.Input(shape=(IMAGE_SIZE + (3,)), name="input_layer")
x = data_augmentation(inputs)
# training=False: call the backbone in inference mode even while fitting
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D(name="pooling_layer")(x)
# Two-unit softmax head, matching the one-hot ('categorical') labels of the datasets
outputs = tf.keras.layers.Dense(2, activation="softmax", name="output_layer")(x)

In [None]:
# Augmented model, compiled with exactly the baseline's training setup so that
# the augmentation layer is the only difference between the two experiments.
model_1 = tf.keras.Model(inputs, outputs)
model_1.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Same data, epochs and validation budget as the baseline run
history_1 = model_1.fit(
    train_data_small,
    validation_data=test_data,
    validation_steps=int(0.1 * len(test_data)),
    epochs=EPOCHS,
    verbose=True,
)

## Evaluation

In [None]:
# Plot the per-epoch values recorded in history_1 (trailing ';' hides the text output)
pd.DataFrame(history_1.history).plot(figsize=(10, 7));

In [None]:
# Score model 1 on the whole test split; both values feed the summary table
loss_1, acc_1 = model_1.evaluate(test_data)

Model 1 verdict: The accuracy is only slightly better than without data augmentation, but given the nature of the images there's no harm in keeping it, as the model will be exposed to perfectly valid example images. This will vastly increase the number of training images and will help the model learn which patterns make each class unique, instead of memorizing the training images. The loss and validation loss keep going down after the first epochs, meaning there's still margin to improve accuracy by giving the model more training time.

 * Image augmentation pros:
  * Virtually infinite images to train with
  * Avoids overfitting

 * Image augmentation cons:
  * Increased processing time
  * Slower learning

# Tentative model #2: Adjusting the learning rate

To find a good learning rate we're going to fit the baseline model while scheduling an exponentially increasing learning rate: it starts at 0.0001 and grows tenfold every 20 epochs (with the 100 epochs used below it ends at roughly 8.9).
By analyzing the loss obtained at each learning rate we can pick a learning rate close to the one that yields the lowest loss.

## Ideal learning rate estimation

In [None]:
# same model as model_0
base_model = tf.keras.applications.EfficientNetB7(include_top=False)
base_model.trainable = False
inputs = tf.keras.layers.Input(shape=(IMAGE_SIZE + (3,)), name="input_layer")
x = base_model(inputs)
x = tf.keras.layers.GlobalAveragePooling2D(name="global_average_pooling_layer")(x)
outputs = tf.keras.layers.Dense(2, activation="softmax", name="output_layer")(x)

In [None]:
# Throw-away model used only to sweep the learning rate; the optimizer's
# learning rate is overridden each epoch by the scheduler callback at fit time.
model_lr_test = tf.keras.Model(inputs, outputs)
model_lr_test.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# traverse a set of learning rate values starting from 1e-4: epoch n trains with
# 1e-4 * 10**(n/20), i.e. the rate grows tenfold every 20 epochs
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-4 * 10**(epoch/20)) 

In [None]:
# Learning-rate sweep: 100 silent epochs on the small subset, one learning rate
# per epoch as dictated by lr_scheduler. No validation data is needed here.
history_lr_test = model_lr_test.fit(
    train_data_small,
    callbacks=[lr_scheduler],
    epochs=100,
    verbose=False,
)

In [None]:
# Plot the learning rate versus the loss
loss = history_lr_test.history["loss"]
# Learning rate used at each recorded epoch (same formula as lr_scheduler);
# sized from the history so it always matches the number of epochs actually run
lrs = 1e-4 * (10 ** (np.arange(len(loss))/20))
low_loss = min(loss)
low_lr_loss_index = loss.index(low_loss)
low_lr = 1e-4 * (10 ** (low_lr_loss_index/20))
plt.figure(figsize=(10, 7))
plt.semilogx(lrs, loss)
# Red: learning rate with the lowest loss. Green: one order of magnitude below it.
plt.scatter(low_lr, low_loss, alpha=0.9, c='r', s=50)
plt.scatter(low_lr/10, low_loss, alpha=0.9, c='g', s=50)
# Anchor the label on the red point: x is the learning-rate axis, y the loss axis.
# The text itself is shifted a few points so it does not cover the marker.
plt.annotate(f"LR FOR LOWEST LOSS = {round(low_lr, 4)}", (low_lr, low_loss),
             xytext=(10, 10), textcoords="offset points")

plt.xlabel("Learning Rate")
plt.ylabel("Loss")
plt.grid()
plt.title("Learning rate vs. loss");

The estimated optimal learning rate is between 0.00398 and 0.0398. We're picking 0.025.

## Model creation

In [None]:
# same model as model_0
base_model = tf.keras.applications.EfficientNetB7(include_top=False)
base_model.trainable = False
inputs = tf.keras.layers.Input(shape=(IMAGE_SIZE + (3,)), name="input_layer")
x = base_model(inputs)
x = tf.keras.layers.GlobalAveragePooling2D(name="global_average_pooling_layer")(x)
outputs = tf.keras.layers.Dense(2, activation="softmax", name="output_layer")(x)

In [None]:
# Learning rate chosen from the sweep above, replacing Adam's default
LEARNING_RATE = 0.025
model_2 = tf.keras.Model(inputs, outputs)
model_2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Same data, epochs and validation budget as the baseline run
history_2 = model_2.fit(
    train_data_small,
    validation_data=test_data,
    validation_steps=int(0.1 * len(test_data)),
    epochs=EPOCHS,
    verbose=True,
)

## Evaluation

In [None]:
# Plot the per-epoch values recorded in history_2 (trailing ';' hides the text output)
pd.DataFrame(history_2.history).plot(figsize=(10, 7));

In [None]:
# Score model 2 on the whole test split; both values feed the summary table
loss_2, acc_2 = model_2.evaluate(test_data)

Model 2 verdict: Good improvement on accuracy after a careful learning rate choice

 * Learning rate adjustment pros:
  * Better results after fewer epochs, and thus less processing time

 * Learning rate adjustment cons:
  * Being higher than the default (0.001), the learning rate can make the loss become unstable and start rising within fewer epochs too. An early stop might be necessary in the final design

# Final model

Based on the previous tests, the final model features include:
 * Training with the whole dataset
 * A data augmentation layer
 * Training for the same amount of epochs
 * A starting learning rate of 0.025

## Model creation

In [None]:
# Final architecture: augmentation block + frozen EfficientNetB7 + pooling + softmax head
base_model = tf.keras.applications.EfficientNetB7(include_top=False)
base_model.trainable = False
inputs = tf.keras.layers.Input(shape=(IMAGE_SIZE + (3,)), name="input_layer")
# Reuses the data_augmentation block defined for model 1
x = data_augmentation(inputs)
# training=False: call the backbone in inference mode even while fitting
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D(name="pooling_layer")(x)
# Two-unit softmax head, matching the one-hot ('categorical') labels of the datasets
outputs = tf.keras.layers.Dense(2, activation="softmax", name="output_layer")(x)

In [None]:
# Same tuned learning rate as model 2
LEARNING_RATE = 0.025
model_final = tf.keras.Model(inputs, outputs)
model_final.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Same number of epochs as the experiments, but now on the complete training split
EPOCHS = 10
history_final = model_final.fit(
    train_data_all,
    validation_data=test_data,
    validation_steps=int(0.1 * len(test_data)),
    epochs=EPOCHS,
    verbose=True,
)

## Evaluation

In [None]:
# Plot the per-epoch values recorded in history_final (trailing ';' hides the text output)
pd.DataFrame(history_final.history).plot(figsize=(10, 7));

In [None]:
# Score the final model on the whole test split; both values feed the summary table
loss_final, acc_final = model_final.evaluate(test_data)

# Conclusions

In [None]:
# One row per experiment: share of the training data used, epochs actually run,
# test loss and test accuracy (as a percentage).
names = ["Base model", "Using data augmentation", "Higher learning rate", "All of above"]
# The epoch count is read from each training history instead of being typed in,
# so the table stays correct if an experiment is re-run with a different EPOCHS.
m0 = ["10%", len(history_0.history["loss"]), loss_0, round(acc_0 * 100, 3)]
m1 = ["10%", len(history_1.history["loss"]), loss_1, round(acc_1 * 100, 3)]
m2 = ["10%", len(history_2.history["loss"]), loss_2, round(acc_2 * 100, 3)]
m3 = ["100%", len(history_final.history["loss"]), loss_final, round(acc_final * 100, 3)]

In [None]:
# Summary table: one row per experiment, indexed by the experiment name
pd.DataFrame([m0, m1, m2, m3], index=names, columns=["Used dataset", "Epochs", "Loss", "Accuracy %"])

With the power of a pre-trained model, a custom classifier can deliver good results with no changes at all, but further improvement is in the fine details. By testing on a small dataset while changing hyperparameters or adding layers of complexity we can save processing time; those improvements _should_ carry over to the full dataset training, ending with a very accurate model.