# Initialization

### Loading dependencies

In [None]:
import os
import random
import numpy as np
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_input_efficientnet

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

import math

# Short aliases for the Keras API and its layers module, used throughout the notebook
tfk = tf.keras
tfkl = tf.keras.layers

# Show the TensorFlow version in the cell output
print(tf.__version__)

### Set seed for reproducibility

In [None]:
# Single seed shared by every random number generator used in the notebook
seed = 42

# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting it
# here presumably only affects processes started later - confirm.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed Python, NumPy and TensorFlow (TF2 and TF1-compat entry points), in that order
for seed_fn in (random.seed,
                np.random.seed,
                tf.random.set_seed,
                tf.compat.v1.set_random_seed):
    seed_fn(seed)

### Suppress warnings

In [None]:
import warnings
import logging

# NOTE(review): TensorFlow is already imported in an earlier cell, so this variable
# is set after the native runtime was loaded and presumably has no effect on the
# current kernel; move it before `import tensorflow` if C++ logs must be hidden - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Hide Python-level warnings (FutureWarning first, then every Warning subclass)
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# Only let errors through TensorFlow's Python logger (set once; the previous
# INFO level was immediately overridden) and silence AutoGraph
tf.get_logger().setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

## Plants dataset

In [None]:
# Root folder of the dataset and one sub-folder per split
dataset_dir = './datasetNOTEST'
training_dir, validation_dir, test_dir = (
    os.path.join(dataset_dir, split_name) for split_name in ('train', 'val', 'test')
)

In [None]:
# Class names: the entry at index i (0..7) is 'Species<i+1>'
labels = [f'Species{number}' for number in range(1, 9)]

## Models metadata

In [None]:
# --- Input ---------------------------------------------------------------
img_w = 96
img_h = 96
input_shape = (img_h, img_w, 3)   # three colour channels
classes = 8                       # size of the softmax output

# --- Training ------------------------------------------------------------
epochs = 200
batch_size = 32

# --- Early stopping --------------------------------------------------------
early_stopping = True
# NOTE(review): not referenced by the callbacks visible in this notebook - confirm
patience_epochs = 10

# --- Augmentation ----------------------------------------------------------
apply_augmentation = True
# Only referenced in commented-out steps_per_epoch / validation_steps arguments
train_size = 2829
val_size = 713

# Data Augmentation

In [None]:
from PIL import Image, ImageEnhance

def preprocessing(img):
    """Boost the colour saturation of one image and return it as a NumPy array.

    Used as the `preprocessing_function` of `ImageDataGenerator`, whose contract
    is to receive one rank-3 image and get back a NumPy tensor of the same shape.
    The previous version returned a PIL image instead.

    Args:
        img: a single image (rank-3 array or tensor, channels last).

    Returns:
        A float32 `np.ndarray` holding the pixel values of the 8-bit image
        produced by `array_to_img`, i.e. the same values the generator
        previously obtained from the returned PIL image.
    """
    img = tf.image.adjust_saturation(img, 3.5)
    # A contrast factor of 1 leaves the image unchanged; kept as a tuning knob
    img = tf.image.adjust_contrast(img, 1)
    # array_to_img rescales by default (min -> 0, max -> 255) and quantises to 8 bits
    pil_img = tf.keras.utils.array_to_img(img)
    return np.asarray(pil_img, dtype=np.float32)

In [None]:
# Validation / test generators: saturation preprocessing only, no augmentation
valid_data_gen = ImageDataGenerator(preprocessing_function=preprocessing)
test_data_gen = ImageDataGenerator(preprocessing_function=preprocessing)

# Options for reading the validation folder; shuffling is off so the batch order
# matches `valid_gen.classes`
valid_flow_options = dict(
    target_size=(img_w, img_h),
    color_mode='rgb',
    classes=None,  # can be set to labels
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=False,
    seed=seed,
)
valid_gen = valid_data_gen.flow_from_directory(directory=validation_dir,
                                               **valid_flow_options)


In [None]:
# Geometric augmentation is only enabled when `apply_augmentation` is set;
# the saturation preprocessing is applied either way
augmentation_options = {}
if apply_augmentation:
    augmentation_options = dict(
        width_shift_range=50,
        height_shift_range=50,
        zoom_range=0.3,
        horizontal_flip=True,
        vertical_flip=True,
        #brightness_range=[1, 1.8],
        fill_mode='nearest',
    )
train_data_gen = ImageDataGenerator(preprocessing_function=preprocessing,
                                    **augmentation_options)

# Stream shuffled training batches straight from the training folder
train_flow_options = dict(
    target_size=(img_w, img_h),
    color_mode='rgb',
    classes=None,  # can be set to labels
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)
train_gen = train_data_gen.flow_from_directory(directory=training_dir,
                                               **train_flow_options)

In [None]:
# Preview: one example image next to three augmented versions of it
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.utils import array_to_img

img_path = 'test.jpg'
img = tfk.utils.load_img(img_path, target_size=(96,96))

# The generator works on batches, so add a leading batch axis of size 1
img_arr = img_to_array(img)[np.newaxis, ...]

# First entry is the untouched image; the others come from the training generator
images = [img]
augmented_batches = train_data_gen.flow(img_arr, batch_size=1)
while len(images) < 4:
    images.append(array_to_img(next(augmented_batches)[0]))

# 2x2 grid, filled row by row: original first, then the three augmented images
f, xyarr = plt.subplots(2,2)
for axis, picture in zip(xyarr.flat, images):
    axis.imshow(picture)
plt.show()

In [None]:
# Balanced class weights computed from the labels of the training generator
from  sklearn.utils import class_weight

train_class_ids = train_gen.classes
balanced_weights = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(train_class_ids),
            y=train_class_ids)

# Keras expects a {class index: weight} dictionary
class_weights = dict(enumerate(balanced_weights))

# Transfer Learning

In [None]:
# Load EfficientNetB3 with ImageNet weights and without its top (classification) layers
supernet = tfk.applications.EfficientNetB3(
    input_shape=input_shape,
    include_top=False,
    weights="imagenet",
)


In [None]:
# Freeze the backbone so it is only used as a feature extractor
supernet.trainable = False

inputs = tfk.Input(shape=input_shape)

# Backbone features, averaged over the spatial dimensions
backbone_features = supernet(inputs)
x = tfkl.GlobalAveragePooling2D(name='GAP')(backbone_features)

# Hidden part of the head: three Dense + ReLU stages of decreasing width,
# each with its own seeded Glorot-uniform initializer
for hidden_units in (128, 64, 32):
    x = tfkl.Dense(
        hidden_units,
        kernel_initializer=tfk.initializers.GlorotUniform(seed))(x)
    x = tfkl.ReLU()(x)

# Output layer: one softmax unit per class
outputs = tfkl.Dense(
    classes,
    activation='softmax',
    kernel_initializer=tfk.initializers.GlorotUniform(seed))(x)

# Tie input and output together and print the architecture
model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

model.summary()

In [None]:
# Utility function to create folders and callbacks for training
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

def create_callbacks(patience=40, checkpoint_path='./checkpoint/efficientnetB3.h5'):
    """Create the checkpoint folder and build the list of training callbacks.

    Args:
        patience: number of epochs without `val_loss` improvement tolerated by
            early stopping (only used when the notebook-level `early_stopping`
            flag is true).
        checkpoint_path: file the best model (by `val_accuracy`) is saved to.

    Returns:
        A list with, in order: the optional `EarlyStopping` callback, the
        `ModelCheckpoint` callback and the `ReduceLROnPlateau` callback.
    """
    callbacks = []

    # Early stopping: restore the best weights once val_loss stops improving
    if early_stopping:
        callbacks.append(tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                          mode='auto',
                                                          patience=patience,
                                                          restore_best_weights=True))

    # Checkpointer: make sure the target folder exists, then keep only the best model
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)
    callbacks.append(ModelCheckpoint(filepath=checkpoint_path,
                                     verbose=1,
                                     save_best_only=True,
                                     monitor="val_accuracy",
                                     mode="auto"))

    # Learning-rate schedule: multiply the rate by exp(-0.1) after 2 stalled epochs.
    # `factor` is a plain Python float (not a tf.Tensor), as the callback expects.
    callbacks.append(ReduceLROnPlateau(monitor="val_loss",
                                       factor=math.exp(-0.1),
                                       patience=2,
                                       min_delta=0.001,
                                       verbose=2))

    return callbacks

In [None]:
# Build the callback list that is later passed to the fine-tuning `model.fit` call
callbacks = create_callbacks()

In [None]:
# Transfer learning: train only the new head on top of the frozen backbone
# (up to 100 epochs, Adam with learning rate 1e-3, early stopping on val_loss)

model.compile(loss=tfk.losses.CategoricalCrossentropy(),
              optimizer=tfk.optimizers.Adam(1e-3),
              metrics=['accuracy'])

# `batch_size` is not passed to fit: the generators already yield batches whose
# size was set in flow_from_directory
history = model.fit(
    x=train_gen,
    epochs=100,
    validation_data=valid_gen,
    class_weight=class_weights,
    callbacks=[tfk.callbacks.EarlyStopping(monitor='val_loss',
                                           mode='auto',
                                           patience=20,
                                           restore_best_weights=True)]
).history

In [None]:
# Evaluate on the validation generator (no test generator is built in this notebook),
# before any fine-tuning of the backbone

test_metrics = model.evaluate(valid_gen, return_dict=True)
print("Validation metrics without fine tuning")
print(test_metrics)

In [None]:
# List every layer of the EfficientNetB3 backbone with its current trainable flag
backbone_layers = model.get_layer('efficientnetb3').layers
for backbone_layer in backbone_layers:
    print(backbone_layer.name, backbone_layer.trainable)

In [None]:
# Make the backbone trainable again for fine-tuning ...
backbone = model.get_layer('efficientnetb3')
backbone.trainable = True

# ... but keep its first 162 layers frozen
for frozen_layer in backbone.layers[:162]:
    frozen_layer.trainable = False

model.summary()

In [None]:
# Compile again after changing the trainable flags, with a lower learning rate (1e-4)
model.compile(loss=tfk.losses.CategoricalCrossentropy(),
              optimizer=tfk.optimizers.Adam(1e-4),
              metrics=['accuracy'])

# Fine-tune the model. `batch_size` is not passed to fit: the generators already
# yield batches whose size was set in flow_from_directory
history = model.fit(
    x=train_gen,
    epochs=epochs,
    validation_data=valid_gen,
    class_weight=class_weights,
    callbacks=callbacks
).history

In [None]:
# Print Confusion Matrix and Classification Report (Precision, Recall, and F1-score) on the validation set
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Class probabilities for every validation image; the validation generator is not
# shuffled, so the rows line up with `valid_gen.classes`.
# `Model.predict` replaces the deprecated `predict_generator`.
Y_prediction = model.predict(valid_gen)
# Turn the probabilities into predicted class indices
Y_pred_classes = np.argmax(Y_prediction, axis=1)
# True class index of every validation image
Y_true = valid_gen.classes
# Compute the confusion matrix
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)
# target_names must be ordered like the class indices
class_report = classification_report(Y_true, Y_pred_classes,
                                     target_names=list(valid_gen.class_indices.keys()))
print('Confusion Matrix:')
print(confusion_mtx)
print()
print('Classification Report:')
print(class_report)

In [None]:
# Evaluate the current model on the validation generator and keep the metrics as a dict
print("Basic model: ")
model_test_metrics = model.evaluate(valid_gen, return_dict=True)

In [None]:
# One figure per metric from the latest `history`: dashed/faint line for the
# training curve, solid line for the validation curve
for metric_key, figure_title in (('loss', 'Categorical Crossentropy'),
                                 ('accuracy', 'Accuracy')):
    plt.figure(figsize=(15,5))
    plt.plot(history[metric_key], alpha=.3, color='#4D61E2', linestyle='--')
    plt.plot(history['val_' + metric_key], label='Basic model', alpha=.8, color='#4D61E2')

    plt.legend(loc='upper left')
    plt.title(figure_title)
    plt.grid(alpha=.3)

plt.show()