# SETTING UP

In [None]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix

# Short aliases for the Keras namespaces used throughout the notebook.
tfk = tf.keras
tfkl = tfk.layers

# Show which TensorFlow version this notebook is running on.
print(tf.__version__)

In [None]:
# One seed shared by every random number generator used in this notebook,
# so that runs are as reproducible as possible.
seed = 9112022

# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed Python, NumPy and TensorFlow (both the TF2 and the compat.v1 API).
for _seed_fn in (random.seed,
                 np.random.seed,
                 tf.random.set_seed,
                 tf.compat.v1.set_random_seed):
    _seed_fn(seed)

In [None]:
import warnings
import logging

# Silence TensorFlow and Python warning noise so the cell outputs stay readable.

# NOTE(review): TF_CPP_MIN_LOG_LEVEL is presumably only honoured when it is set
# before `import tensorflow`; here TensorFlow is already imported, so this line
# may have no effect on the native (C++) logs -- consider moving it above the
# import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Ignoring `Warning` covers every warning category, FutureWarning included.
warnings.simplefilter(action='ignore', category=Warning)

# Only report errors from the TensorFlow Python logger (TF2 and compat.v1 APIs)
# and switch off AutoGraph's verbose output.
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [None]:
# Dataset location; the 'train' and 'val' sub-folders are read from here.
_dataset_root = '/kaggle/input/split8020'
dataset_dir = _dataset_root + '/data_finalV2'

In [None]:
# Human-readable class names; the position in the list (0-7) is the class
# index, i.e. labels[0] == 'Species1' ... labels[7] == 'Species8'.
labels = [f'Species{idx}' for idx in range(1, 9)]

# DATA GENERATOR

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import preprocess_input

# NOTE(review): an earlier note here recommended batch sizes of 64-512
# (according to the labs) and reported 8 as too small; the iterators below
# use 16 -- confirm which value is intended.
# IMPORTANT: the model-specific preprocessing is applied by ImageDataGenerator
# itself through `preprocessing_function`.

# Augmentations applied to the training images only.
augmentation_options = {
    'rotation_range': 10,
    'height_shift_range': 20,
    'width_shift_range': 20,
    'zoom_range': 0.1,
    'horizontal_flip': True,
    'vertical_flip': True,
    'brightness_range': [0.5, 1.5],
    'fill_mode': 'wrap',
}
train_data_gen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                    **augmentation_options)

# Validation images are preprocessed but never augmented.
val_data_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by the training and validation directory iterators.
flow_options = dict(
    target_size=(96, 96),
    color_mode='rgb',
    classes=None,  # can be set to labels
    class_mode='categorical',
    batch_size=16,
    seed=seed,
)

# Training batches are reshuffled between epochs.
train_gen = train_data_gen.flow_from_directory(
    directory=os.path.join(dataset_dir, 'train'),
    shuffle=True,
    **flow_options,
)

# Validation batches keep a fixed order (shuffle=False).
val_gen = val_data_gen.flow_from_directory(
    directory=os.path.join(dataset_dir, 'val'),
    shuffle=False,
    **flow_options,
)

# MODEL DEVELOPMENT

## Transfer learning

In [None]:
# EfficientNetB0 with ImageNet weights and without its classification head,
# built for 224x224 RGB inputs.
effnet = tfk.applications.EfficientNetB0(
    weights='imagenet',
    input_shape=(224, 224, 3),
    include_top=False,
)

In [None]:
# Use the pretrained network as a frozen feature extractor.
effnet.trainable = False

inputs = tfk.Input(shape=(96, 96, 3))

# Upsample the 96x96 inputs to the 224x224 resolution the backbone was built for.
x = tfkl.Resizing(224, 224, interpolation='bicubic')(inputs)

# training=False keeps the backbone's BatchNormalization layers in inference
# mode, also once the backbone is unfrozen for fine-tuning, so their moving
# statistics are not overwritten by small-batch updates.
x = effnet(x, training=False)

x = tfkl.GlobalAveragePooling2D(name='gap')(x)

x = tfkl.Dropout(0.5, seed=seed)(x)

# Classification head: one hidden layer followed by the 8-way softmax output.
x = tfkl.Dense(
        units=1024,
        activation='relu',
        kernel_initializer=tfk.initializers.HeUniform(seed))(x)

x = tfkl.Dropout(0.15, seed=seed)(x)

outputs = tfkl.Dense(
        units=8,
        activation='softmax',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        kernel_regularizer=tfk.regularizers.L2(l2=5e-5),
        name='output_layer')(x)

# Connect input and output through the Model class.
# The model name must be a string; the previous value was an undefined
# identifier (`modelin`), which raised a NameError.
tl_model = tfk.Model(inputs=inputs, outputs=outputs, name='tl_model')

# Compile the model. `learning_rate` replaces the deprecated `lr` argument and
# `metrics` is passed as a list, as documented for Model.compile.
tl_model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)
tl_model.summary()

In [None]:
# Stop training once validation accuracy has not improved for 20 epochs and
# restore the weights of the best epoch.
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=20,
    restore_best_weights=True,
)
callbacks = [early_stopping]

In [None]:
# Fit the transfer-learning model for up to 200 epochs; only the dictionary of
# per-epoch metrics is kept.
history = tl_model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=200,
    callbacks=callbacks,
).history

### Performance

In [None]:
# Validation performance of the transfer-learning model, computed over every
# batch of the validation iterator.
tl_model.evaluate(x=val_gen, steps=len(val_gen), verbose=0)


In [None]:
# Plot training vs. validation curves, one figure per metric.
for metric_key, figure_title in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.figure(figsize=(15, 5))
    plt.plot(history[metric_key], label='Training', alpha=.8, color='#ff7f0e')
    plt.plot(history['val_' + metric_key], label='Validation', alpha=.8, color='#4D61E2')
    plt.title(figure_title)
    plt.legend(loc='upper right')
    plt.grid(alpha=.3)

plt.show()

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
# Compute the confusion matrix.
# Model.predict accepts generators directly; predict_generator is deprecated
# and has been removed from recent Keras releases.
Y_pred = tl_model.predict(val_gen)
y_pred = np.argmax(Y_pred, axis=-1)

# val_gen.classes holds the ground-truth class index of every validation image.
cm = confusion_matrix(val_gen.classes, y_pred)

# NOTE(review): assumes `labels` is listed in the same order as the class
# indices assigned by the validation iterator -- confirm.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues)
plt.show()

# Per-class precision / recall / F1 summary.
print('Classification Report')
target_names = labels
print(classification_report(val_gen.classes, y_pred, target_names=target_names))

In [None]:
# Persist the trained transfer-learning model to 'TLModelVI'.
tl_model.save(filepath='TLModelVI')

## Fine tuning

In [None]:
# Drop the in-memory reference to the transfer-learning model.
del tl_model

In [None]:
# Load the saved transfer-learning model as the starting point for fine-tuning
# and print its architecture.
ft_model = tfk.models.load_model(filepath='./TLModelVI')
ft_model.summary()

In [None]:
# Validation performance of the reloaded model, before any fine-tuning step.
ft_model.evaluate(x=val_gen, steps=len(val_gen), verbose=0)

In [None]:
# Make the EfficientNetB0 backbone trainable for fine-tuning.
backbone = ft_model.get_layer(name='efficientnetb0')
backbone.trainable = True

# Print the architecture again to check the trainable parameter counts.
ft_model.summary()



In [None]:
# Recompile so the change in trainable layers takes effect, using a small
# learning rate (1e-4) for fine-tuning. `metrics` is passed as a list, as
# documented for Model.compile, and the learning rate is named explicitly.
ft_model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

In [None]:
# Fresh early-stopping callback for the fine-tuning run: stop after 20 epochs
# without a better validation accuracy and restore the best weights.
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=20,
    restore_best_weights=True,
)
callbacks = [early_stopping]

In [None]:
# Fine-tune the model for up to 200 epochs; only the dictionary of per-epoch
# metrics is kept. No class_weight is passed (a class_weight argument was
# previously commented out here).
history = ft_model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=200,
    callbacks=callbacks,
).history

In [None]:
# Validation performance of the fine-tuned model, computed over every batch of
# the validation iterator.
ft_model.evaluate(x=val_gen, steps=len(val_gen), verbose=0)

In [None]:
# Loss and accuracy plots: training vs. validation, one figure per metric.
for metric_key, figure_title in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.figure(figsize=(15, 5))
    plt.plot(history[metric_key], label='Training', alpha=.8, color='#ff7f0e')
    plt.plot(history['val_' + metric_key], label='Validation', alpha=.8, color='#4D61E2')
    plt.title(figure_title)
    plt.legend(loc='upper right')
    plt.grid(alpha=.3)

plt.show()

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
# Compute the confusion matrix.
# Model.predict accepts generators directly; predict_generator is deprecated
# and has been removed from recent Keras releases.
Y_pred = ft_model.predict(val_gen)
y_pred = np.argmax(Y_pred, axis=-1)

# val_gen.classes holds the ground-truth class index of every validation image.
cm = confusion_matrix(val_gen.classes, y_pred)

# NOTE(review): assumes `labels` is listed in the same order as the class
# indices assigned by the validation iterator -- confirm.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues)
plt.show()

# Per-class precision / recall / F1 summary.
print('Classification Report')
target_names = labels
print(classification_report(val_gen.classes, y_pred, target_names=target_names))

In [None]:
# Persist the fine-tuned model to 'FTModel'.
ft_model.save(filepath='FTModel')

In [None]:
# Drop the in-memory reference to the fine-tuned model.
del ft_model