## constant variables ##

In [None]:
# Dataset locations. ROOT_DIR already ends with '/', so the sub-directories are
# appended without a leading slash to avoid '//' in the resulting paths.
ROOT_DIR = '../input/cassava-leaf-disease-classification/'
TRAIN_DIR = ROOT_DIR + 'train_images/'
TEST_DIR = ROOT_DIR + 'test_images/'
# IMG_SIZE is determined by the EfficientNet model choice. EfficientNetB3 is used here, therefore input = (300,300,3)
IMG_SIZE = 300
BATCH_SIZE = 32
NUM_CLASSES = 5  # width of the final softmax layer
EPOCHS = 50  # upper bound on training epochs
SEED = 42
#why 42 always ? https://medium.com/geekculture/the-story-behind-random-seed-42-in-machine-learning-b838c4ac290a
TEST_SIZE = 0.2  # fraction of all labelled rows held out as the test set
VALIDATION_SIZE = 0.1  # fraction of the remaining (non-test) rows used for validation
LEARNING_RATE = 1e-4  # learning rate handed to the Adam optimizer


## Downloading efficientnet weights ##
In this notebook I will use EfficientNet for transfer learning. Therefore we need the latest EfficientNet weights. 

https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/

In [None]:
# Download the Noisy Student EfficientNet-B3 checkpoint archive into the current working directory.
!wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/noisystudent/noisy_student_efficientnet-b3.tar.gz

In [None]:
!tar -xf /kaggle/working/noisy_student_efficientnet-b3.tar.gz

In [None]:
# Download the helper script that the next cell runs to convert the checkpoint into a Keras .h5 weight file.
# NOTE(review): the script is fetched from the `main` branch of a third-party repository and then executed;
# consider pinning the URL to a specific commit so the executed code cannot change between runs.
!wget https://raw.githubusercontent.com/9vimu9/efficientnet_weight_update_util/main/efficientnet_weight_update_util.py

In [None]:
# Run the downloaded converter for the b3 model: it is given the extracted checkpoint (--ckpt) and the
# output file ./efficientnetb3_notop.h5, which is later passed to EfficientNetB3 as its weights.
# NOTE(review): --notop presumably drops the classification head, matching include_top = False below — confirm.
!python ./efficientnet_weight_update_util.py --model b3 --notop --ckpt ./noisy_student_efficientnet-b3/model.ckpt --o ./efficientnetb3_notop.h5

## Importing packages ## 

In [None]:
import matplotlib.pyplot as plt #draw graphs
import numpy as np
import pandas as pd #create dataframes using csv files
import os
from sklearn.model_selection import train_test_split # to create train, test, validation datasets
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator #for data preprocessing, image augmentation
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB3 #transfer learning model
from tensorflow.keras.utils import plot_model # to draw summary of the model

## Seeding ##
We set fixed random seeds before constructing the training, validation, and test datasets. The aim is to make sure we get the same training and validation datasets on every run, so that models trained with different hyperparameters or methods can be compared fairly.

In [None]:
# Seed each source of randomness with the same SEED so repeated runs are comparable.
np.random.seed(SEED)  # NumPy global generator
tf.random.set_seed(SEED)  # TensorFlow global seed
# Python hash seed.
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting it here
# likely only affects processes launched afterwards — confirm whether that is sufficient.
os.environ['PYTHONHASHSEED'] = str(SEED)


## preparing training, validation and test datasets ##
The labels of the dataset are stored in train.csv. We can use the pandas read_csv function to read the file; it returns a DataFrame. This is a big advantage, because Keras can build the training batches directly from a DataFrame object (via flow_from_dataframe).

In [None]:
# Load the image_id / label table that ships with the dataset.
all_data_frame = pd.read_csv(
    filepath_or_buffer = ROOT_DIR + 'train.csv'
)

In [None]:
# show 10 randomly chosen records (sample() draws rows at random; it does not return the first 10)
all_data_frame.sample(10)

change the data type of the column "label" to string

In [None]:
# Cast the "label" column to str; all other columns keep their dtype.
# NOTE(review): presumably required because the iterators below use y_col = "label" with
# class_mode = "sparse" — confirm against the flow_from_dataframe documentation.
all_data_frame = all_data_frame.astype({"label": str})

creating test dataset

In [None]:
# Hold out TEST_SIZE of all labelled rows as the test set.
# The split is stratified on "label" so the test set keeps the same class
# proportions as the full data; this keeps the reported metrics representative
# if the classes are unevenly populated. random_state makes it reproducible.
other_data_frame, test_data_frame = train_test_split(
    all_data_frame,
    test_size = TEST_SIZE,
    random_state = SEED,
    stratify = all_data_frame["label"])
print("test data size : "+str(test_data_frame.shape[0]))

creating training and validation dataset using remaining dataset

In [None]:
# Split the non-test rows into training and validation sets. VALIDATION_SIZE is
# a fraction of these remaining rows, not of the whole dataset.
# Stratifying on "label" keeps the class proportions identical in both parts,
# so validation metrics stay comparable to training if classes are unevenly populated.
train_data_frame, validation_data_frame = train_test_split(
    other_data_frame,
    test_size = VALIDATION_SIZE,
    random_state = SEED,
    stratify = other_data_frame["label"])

print("train data size : "+str(train_data_frame.shape[0]))
print("validation data size : "+str(validation_data_frame.shape[0]))

## data preprocessing ##
I used an ImageDataGenerator object to enlarge the effective training dataset through image augmentation. This is a very useful method when the dataset is small.

In [None]:
# Random augmentation applied to training images only (validation and test use a plain generator).
train_image_data_generator = ImageDataGenerator(
    # --- geometric transforms ---
    horizontal_flip = True,
    vertical_flip = True,
    rotation_range = 270,
    shear_range = 0.3,
    width_shift_range = 0.4,
    height_shift_range = 0.4,
    # NOTE(review): a [lower, upper] zoom range with lower = 0.05 looks extremely
    # aggressive — confirm this is intended rather than e.g. [0.95, 1.0].
    zoom_range = [0.05,1.0],
    # --- photometric transform ---
    brightness_range = [0.5,1.5],
    # --- filling of pixels exposed by the transforms above ---
    fill_mode = 'nearest',
)

Now we have created an image generator for the training dataset. Next, we select the data source for the image generator. In the earlier steps, I loaded the dataset into a DataFrame; therefore we can use ImageDataGenerator's built-in function, flow_from_dataframe.

In [None]:

# Batches of augmented training images: file names come from the "image_id" column
# (resolved inside TRAIN_DIR) and targets from the "label" column.
train_data_frame_iterator = train_image_data_generator.flow_from_dataframe(
    dataframe = train_data_frame,
    directory = TRAIN_DIR,
    x_col = "image_id",
    y_col = "label",
    class_mode = "sparse",
    # every image is resized to the backbone's input resolution
    target_size = (IMG_SIZE,IMG_SIZE),
    interpolation = "nearest",
    batch_size = BATCH_SIZE,
    # reshuffle the training rows, reproducibly
    shuffle = True,
    seed = SEED,
)

creating the iterator over the validation DataFrame

In [None]:

# Batches of validation images, read without augmentation (plain ImageDataGenerator).
# shuffle is disabled so the validation rows are visited in the same order every
# epoch: shuffling brings no benefit for evaluation, and together with a fixed
# number of validation steps it could score a different subset each epoch, adding
# noise to the val_loss / val_accuracy values the callbacks monitor.
validation_data_frame_iterator =  ImageDataGenerator().flow_from_dataframe(
                    validation_data_frame,
                    directory = TRAIN_DIR,
                    x_col = "image_id",
                    y_col = "label",
                    target_size = (IMG_SIZE,IMG_SIZE),
                    class_mode = "sparse",
                    batch_size = BATCH_SIZE,
                    shuffle = False,
                    seed = SEED,
                    interpolation = "nearest"
)

creating the iterator over the test DataFrame

In [None]:
# Batches of held-out test images, read without augmentation and in fixed order.
# The directory is TRAIN_DIR because the test rows were split off from train.csv,
# so their image files live alongside the training images.
test_data_frame_iterator = ImageDataGenerator().flow_from_dataframe(
    dataframe = test_data_frame,
    directory = TRAIN_DIR,
    x_col = "image_id",
    y_col = "label",
    class_mode = "sparse",
    target_size = (IMG_SIZE,IMG_SIZE),
    interpolation = "nearest",
    batch_size = BATCH_SIZE,
    shuffle = False,
    seed = SEED,
)

## Model creation and training ## 

In [None]:
def create_model_unfreezed(weights_path = "./efficientnetb3_notop.h5"):
    """Build the classifier: a trainable EfficientNetB3 backbone plus a small dense head.

    Args:
        weights_path: path of the "no top" EfficientNetB3 weight file to load into the
            backbone. Defaults to the file produced by the weight-conversion cell.

    Returns:
        An uncompiled Sequential model: backbone -> global average pooling ->
        Dense(256, relu) -> Dropout(0.7) -> Dense(NUM_CLASSES, softmax).
    """
    # Pre-trained backbone without its classification head, initialised from the
    # converted weight file.
    pre_trained_model = EfficientNetB3(
        input_shape = (IMG_SIZE, IMG_SIZE, 3),
        include_top = False,
        weights = weights_path,
        drop_connect_rate = 0.2)

    # Make every backbone layer trainable so its weights and biases can change during training ...
    pre_trained_model.trainable = True
    # ... except the BatchNormalization layers, which are kept frozen as recommended by
    # https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/
    for layer in pre_trained_model.layers:
        if isinstance(layer, layers.BatchNormalization):
            layer.trainable = False

    model = models.Sequential()
    model.add(pre_trained_model)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(256, activation = 'relu'))
    model.add(layers.Dropout(0.7)) # to prevent overfitting
    model.add(layers.Dense(NUM_CLASSES, activation = 'softmax'))
    # The architecture diagram is drawn by the dedicated plot_model cell further down.
    # Calling plot_model here discarded its result, so nothing was displayed, and it
    # made building the model depend on the plotting tool chain being installed.
    return model

In [None]:
# Build the (not yet compiled) network; it is compiled in the next cell.
model = create_model_unfreezed()


In [None]:
# Configure training: Adam at LEARNING_RATE, sparse categorical cross-entropy as the
# loss, and accuracy as the tracked metric.
model.compile(
    optimizer = Adam(learning_rate = LEARNING_RATE),
    loss = SparseCategoricalCrossentropy(name='sparse_categorical_crossentropy'),
    metrics = ['accuracy'],
)

## Creating callbacks ##

In [None]:
# Halt training once val_loss has gone 4 epochs without improving, and roll the
# model back to the weights of its best epoch. Helps to prevent overfitting.
early_stopping = EarlyStopping(
    monitor = 'val_loss',
    mode = 'min',
    patience = 4,
    restore_best_weights = True,
)

# Persist the model to model.h5 for later use, but only when val_accuracy reaches a
# new maximum (note: the other two callbacks track val_loss instead).
model_checkpoint = ModelCheckpoint(
    'model.h5',
    monitor = 'val_accuracy',
    mode = 'max',
    save_best_only = True,
)

# Multiply the learning rate by 0.3 after 3 epochs without a val_loss improvement.
reduce_learning_rate = ReduceLROnPlateau(
    monitor = 'val_loss',
    mode = 'min',
    factor = 0.3,
    patience = 3,
)

## Calculating step size ##

In [None]:
# Number of batches drawn from each iterator per pass.
# Training keeps floor division: a trailing partial batch is simply skipped.
TRAIN_STEPS_PER_EPOCH = train_data_frame_iterator.n//train_data_frame_iterator.batch_size
# Validation and test use ceiling division, (n + batch_size - 1) // batch_size, so the
# final partial batch is included. With floor division up to batch_size - 1 samples
# were left out of the reported metrics, and a split smaller than one batch gave 0 steps.
VALIDATION_STEPS_PER_EPOCH = (validation_data_frame_iterator.n + validation_data_frame_iterator.batch_size - 1)//validation_data_frame_iterator.batch_size
TEST_STEPS_PER_EPOCH = (test_data_frame_iterator.n + test_data_frame_iterator.batch_size - 1)//test_data_frame_iterator.batch_size

In [None]:
 train_data_frame_iterator.n  # display the training iterator's sample count (the numerator of the step computation)

In [None]:
train_data_frame_iterator.batch_size  # display the batch size the training iterator was created with

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Draw the model architecture including tensor shapes; as the last expression of the cell its result is displayed.
plot_model(model, show_shapes = True)

In [None]:
# Train for at most EPOCHS epochs; the callbacks may lower the learning rate,
# checkpoint the model, or stop early. The returned History feeds the plots below.
history = model.fit(
    x = train_data_frame_iterator,
    epochs = EPOCHS,
    steps_per_epoch = TRAIN_STEPS_PER_EPOCH,
    validation_data = validation_data_frame_iterator,
    validation_steps = VALIDATION_STEPS_PER_EPOCH,
    callbacks = [reduce_learning_rate,model_checkpoint,early_stopping],
)

## measuring validation accuracy ##

In [None]:
# Score the trained model on the validation iterator; evaluate() returns the loss
# followed by the accuracy metric configured in compile().
validation_loss, validation_accuracy = model.evaluate(
    validation_data_frame_iterator,
    steps = VALIDATION_STEPS_PER_EPOCH,
)
print("validation accuracy: {:5.2f}%".format(validation_accuracy * 100))

## measuring test accuracy ##

In [None]:
# Score the trained model on the held-out test iterator; evaluate() returns the loss
# followed by the accuracy metric configured in compile().
test_loss, test_accuracy = model.evaluate(
    test_data_frame_iterator,
    steps = TEST_STEPS_PER_EPOCH,
)
print("test accuracy: {:5.2f}%".format(test_accuracy * 100))

## train accuracy vs validation accuracy ##

In [None]:
# Per-epoch training vs validation accuracy, taken from the History returned by fit().
accuracy_figure, accuracy_axes = plt.subplots(figsize=(15, 5))
accuracy_axes.plot(history.history['accuracy'], 'b*-', label="train accuracy")
accuracy_axes.plot(history.history['val_accuracy'], 'r*-', label="validation accuracy")
accuracy_axes.set_title("train accuracy vs validation accuracy")
accuracy_axes.set_xlabel("Epochs")
accuracy_axes.set_ylabel("Accuracy")
accuracy_axes.grid()
accuracy_axes.legend()
plt.show()

## train loss vs validation loss ##

In [None]:
# Per-epoch training vs validation loss, taken from the History returned by fit().
loss_figure, loss_axes = plt.subplots(figsize=(15, 5))
loss_axes.plot(history.history['loss'], 'b*-', label="train loss")
loss_axes.plot(history.history['val_loss'], 'r*-', label="validation loss")
loss_axes.set_title("train loss vs validation loss")
loss_axes.set_xlabel("Epochs")
loss_axes.set_ylabel("Loss")
loss_axes.grid()
loss_axes.legend()
plt.show()