* This notebook is a simple fine-tuning model for beginners.
* I'm a beginner at Machine Learning, so I'm sorry if there are any mistakes.
* I referred to the following documents for fine-tuning.
https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/



### Import Module

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob

import tensorflow as tf
from tensorflow import keras
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Input, Activation, add, Add, Dropout, BatchNormalization, GlobalAveragePooling2D
from keras.models import Sequential, Model
from keras import optimizers
from kaggle_datasets import KaggleDatasets


from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint

from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.applications.efficientnet import preprocess_input

### Set up variables

In [None]:
# Global settings read by the data generators and the model builder.
batch_size = 256
IMG_WIDTH = IMG_HEIGHT = 380  # input image size in pixels
CHANNELS, CLASSES = 3, 5      # colour channels / units in the softmax head
SEED = 5                      # seed handed to the generators

print(f"Tensorflow version {tf.__version__}")

### Set Up Data Augmentation

In [None]:
# Random augmentation settings, collected in one mapping and expanded into
# the ImageDataGenerator constructor.
augmentation_options = dict(
    preprocessing_function=preprocess_input,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
    # NOTE(review): per the Keras docs `cval` is only used when
    # fill_mode='constant', so it looks inert here - confirm.
    cval=0xCC,
    horizontal_flip=True,
    vertical_flip=True,
    # Fraction of the rows reserved for the 'validation' subset.
    validation_split=0.2,
)
datagen = ImageDataGenerator(**augmentation_options)

### Load Image

In [None]:
# Read the training table (it supplies the `image_id` and `label` columns
# used by the following cells) and preview its first rows.
train_csv_path = '../input/cassava-leaf-disease-classification/train.csv'
df_train = pd.read_csv(train_csv_path)
df_train.head()

In [None]:
# Prefix every image_id with the training image directory so the generators
# can open the files through the `path` column.
train_image_dir = '../input/cassava-leaf-disease-classification/train_images/'
df_train['path'] = train_image_dir + df_train['image_id']
df_train.head()

In [None]:
# Collect every .jpg in the test directory into a one-column frame (`path`).
test_image_pattern = '../input/cassava-leaf-disease-classification/test_images/*.jpg'
test_images = glob.glob(test_image_pattern)
df_test = pd.DataFrame(test_images, columns=['path'])
df_test.head()

In [None]:
# If class_mode="categorical", y_col="label" column values must be type string, list or tuple.
df_train['label'] = df_train['label'].astype('str')

# Validation images must not be randomly augmented: otherwise val_loss and
# val_accuracy (and the checkpoint chosen from val_loss) are measured on
# distorted inputs. This second generator only applies the EfficientNet
# preprocessing. Its validation_split has to equal the one given to
# `datagen` (0.2): the subsets are cut by row position, so equal fractions
# keep the training and validation rows disjoint.
validation_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2,
)

# Options shared by both subsets. Keras expects target_size as
# (height, width).
common_flow_options = dict(
    x_col='path',
    y_col='label',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
    seed=SEED,
)

train_generator = datagen.flow_from_dataframe(
    df_train,
    subset='training',
    **common_flow_options,
)

validation_generator = validation_datagen.flow_from_dataframe(
    df_train,
    subset='validation',
    **common_flow_options,
)

### Create Model

In [None]:
from tensorflow.keras import layers
def build_model():
    """Build an EfficientNetB4 classifier with a frozen backbone.

    The backbone is created without its top, loaded from the local
    `efficientnetb4_notop.h5` weight file and frozen. A new head (global
    average pooling -> batch normalisation -> dropout -> softmax over
    CLASSES units) is stacked on the backbone output.

    Returns:
        An uncompiled `tf.keras.Model` named "EfficientNet".
    """
    # Keras image tensors are laid out as (height, width, channels).
    inputs = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, CHANNELS))
    backbone = EfficientNetB4(
        include_top=False,
        input_tensor=inputs,
        weights='../input/tfkerasefficientnetimagenetnotop/efficientnetb4_notop.h5',
    )

    # Freeze the pretrained weights
    backbone.trainable = False

    top_dropout_rate = 0.2
    x = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = layers.Dense(CLASSES, activation="softmax", name="pred")(x)

    return tf.keras.Model(inputs, outputs, name="EfficientNet")
    

In [None]:
# Instantiate the classifier and print its layer summary.
model = build_model()
model.summary()

### Pre Training

In [None]:
# Phase 1: train with the backbone frozen by build_model(), so only the new
# head is updated.
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False,
                                               label_smoothing=0.0001,
                                               name='categorical_crossentropy')

model.compile(loss=loss,
              optimizer=optimizers.Adam(learning_rate=1e-2),
              metrics=['accuracy'])

# Number of complete batches per epoch (a trailing partial batch is dropped).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

# Keep only the weights of the epoch with the lowest val_loss. The
# deprecated `period=1` argument is dropped: the default save_freq='epoch'
# already checks after every epoch.
modelCheckpoint = ModelCheckpoint(filepath='cassava-efficientNetB4_bestParam_pre.h5',
                                  monitor='val_loss',
                                  verbose=1,
                                  save_best_only=True,
                                  save_weights_only=True,
                                  mode='min')


epochs = 10

# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    epochs=epochs,
                    validation_data=validation_generator,
                    validation_steps=STEP_SIZE_VALID,
                    callbacks=[modelCheckpoint])

In [None]:
# Training vs. validation accuracy per epoch, read from `history`.
plt.style.use('fivethirtyeight')
epoch_axis = range(1, epochs + 1)
for metric_key, curve_label in (('accuracy', "training"),
                                ('val_accuracy', "validation")):
    plt.plot(epoch_axis, history.history[metric_key], label=curve_label)
plt.title('Pre Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch, read from `history`.
plt.style.use('fivethirtyeight')
epoch_axis = range(1, epochs + 1)
for metric_key, curve_label in (('loss', "training"),
                                ('val_loss', "validation")):
    plt.plot(epoch_axis, history.history[metric_key], label=curve_label)
plt.title('Pre Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

### Fine Tuning

In [None]:
# Load the pre-training weights: the checkpoint file written by the
# pre-training ModelCheckpoint (best val_loss epoch only). by_name=True
# matches saved weights to layers by layer name.
model.load_weights('cassava-efficientNetB4_bestParam_pre.h5', by_name=True)
model.summary()

In [None]:
def unfreeze_model(model):
    """Make the last 20 non-BatchNorm layers of `model` trainable, in place.

    Every layer is frozen first; then the final 20 layers are re-enabled,
    except BatchNormalization layers, which stay frozen. The notebook
    re-compiles the model after calling this.

    Args:
        model: Keras model to modify. It is mutated; nothing is returned.
    """
    # Compare against the tf.keras class. The model is built from
    # `tensorflow.keras.layers`; the class imported from `keras.layers` is
    # not guaranteed to be the same object, and a mismatch would make
    # isinstance() silently False and unfreeze the BatchNorm layers.
    batch_norm_cls = tf.keras.layers.BatchNormalization

    model.trainable = True

    for layer in model.layers:
        layer.trainable = False

    # unfreeze the top 20 layers while leaving BatchNorm layers frozen
    for layer in model.layers[-20:]:
        if not isinstance(layer, batch_norm_cls):
            layer.trainable = True

In [None]:
# Unfreeze the top layers of the model in place, then print the summary again.
unfreeze_model(model)
model.summary()

In [None]:
# Fine Tuning
# Phase 2: re-compile after the trainable flags changed and continue with a
# learning rate 100x smaller than in pre-training (1e-4 vs 1e-2).
model.compile(loss=loss,
              optimizer=optimizers.Adam(learning_rate=1e-4),
              metrics=['accuracy'])

# Number of complete batches per epoch (a trailing partial batch is dropped).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

# Save the full model of the epoch with the lowest val_loss. The deprecated
# `period=1` argument is dropped: the default save_freq='epoch' already
# checks after every epoch.
modelCheckpoint = ModelCheckpoint(filepath='cassava-efficientNetB4_bestParam.h5',
                                  monitor='val_loss',
                                  verbose=1,
                                  save_best_only=True,
                                  save_weights_only=False,
                                  mode='min')

epochs = 10

# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    epochs=epochs,
                    validation_data=validation_generator,
                    validation_steps=STEP_SIZE_VALID,
                    callbacks=[modelCheckpoint])

In [None]:
# Training vs. validation accuracy per epoch, read from `history`.
plt.style.use('fivethirtyeight')
epoch_axis = range(1, epochs + 1)
for metric_key, curve_label in (('accuracy', "training"),
                                ('val_accuracy', "validation")):
    plt.plot(epoch_axis, history.history[metric_key], label=curve_label)
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch, read from `history`.
plt.style.use('fivethirtyeight')
epoch_axis = range(1, epochs + 1)
for metric_key, curve_label in (('loss', "training"),
                                ('val_loss', "validation")):
    plt.plot(epoch_axis, history.history[metric_key], label=curve_label)
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

### Create Submission File

In [None]:
# Restore the checkpoint written during fine-tuning (best val_loss epoch).
model.load_weights('cassava-efficientNetB4_bestParam.h5')

In [None]:
# Test-time pipeline: only the EfficientNet preprocessing, no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# No labels are requested (class_mode=None) and shuffle=False keeps the
# predictions in the same row order as df_test. Keras expects target_size
# as (height, width).
test_generator = test_datagen.flow_from_dataframe(
    df_test,
    x_col='path',
    y_col=None,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
    seed=SEED,
)


In [None]:
# Model.predict accepts generators directly; predict_generator is deprecated.
pred = model.predict(test_generator, verbose=1)
# Index of the highest softmax score per image.
pred_labels = np.argmax(pred, axis=-1)

In [None]:
# Preview the file-name part of each test path (the text after the last '/').
df_test['path'].str.rsplit('/', n=1).str[-1]

In [None]:
# Build the submission table on a copy: a plain `df_sub = df_test` is only
# an alias, so the new columns would also be added to df_test.
df_sub = df_test.copy()
df_sub['image_id'] = df_sub['path'].str.split('/').str[-1]
df_sub['label'] = pred_labels
df_sub.head()

In [None]:
# Write only the two submission columns, without the index.
submission_columns = ['image_id', 'label']
df_sub[submission_columns].to_csv('submission.csv', index=False)