In [None]:
# Download the Noisy Student EfficientNet-B3 checkpoint archive into the current working directory.
!wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/noisystudent/noisy_student_efficientnet-b3.tar.gz

In [None]:
# Unpack the checkpoint archive downloaded by the previous cell. wget saved it
# into the current working directory, so address it relatively (as the later
# cells do) instead of assuming the working directory is /kaggle/working.
!tar -xf ./noisy_student_efficientnet-b3.tar.gz

In [None]:
# Fetch the TensorFlow helper script that the next cell runs to turn the checkpoint into a Keras .h5 file.
# NOTE(review): the URL tracks the `master` branch, so the script may change or disappear
# between runs — consider pinning it to a release tag or commit.
!wget https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/python/keras/applications/efficientnet_weight_update_util.py

In [None]:
# Run the conversion script on the extracted B3 checkpoint with --notop and write
# ./efficientnetb3_notop.h5, the weight file loaded by the model builders further down.
!python ./efficientnet_weight_update_util.py --model b3 --notop --ckpt ./noisy_student_efficientnet-b3/model.ckpt --o ./efficientnetb3_notop.h5

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Root folder of the competition dataset; the CSV and JSON files read later are addressed relative to it.
ROOT_DIR = '../input/cassava-leaf-disease-classification/'
# NOTE: this is not the last expression of the cell, so the listing is computed but not displayed.
os.listdir(ROOT_DIR)

import json # to read in the 'label_num_to_disease_map.json' file

In [None]:
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import cv2
import random

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB3, EfficientNetB5
from tensorflow.keras.utils import plot_model

In [None]:
# Image directories, derived from ROOT_DIR so the dataset location is defined in one place.
TRAIN_DIR = ROOT_DIR + 'train_images/'
TEST_DIR = ROOT_DIR + 'test_images/'

In [None]:
# set seed
seed = 42

def seed_everything(seed):
    """Seed every random number generator this notebook imports.

    Args:
        seed: Integer seed applied to PYTHONHASHSEED, Python's `random`
            module, NumPy's global generator and TensorFlow.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # `random` is imported by the notebook, so it has to be seeded as well
    # for runs to be repeatable.
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
seed_everything(seed)

<center><h1> Data exploration </h1></center> 

In [None]:
# Read the label table and the submission template from the dataset root.
train_df = pd.read_csv(os.path.join(ROOT_DIR, 'train.csv'))
sample_df = pd.read_csv(os.path.join(ROOT_DIR, 'sample_submission.csv'))

In [None]:
# Report the (rows, columns) of both frames, then preview the first training rows.
frame_shapes = (train_df.shape, sample_df.shape)
print(*frame_shapes)
display(train_df.head())

In [None]:
# Load the label-number -> disease-name map. The context manager closes the
# file handle as soon as the JSON has been parsed.
with open(ROOT_DIR + 'label_num_to_disease_map.json') as f:
    data = json.load(f)
print(json.dumps(data, indent = 2))

In [None]:
# Draw 20 random rows for a quick look and keep their columns as plain lists.
z = train_df.sample(n = 20)
display(z)
images = z['image_id'].tolist()
labels = z['label'].tolist()

<center><h1> Split dataset for training and validation </h1></center> 
<center> Reserving 15% of data for validation </center>

In [None]:
# Cast the label column to strings before the generators are built.
# NOTE(review): presumably needed because flow_from_dataframe expects string class labels — confirm.
train_df = train_df.astype({"label": str})

In [None]:
# Hold out 15% of the labelled rows; `test` is the validation frame consumed by valid_generator below.
# NOTE(review): the split is not stratified on `label` — consider stratify=train_df['label'] if classes are imbalanced.
train, test = train_test_split(train_df, test_size = 0.15, random_state = seed)
print(train.shape, test.shape)

<center><h1> Creating ImageDataGenerator to generate data in batches and perform image augmentation. </h1></center> 

In [None]:
# 300 px is the EfficientNetB3 entry in the resolution table at the end of the notebook.
IMG_SIZE = 300
size = (IMG_SIZE,IMG_SIZE)  # target_size handed to both generators
batch_size = 32  # batch size shared by the training and validation generators

In [None]:
# Augmentation pipeline applied to training images only.
datagen = ImageDataGenerator(
    # flips
    horizontal_flip = True,
    vertical_flip = True,
    # geometric jitter
    rotation_range = 30,
    shear_range = 0.2,
    zoom_range = 0.2,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    # photometric jitter
    brightness_range = [0.5,1.5],
    # how pixels exposed by the geometric transforms are filled
    fill_mode = 'nearest',
)

In [None]:
# Generator for the validation images: constructed with no arguments, i.e. no augmentation settings.
validgen = ImageDataGenerator()

In [None]:
# Shuffled, augmented training batches read from TRAIN_DIR using the `train` frame.
train_generator = datagen.flow_from_dataframe(
    train,
    directory = TRAIN_DIR,
    # which columns hold the file name and the target
    x_col = "image_id",
    y_col = "label",
    class_mode = "sparse",
    # image loading
    target_size = size,
    interpolation = "nearest",
    # batching
    batch_size = batch_size,
    shuffle = True,
    seed = seed,
)

In [None]:
# Validation batches in a fixed order (shuffle is off), read from TRAIN_DIR using the held-out `test` frame.
valid_generator = validgen.flow_from_dataframe(
    test,
    directory = TRAIN_DIR,
    # which columns hold the file name and the target
    x_col = "image_id",
    y_col = "label",
    class_mode = "sparse",
    # image loading
    target_size = size,
    interpolation = "nearest",
    # batching
    batch_size = batch_size,
    shuffle = False,
    seed = seed,
)

<center><h1> Model creation and training </h1></center> 

In [None]:
# Width of the softmax output layer in both model builders.
# NOTE(review): assumed to equal the number of entries in label_num_to_disease_map.json — confirm.
NUM_CLASSES = 5

In [None]:
def create_model():
    """Assemble the baseline classifier and print its summary.

    An EfficientNetB3 backbone (no top, weights read from
    ./efficientnetb3_notop.h5) feeds global average pooling, a 256-unit ReLU
    layer, dropout of 0.4 and a NUM_CLASSES-way softmax.

    Returns:
        The uncompiled Sequential model.
    """
    # Backbone takes (IMG_SIZE, IMG_SIZE, 3) images.
    backbone = EfficientNetB3(
        input_shape = (IMG_SIZE, IMG_SIZE, 3),
        include_top = False,
        weights = "./efficientnetb3_notop.h5",
    )
    classifier = models.Sequential([
        backbone,
        layers.GlobalAveragePooling2D(),
        layers.Dense(256, activation = 'relu'),
        layers.Dropout(0.4),
        layers.Dense(NUM_CLASSES, activation = 'softmax'),
    ])
    classifier.summary()

    return classifier

In [None]:
# Build the baseline model.
# NOTE(review): `model` is rebound by the later create_modelFreezed() cell before compile/fit,
# so this instance is never trained.
model = create_model()


In [None]:
def create_modelFreezed():
    """Assemble the fine-tuning classifier with frozen BatchNormalization layers.

    The EfficientNetB3 backbone (no top, weights read from
    ./efficientnetb3_notop.h5, drop_connect_rate=0.2) is marked trainable,
    then each of its BatchNormalization layers is frozen again. The head is
    global average pooling, a 256-unit ReLU layer, dropout of 0.6 and a
    NUM_CLASSES-way softmax.

    Side effects:
        Calls plot_model on the assembled network and prints its summary.

    Returns:
        The uncompiled Sequential model.
    """
    model = models.Sequential()
    pre_trained_model = EfficientNetB3(input_shape = (IMG_SIZE, IMG_SIZE, 3), include_top = False, weights = "./efficientnetb3_notop.h5",drop_connect_rate=0.2)
    # Make the whole backbone trainable ...
    pre_trained_model.trainable = True
    # ... then freeze every BatchNormalization layer again.
    for layer in pre_trained_model.layers:
        if isinstance(layer, layers.BatchNormalization):
            layer.trainable = False

    model.add(pre_trained_model)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(256, activation = 'relu'))
    model.add(layers.Dropout(0.6))
    model.add(layers.Dense(NUM_CLASSES, activation = 'softmax'))
    plot_model(model, show_shapes = True)
    model.summary()

    return model

In [None]:
# Rebind `model` to the fine-tuning variant; this is the instance compiled and trained below.
model = create_modelFreezed()


In [None]:
# Integer class targets (class_mode "sparse") pair with the sparse cross-entropy loss.
model.compile(
    optimizer = Adam(learning_rate = 1e-4),
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy'],
)

In [None]:
# Halt after 5 epochs without a lower validation loss and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor = 'val_loss',
    mode = 'min',
    patience = 5,
    restore_best_weights = True,
)

# Write best_model.hdf5 whenever validation accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(
    'best_model.hdf5',
    monitor = 'val_accuracy',
    mode = 'max',
    save_best_only = True,
    verbose = 1,
)

# Multiply the learning rate by 0.2 after 2 epochs without a lower validation loss.
reduce_lr = ReduceLROnPlateau(
    monitor = 'val_loss',
    mode = 'min',
    factor = 0.2,
    patience = 2,
    verbose = 1,
)

In [None]:
EPOCHS = 30
# Training: whole batches only, as before.
STEP_SIZE_TRAIN = train_generator.n//train_generator.batch_size
# Validation: round up so the final partial batch is scored too; floor
# division would silently leave the tail of the held-out set out of every
# validation metric.
STEP_SIZE_VALID = (valid_generator.n + valid_generator.batch_size - 1)//valid_generator.batch_size

In [None]:
# Train with the three callbacks; `history` keeps the per-epoch metrics plotted later.
history = model.fit(
    train_generator,
    epochs = EPOCHS,
    steps_per_epoch = STEP_SIZE_TRAIN,
    validation_data = valid_generator,
    validation_steps = STEP_SIZE_VALID,
    callbacks = [early_stopping, checkpoint, reduce_lr],
)

In [None]:
# Print the layer-by-layer summary of the trained model.
model.summary()

In [None]:
# Last expression of the cell, so the rendered architecture diagram is displayed as its output.
plot_model(model, show_shapes = True)

<center><h1> Model evaluation </h1></center> 

In [None]:
# Model.evaluate accepts generators directly; evaluate_generator is deprecated.
# This also mirrors how model.fit is called with the generator above.
model.evaluate(valid_generator, steps = STEP_SIZE_VALID)

In [None]:
# Per-epoch curves recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Accuracy figure: explicit Axes, with axis labels so it reads on its own.
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, 'c-', label='Training accuracy')
ax_acc.plot(epochs, val_acc, 'y-', label='Validation accuracy')
ax_acc.set_title('Training and validation accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Loss figure.
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, 'c-', label='Training Loss')
ax_loss.plot(epochs, val_loss, 'y-', label='Validation Loss')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

plt.show()

### Short version history:
* Baseline model - 0.601
* EfficientNetB0 - image_size = (224,224), batch_size = 64 - 0.841
* EfficientNetB3 - image_size = (300,300), batch_size = 32 - 0.856
* EfficientNetB3 with more augmentations - 0.865


| Base model	  |resolution |
|-----------------|-----------|
| EfficientNetB0  |	224 |
| EfficientNetB1  |	240 |
| EfficientNetB2  |	260 |
| EfficientNetB3  |	300 |
| EfficientNetB4  |	380 |
| EfficientNetB5  |	456 |
| EfficientNetB6  |	528 |
| EfficientNetB7  |	600 |

### Some useful links:
* The prediction and submission notebook can be found here: [Inference Notebook](https://www.kaggle.com/lavanyask/cassava-leaf-disease-inference)
* More about keras EfficientNets: [here](https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/)

### Further experiments:
* Trying out different network architectures, changing the number of layers in the network
* Hyperparameter tuning - changing epochs, batch size, number of neurons in hidden layers, activation function ...
* Cross Validation
* More augmentation techniques
* PyTorch

## Do consider upvoting if you found it useful :)
### Thank you for reading the notebook.