# IMPORTS

In [None]:
import time
# Wall-clock reference point; the final cell subtracts it to report total runtime.
start = time.time()

In [None]:
import os
import json
from collections import Counter
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import itertools
import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import RMSprop, Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

# Report which TensorFlow release this run is using.
print(f"Tensorflow version {tf.__version__}")

In [None]:
# Root folder of the competition data
work_dir = '/kaggle/input/cassava-leaf-disease-classification/'
# Listing the folder raises immediately if the dataset is not available.
os.listdir(work_dir)
# Folder holding the training images, derived from the root above
train_path = os.path.join(work_dir, 'train_images')

## Labels

In [None]:
# Load the training table (one row per image)
data = pd.read_csv(f'{work_dir}train.csv')

# Number of rows for each label value
label_counts = Counter(data['label'])
print(label_counts)

In [None]:
# Histogram of the numeric labels, to eyeball the class balance.
data['label'].hist()

## Add class name

In [None]:
# Load the label-number -> disease-name mapping. The context manager closes
# the file handle as soon as parsing is done (it was previously left open).
with open(work_dir + 'label_num_to_disease_map.json') as file:
    real_labels = json.load(file)

# JSON object keys are strings; convert them to int so they match the
# integer values stored in the 'label' column.
real_labels = {int(k): v for k, v in real_labels.items()}

# Head
data['class_name'] = data.label.map(real_labels)
print(data.head(10))

In [None]:
# Distinct class names produced by the label mapping.
print(data['class_name'].unique())

## Show sample images for each class

In [None]:
def show_img(images):
    """Draw a 4x4 grid of 16 images picked at random from `images`.

    Args:
        images: collection of image file names located under `train_path`.
            Each pick is an independent draw, so a file can appear twice.
    """
    # 16 independent random draws
    picks = [np.random.choice(images) for _ in range(16)]

    # One large figure for the whole grid
    plt.figure(figsize=(16, 12))

    # subplot slots are numbered from 1
    for slot, image_name in enumerate(picks, start=1):
        plt.subplot(4, 4, slot)
        plt.imshow(plt.imread(train_path + '/' + image_name))
        plt.axis('off')

    # Avoid overlapping panels
    plt.tight_layout()

In [None]:
# Rows whose label is 4
mask = data['label'].eq(4)
class_healthy = data.loc[mask]

In [None]:
# Preview 16 random images from the label-4 rows (class_healthy).
show_img(class_healthy['image_id'])

In [None]:
# Rows whose label is 3
mask = data['label'].eq(3)
classCMD = data.loc[mask]

In [None]:
# Preview 16 random images from the label-3 rows (classCMD).
show_img(classCMD['image_id'])

In [None]:
# Rows whose label is 2
mask = data['label'].eq(2)
classCGM = data.loc[mask]

In [None]:
# Preview 16 random images from the label-2 rows (classCGM).
show_img(classCGM['image_id'])

In [None]:
# Rows whose label is 1
mask = data['label'].eq(1)
classCBSD = data.loc[mask]

In [None]:
# Preview 16 random images from the label-1 rows (classCBSD).
show_img(classCBSD['image_id'])

In [None]:
# Rows whose label is 0
mask = data['label'].eq(0)
classCBB = data.loc[mask]

In [None]:
# Preview 16 random images from the label-0 rows (classCBB).
show_img(classCBB['image_id'])

## Data Generation

In [None]:
# One seed for every random step in this cell, so reruns select the same images.
SEED = 42
BATCH_SIZE = 32

# 90% of class
# (the remaining 10% of each class is not used anywhere in this notebook)
class0 = classCBB.sample(frac=0.9, random_state=SEED)
class1 = classCBSD.sample(frac=0.9, random_state=SEED)
class2 = classCGM.sample(frac=0.9, random_state=SEED)
class3 = classCMD.sample(frac=0.9, random_state=SEED)
class4 = class_healthy.sample(frac=0.9, random_state=SEED)

# concat
frames = [class0, class1, class2, class3, class4]
finalData = pd.concat(frames)
print('images =', len(finalData))

# train_test_split with proportions(stratify)
train, val = train_test_split(
    finalData,
    test_size=0.05,
    random_state=SEED,
    stratify=finalData['class_name'])

# Creating additional data
IMG_SIZE = 300
size = (IMG_SIZE, IMG_SIZE)
n_CLASS = 5

# Training generator: EfficientNet preprocessing plus random augmentation.
datagen = ImageDataGenerator(
    # appropriate
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,

    # params
    rotation_range=60,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest')

# Validation generator: same preprocessing but NO augmentation, so val_loss
# (monitored by the training callbacks) is measured on undistorted images and
# is comparable from one epoch to the next.
val_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input)

# initialize
train_set = datagen.flow_from_dataframe(
    train,
    directory=train_path,
    seed=SEED,
    x_col='image_id',
    y_col='class_name',
    target_size=size,
    class_mode='categorical',
    interpolation='nearest',
    shuffle=True,
    batch_size=BATCH_SIZE)

# initialize
# shuffle=False keeps the validation order fixed, so every epoch is scored on
# the same images even when validation_steps drops the last partial batch.
val_set = val_datagen.flow_from_dataframe(
    val,
    directory=train_path,
    seed=SEED,
    x_col='image_id',
    y_col='class_name',
    target_size=size,
    class_mode='categorical',
    interpolation='nearest',
    shuffle=False,
    batch_size=BATCH_SIZE)

In [None]:
def create_model():
    """Assemble the classifier: an EfficientNetB3 backbone with a small dense head.

    Reads the module-level IMG_SIZE and n_CLASS.

    Returns:
        An uncompiled Sequential model ending in a softmax layer with
        n_CLASS units.
    """
    # Convolutional backbone without its classification top, ImageNet weights.
    backbone = tf.keras.applications.EfficientNetB3(
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
        include_top=False,
        weights='imagenet',
        drop_connect_rate=0.6,
    )

    return Sequential([
        backbone,
        GlobalAveragePooling2D(),
        Flatten(),
        Dense(
            256,
            activation='relu',
            bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001),
        ),
        Dropout(0.5),
        Dense(n_CLASS, activation='softmax'),
    ])


# Build one instance just to print the layer summary.
leaf_model = create_model()
leaf_model.summary()

## Fit

In [None]:
# steps
EPOCHS = 15
STEP_SIZE_TRAIN = train_set.n//train_set.batch_size
STEP_SIZE_VALID = val_set.n//val_set.batch_size

In [None]:
def Model_fit():
    """Build, compile and train the leaf classifier, then save it to disk.

    Uses the module-level train_set, val_set, EPOCHS, STEP_SIZE_TRAIN and
    STEP_SIZE_VALID.

    Side effects:
        Writes 'Cassava_model_best.h5' (checkpoint with the lowest val_loss)
        and 'Cassava_model.h5' (the model as it is once fit returns).

    Returns:
        The History object returned by `fit`.
    """
    leaf_model = create_model()

    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False,
                                                   label_smoothing=0.001,
                                                   name='categorical_crossentropy')

    leaf_model.compile(optimizer=Adam(learning_rate=2e-4),
                       loss=loss,
                       metrics=['categorical_accuracy'])

    # Stopper: give up after 5 epochs without val_loss improvement and roll
    # the weights back to the best epoch.
    es = EarlyStopping(monitor='val_loss', mode='min', patience=5,
                       restore_best_weights=True, verbose=1)

    # Save the best model seen so far
    checkpoint_cb = ModelCheckpoint('Cassava_model_best.h5',
                                    save_best_only=True,
                                    monitor='val_loss',
                                    mode='min')

    # Reduce learning rate when val_loss plateaus for 3 epochs
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.3,
                                  patience=3,
                                  min_lr=1e-6,
                                  mode='min',
                                  verbose=1)

    # No batch_size here: train_set/val_set are generators that already yield
    # batches, and Keras documents that batch_size must not be given for them.
    history = leaf_model.fit(train_set,
                             validation_data=val_set,
                             epochs=EPOCHS,
                             steps_per_epoch=STEP_SIZE_TRAIN,
                             validation_steps=STEP_SIZE_VALID,
                             callbacks=[es, checkpoint_cb, reduce_lr])

    leaf_model.save('Cassava_model.h5')

    return history

In [None]:
def run_with_tpu():
    # detect TPU
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)

    # strategy
    tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
    
    with tpu_strategy.scope():
        history = Model_fit()

In [None]:
run_with_tpu()

## Plot

In [None]:
# Per-epoch metrics recorded during training
acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# EarlyStopping can end training before EPOCHS is reached, so take the x-axis
# length from the recorded history; range(EPOCHS) would then not match the
# y-values and plt.plot would raise.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

## Time

In [None]:
# Total runtime since the first cell, printed as HH:MM:SS.ss
end = time.time()
hours, rem = divmod(end - start, 3600)
minutes, seconds = divmod(rem, 60)
print(f"{int(hours):0>2}:{int(minutes):0>2}:{seconds:05.2f}")