In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
import tensorflow_datasets as tfds
import seaborn as sns
import json
from scipy import stats
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Check how the data is distributed and create the tf.data pipeline

In [None]:
# Load the training manifest and report how many rows (images) it lists.
# Later cells read its `image_id` and `label` columns.
train_csv = pd.read_csv('/kaggle/input/cassava-leaf-disease-classification/train.csv')
print("Number of train images: {}".format(len(train_csv)))

In [None]:
# Preview the first rows of the training manifest.
train_csv.head()

In [None]:
# Load the label-id -> disease-name mapping shipped with the competition data.
# The keys are looked up as strings later in the notebook (class_map[str(label)]).
with open('/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json', 'r') as fp:
    class_map = json.load(fp)
class_map

In [None]:
# Count images per class and draw one horizontal bar per label.
label_counts = train_csv.pivot_table(columns='label',aggfunc='size')
ax=label_counts.plot(kind='barh')
# Look every disease name up by its label id instead of relying on the JSON
# dict happening to be in the same order as the sorted label index.
ax.set_yticklabels([class_map[str(label)] for label in label_counts.index])
ax.set_xlabel('count');

The classes are clearly imbalanced, so we will need to address that during training.

It is also helpful to understand what we are trying to solve, read here for more info about the diseases:
https://www.kaggle.com/c/cassava-leaf-disease-classification/discussion/198143

#### Create tf.dataset

In [None]:
@tf.function
def process_data(path, label):
    """Load one training image and one-hot encode its label.

    Args:
        path: file name of the image, relative to the competition's
            ``train_images`` directory.
        label: integer class index of the image.

    Returns:
        A ``(image, one_hot_label)`` tuple: the JPEG decoded with 3 channels,
        and the label encoded as a one-hot vector of depth 5.
    """
    image_dir = '/kaggle/input/cassava-leaf-disease-classification/train_images/'
    raw_bytes = tf.io.read_file(image_dir + path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    one_hot_label = tf.one_hot(label, 5)
    return decoded, one_hot_label

In [None]:
# Hold out 10% of the rows for validation; stratifying on `label` keeps the class
# proportions the same in both splits, and the fixed seed makes the split repeatable.
train, val=train_test_split(train_csv, test_size=0.1, random_state=42,stratify=train_csv['label'])

In [None]:
# Resample every class (with replacement) to the same size: half the row count
# of the most frequent class. The largest class is found with max() rather than
# a hard-coded position, and sampling is seeded like the train/val split so the
# resampled frame is reproducible across runs.
class_counts = train.pivot_table(columns='label',aggfunc='size')
target_count = int(class_counts.max()*0.5)
oversampled_df = [
    train[train.label==i].sample(target_count, replace=True, random_state=42)
    for i in range(len(class_map))
]


In [None]:
# Stack the per-class samples into one frame and show the resulting class counts.
resampled_train = pd.concat(oversampled_df, axis=0)
resampled_train.pivot_table(columns='label',aggfunc='size')

In [None]:
# Shuffle the rows so classes are interleaved (the concat left them grouped by
# class). Seeded so that re-running the notebook yields the same order.
resampled_train = resampled_train.sample(frac=1, random_state=42).reset_index(drop=True)
resampled_train.head()

In [None]:
# Number of images per batch for the train, validation and test pipelines.
batch_size = 8

In [None]:
train_ds = tf.data.Dataset.from_tensor_slices((train.image_id.values, train.label.values))\
                .map(process_data, num_parallel_calls = tf.data.experimental.AUTOTUNE)\
                .shuffle(buffer_size = 2000)\
                .batch(batch_size)\
                .prefetch(tf.data.experimental.AUTOTUNE) 

val_ds = tf.data.Dataset.from_tensor_slices((val.image_id.values, val.label.values))\
                .map(process_data, num_parallel_calls = tf.data.experimental.AUTOTUNE)\
                .batch(batch_size)\
                .prefetch(tf.data.experimental.AUTOTUNE) 

In [None]:
# Augmentation pipeline, reused both for the visual sanity check and as the first
# stage of the model. The comments on the arguments follow the Keras
# preprocessing-layer docs - TODO confirm against the installed TF version.
data_augmentation = tf.keras.Sequential(
    [
        # Crop to 512x512.
        tf.keras.layers.experimental.preprocessing.RandomCrop(height=512, width=512),
        # Flip along both axes.
        tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
        # Presumably a fraction of a full turn, i.e. up to a quarter turn either way.
        tf.keras.layers.experimental.preprocessing.RandomRotation(0.25),
        # Presumably negative values zoom in, i.e. zoom in by up to 20%.
        tf.keras.layers.experimental.preprocessing.RandomZoom((-0.2, 0)),
        # (lower, upper) bounds of the contrast change.
        tf.keras.layers.experimental.preprocessing.RandomContrast((0,0.2))
    ]
)

In [None]:
# train_ds = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y), 
#                 num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [None]:
# Sanity check: fetch one batch, augment it and plot it to confirm that the
# dataset is fetching and decoding images correctly and labels line up.
for images, labels in train_ds.take(1):
    images = data_augmentation(images, training=True)
    print(images.shape, labels.shape)
    plt.figure(figsize=(10, 10))
    labels = np.argmax(labels.numpy(), -1)
    # imshow clips float input to [0, 1]; depending on the TF version the
    # augmentation layers may return floats in the 0..255 range, so convert
    # back to uint8 for display (a no-op if the batch is already uint8).
    pixels = np.clip(images.numpy(), 0, 255).astype("uint8")
    # Plot what the batch actually contains, capped at the 9 cells of the 3x3 grid,
    # instead of assuming exactly 8 images.
    for i in range(min(len(pixels), 9)):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(pixels[i])
        plt.title(class_map[str(labels[i])])
        plt.axis("off")

In [None]:
# train_ds is already batched, so its length is the number of batches (steps)
# per epoch; dividing by batch_size again would undercount by that factor.
len(train_ds)

## Model Training
### Baseline
Let's first train a baseline model to see how it performs. The baseline simply does transfer learning on a pretrained model, without data augmentation or anything specific to balance the dataset.

In [None]:
def plot_metrics(history, metrics= ['loss', 'accuracy']):
    """Plot training and validation curves, one subplot per metric.

    Args:
        history: object with an ``epoch`` sequence and a ``history`` dict that
            holds both ``<metric>`` and ``val_<metric>`` series (as returned by
            ``model.fit``).
        metrics: names of the metrics to plot.

    Every subplot gets its own legend. The grid has two columns; it stays 2x2
    for up to four metrics and grows by rows when more are requested.
    """
    n_rows = max(2, (len(metrics) + 1) // 2)
    for n, metric in enumerate(metrics):
        name = metric.replace("_"," ").capitalize()
        plt.subplot(n_rows, 2, n+1)
        plt.plot(history.epoch, history.history[metric],  label='Train')
        plt.plot(history.epoch, history.history['val_'+metric], linestyle="--", label='Val')
        plt.xlabel('Epoch')
        plt.ylabel(name)
        if metric == 'loss':
            # Loss has no fixed upper bound: anchor at 0, keep the automatic maximum.
            plt.ylim([0, plt.ylim()[1]])
        else:
            # Every other metric is plotted on a fixed 0..1 axis.
            plt.ylim([0, 1])
        # Inside the loop so each subplot is labelled, not only the last one.
        plt.legend()

In [None]:
def plot_cm(labels, predictions):
    """Draw the confusion matrix as a heatmap and print a per-class summary.

    Args:
        labels: 1-D sequence of true class ids.
        predictions: 1-D sequence of predicted class ids, same length.

    For each class the number of correctly classified samples, the number of
    samples that truly belong to it, and the resulting recall are printed,
    followed by the total sample count and the overall accuracy. Works for any
    number of classes, including a single one.
    """
    cm = confusion_matrix(labels, predictions)
    plt.figure(figsize=(5,5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title('Confusion matrix')
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

    # Rows/columns of the matrix follow the sorted union of the ids seen in
    # either input (scikit-learn's default label ordering).
    class_ids = sorted(set(labels) | set(predictions))
    correct_total = 0
    for row, class_id in enumerate(class_ids):
        hits = cm[row][row]
        support = np.sum(cm[row])
        correct_total += hits
        # A class that only appears in the predictions has no true samples.
        recall = hits / support if support else float('nan')
        print('Class {}: {} / {} correct (recall {:.3f})'.format(class_id, hits, support, recall))

    total = np.sum(cm)
    print('Total: ', total)
    if total:
        print('Accuracy: {:.3f}'.format(correct_total / total))

In [None]:
import tensorflow.keras.backend as K
def sigmoid_focal_crossentropy(y_true, y_pred, alpha=0.5, gamma=0.5, from_logits=False):
    """Sigmoid focal loss, summed over the last axis.

    Focal loss (RetinaNet paper, https://arxiv.org/pdf/1708.02002.pdf) scales
    the element-wise binary cross-entropy by ``(1 - p_t) ** gamma`` so that
    well-classified entries contribute less, and by an ``alpha`` balancing
    term that weights positive and negative targets differently.

    Args:
        y_true: true targets tensor; converted to the dtype of ``y_pred``.
        y_pred: predictions tensor (probabilities, or logits when
            ``from_logits`` is True).
        alpha: balancing factor. A falsy value disables the balancing term.
        gamma: modulating factor. A falsy value disables the modulating term.
        from_logits: whether ``y_pred`` holds logits instead of probabilities.

    Returns:
        Tensor holding the weighted loss summed over the last axis.

    Raises:
        ValueError: if ``gamma`` is negative.
    """
    if gamma and gamma < 0:
        raise ValueError("Value of gamma should be greater than or equal to zero")

    predictions = tf.convert_to_tensor(y_pred)
    targets = tf.convert_to_tensor(y_true, dtype=predictions.dtype)

    # Element-wise binary cross-entropy between targets and predictions.
    per_entry_ce = K.binary_crossentropy(targets, predictions, from_logits=from_logits)

    # Work with probabilities from here on.
    probabilities = tf.sigmoid(predictions) if from_logits else predictions

    # p_t is the probability assigned to the true outcome of each entry.
    p_t = (targets * probabilities) + ((1 - targets) * (1 - probabilities))

    if alpha:
        alpha_t = tf.convert_to_tensor(alpha, dtype=K.floatx())
        alpha_factor = targets * alpha_t + (1 - targets) * (1 - alpha_t)
    else:
        alpha_factor = 1.0

    if gamma:
        gamma_t = tf.convert_to_tensor(gamma, dtype=K.floatx())
        modulating_factor = tf.pow((1.0 - p_t), gamma_t)
    else:
        modulating_factor = 1.0

    weighted_ce = alpha_factor * modulating_factor * per_entry_ce
    return tf.reduce_sum(weighted_ce, axis=-1)

In [None]:
def build_efficient_model(input_layer, input_shape, model_inputs, num_classes, dropout_rate=0.2):
    """Build a classifier made of a frozen EfficientNetB3 backbone and a new head.

    Args:
        input_layer: Keras input tensor that becomes the model's input.
        input_shape: image shape handed to the EfficientNetB3 constructor.
        model_inputs: tensor fed into the backbone (``input_layer`` after any
            preprocessing the caller applied).
        num_classes: number of units in the softmax output layer.
        dropout_rate: passed to the backbone as ``drop_connect_rate``.

    Returns:
        An uncompiled ``tf.keras.Model`` named "EfficientNet".
    """
    backbone = tf.keras.applications.EfficientNetB3(
        weights='/kaggle/input/efficientnetb3notop/efficientnetb3_notop.h5',
        include_top=False,
        input_shape=input_shape,
        drop_connect_rate=dropout_rate,
    )

    # Freeze the pretrained weights and call the backbone in inference mode.
    backbone.trainable = False
    features = backbone(model_inputs, training=False)

    # New head: global average pooling straight into the softmax classifier
    # (no BatchNormalization or Dropout in between).
    pooled = tf.keras.layers.GlobalAveragePooling2D(name="avg_pool")(features)
    predictions = tf.keras.layers.Dense(num_classes, activation="softmax", name="pred")(pooled)

    return tf.keras.Model(input_layer, predictions, name="EfficientNet")

In [None]:
# Model configuration.
input_shape = (512, 512, 3)   # image shape given to the backbone
dropout_rate=0.2              # passed to the backbone as drop_connect_rate
num_classes = len(class_map)  # one output unit per entry in the label map

In [None]:
# Build the complete model. No explicit rescaling is added here because the tf
# implementation of EfficientNet already has a rescaling layer.

# Accept uint8 images of any height/width and cast them to float32.
input_layer = tf.keras.layers.Input([None, None, 3], dtype = tf.uint8)
x = tf.cast(input_layer, tf.float32)
# NOTE(review): training=False is fixed here when the graph is built, so the
# random layers presumably stay in inference mode even during model.fit().
# That matches the "no augmentation" baseline, but confirm it is intended.
x = data_augmentation(x, training=False)
x = tf.keras.layers.experimental.preprocessing.Resizing(input_shape[0], input_shape[1])(x)

base_model = tf.keras.applications.EfficientNetB5(weights='/kaggle/input/efficientnetb5notop/efficientnetb5.h5', 
                              include_top=False, 
                                input_shape=input_shape, 
                              drop_connect_rate=dropout_rate)
    
# Freeze the pretrained weights and call the backbone in inference mode
base_model.trainable = False
model_output = base_model(x, training=False)
# Rebuild top: global average pooling followed by the softmax classifier
x = tf.keras.layers.GlobalAveragePooling2D(name="avg_pool")(model_output)
#     x = tf.keras.layers.BatchNormalization()(x)

#     x = tf.keras.layers.Dropout(dropout_rate, name="top_dropout")(x)
outputs = tf.keras.layers.Dense(num_classes, activation="softmax", name="pred")(x)

# Assemble the end-to-end model (compilation happens in a later cell)
model = tf.keras.Model(input_layer, outputs, name="EfficientNet")
    
# model = build_efficient_model(input_layer, input_shape, x, len(class_map))

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Partially unfreeze the backbone: every layer before 'block7a_expand_conv'
# stays frozen, while that layer and all later ones become trainable.
# NOTE(review): this runs before the first compile/fit, so the "baseline"
# already trains these backbone layers - confirm this is intended.
layers = [layer.name for layer in base_model.layers]
fine_tune_at = layers.index('block7a_expand_conv')
base_model.trainable = True
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable =  False

In [None]:
# since the class is imbalanced, it is helpful to include other metrics than accuracy
METRICS = [
      tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc')
]

optimizer = tf.keras.optimizers.Adam()

callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='best_model.h5', monitor='val_accuracy', save_best_only=False)]

In [None]:
# Compile with plain categorical cross-entropy, which matches the one-hot labels
# produced by process_data.
model.compile(
    optimizer=optimizer, loss="categorical_crossentropy", metrics=METRICS
)

In [None]:
# Train for 4 epochs, evaluating on the validation set after each epoch; the
# returned history feeds plot_metrics below.
hist = model.fit(train_ds, epochs=4, validation_data=val_ds, callbacks=callbacks)

In [None]:
# Plot train/validation curves for the loss and the imbalance-aware metrics.
plot_metrics(hist, ['loss', 'auc', 'precision', 'recall'])

In [None]:
# Predict class probabilities for the validation set and keep the most likely
# class per image. val_ds is not shuffled, so the predictions line up with the
# rows of `val`.
val_preds = model.predict(val_ds)
val_preds = tf.math.argmax(val_preds, -1)

In [None]:
# Confusion matrix of true vs. predicted validation labels.
plot_cm(val.label.values, val_preds.numpy())

In [None]:
# Overall validation accuracy: fraction of images whose predicted class equals the true label.
np.mean(np.where(val.label.values==val_preds, 1, 0))

In [None]:
# fine_tune_at = layers.index('block7a_expand_conv')
# base_model.trainable = True
# for layer in base_model.layers[:fine_tune_at]:
#     layer.trainable =  False

# model.compile(
#     optimizer=tf.keras.optimizers.Adam(tf.keras.experimental.CosineDecay(1e-4, 300*10)), 
#     loss='categorical_crossentropy', metrics=METRICS
# )

In [None]:
# fine_tune_epochs = 5
# total_epochs =  5 + fine_tune_epochs

# history_fine = model.fit(train_ds,
#                          epochs=total_epochs,
#                          initial_epoch=hist.epoch[-1], 
#                          validation_data=val_ds, callbacks=callbacks)

In [None]:
# %%time

# pred_list = []
# for i in range(5):
#     print(i)
#     each_loop = np.empty((0, ))
#     for images, labels in val_ds:
#         images = data_augmentation(images, training=True)
# #         print(images.shape)
#         val_preds = efficient.predict(images)
#         val_preds = tf.math.argmax(val_preds, -1)
#         each_loop = np.concatenate((each_loop, val_preds), 0) 
#     pred_list.append(each_loop)

In [None]:
# pred_list = np.array(pred_list)
# pred_list.shape

In [None]:
# pred_list[:, 0]

In [None]:
# np.mean(np.where(val.label.values==stats.mode(pred_list)[0].reshape((-1,)), 1, 0))

## Predict on test data

In [None]:
@tf.function
def process_img(path):
    """Read the file at ``path`` and decode it as a 3-channel JPEG image.

    Args:
        path: full path of the image file.

    Returns:
        The decoded image tensor.
    """
    return tf.image.decode_jpeg(tf.io.read_file(path), channels=3)

In [None]:
# Collect the test images. An absolute path (like every other path in this
# notebook) keeps the glob independent of the current working directory, and
# sorting makes the file order - and so the submission row order - deterministic.
TEST_FILENAMES = sorted(tf.io.gfile.glob('/kaggle/input/cassava-leaf-disease-classification/test_images/*.jpg'))
# TEST_FILENAMES

In [None]:
# Test pipeline: decode each file and batch it. There are no labels and no
# shuffling, so predictions keep the order of TEST_FILENAMES.
test_ds = tf.data.Dataset.from_tensor_slices((TEST_FILENAMES))\
                .map(process_img, num_parallel_calls = tf.data.experimental.AUTOTUNE)\
                .batch(batch_size)\
                .prefetch(tf.data.experimental.AUTOTUNE) 

In [None]:
# Predict class probabilities for every test image and take the most likely class index.
probabilities = model.predict(test_ds)
predictions = np.argmax(probabilities, axis=-1)
# print(predictions)

In [None]:
# Build the submission table: each test file name (directory stripped) paired
# with its predicted class index, written to submission.csv and displayed.
test_ids = list(map(os.path.basename, TEST_FILENAMES))
# test_ids
submission = pd.DataFrame({'image_id': test_ids, 'label': predictions})

submission.to_csv('submission.csv', index = False)
submission