In [None]:
!pip install --quiet ../input/kerasapplications
!pip install --quiet ../input/efficientnet-git

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set_style("whitegrid")

import tensorflow as tf
import tensorflow_datasets as tfds
import keras
from keras import backend as K
from sklearn.model_selection import KFold
from efficientnet import keras as efn

from kaggle_datasets import KaggleDatasets

import os, re, sys
import json
import math
from functools import partial

# Print the full path of every entry in the competition input directory.
inputdir = "/kaggle/input/cassava-leaf-disease-classification"
listdir = os.listdir(inputdir)
for filename in listdir:
    filepath = os.path.join(inputdir, filename)
    print(filepath)

In [None]:
# Detect hardware, return appropriate distribution strategy
try:
    # TPU detection. No parameters necessary if TPU_NAME environment variable is set. 
    # On Kaggle this is always the case.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  
    print('Running on TPU ', tpu.master())
except ValueError:
    # A ValueError from the resolver is treated as "no TPU available".
    tpu = None

if tpu:
    print("tpu")
    # Connect to the TPU cluster and initialize it before creating the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
    DEVICE = "TPU"
else:
    # default distribution strategy in Tensorflow. 
    # Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()
    DEVICE = "notTPU"

# Number of replicas kept in sync by the strategy; used later to scale the batch size.
REPLICAS = strategy.num_replicas_in_sync

print("REPLICAS: ", REPLICAS)

# Parameters

In [None]:
# Dataset / cross-validation sizes
# TRAIN_SIZE is the number of indices handed to KFold; it is assumed to equal
# the number of records in the train TFRecords — TODO confirm.
TRAIN_SIZE = 21397
N_CLASS = 5   # depth of the one-hot labels and width of the softmax output
N_SPLITS = 5  # number of KFold folds

# Training configuration
IMAGE_SIZE = [256, 256]  # size every decoded image is resized to
EPOCHS = 5
BATCH_SIZE = 16 * REPLICAS  # 16 examples per replica
AUG_BATCH = BATCH_SIZE  # number of images CutMix iterates over in one batch
STEPS_PER_EPOCH = TRAIN_SIZE // BATCH_SIZE
AUTO = tf.data.experimental.AUTOTUNE

# Glob patterns for the TFRecord shards, joined onto MAIN_PATH when loading
TRAIN_PATH = "train_tfrecords/*.tfrec"
TEST_PATH = "test_tfrecords/ld_test*.tfrec"
MAIN_PATH = "/kaggle/input/cassava-leaf-disease-classification"

# On TPU the local path is replaced by the dataset's GCS path.
if DEVICE == "TPU":
    MAIN_PATH = KaggleDatasets().get_gcs_path("cassava-leaf-disease-classification")    

# Read Disease Map

In [None]:
# Load the JSON file that maps label numbers to disease names.
json_path = "/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json"
with open(json_path, "r") as file:
    disease_map = json.load(file)

# Display the loaded mapping as the cell output.
disease_map

# Load TfRecords

In [None]:
def get_filenames(path):
    """Return the list of file paths matching the glob pattern ``path``."""
    matched_files = tf.io.gfile.glob(path)
    return matched_files

def load_records(filenames):
    """Open the given TFRecord files as one dataset of serialized records.

    ``num_parallel_reads=-1`` is passed through as-is; -1 is presumably the
    AUTOTUNE sentinel — confirm against the installed TensorFlow version.
    """
    return tf.data.TFRecordDataset(filenames, num_parallel_reads=-1)

# Resolve the TFRecord shard paths for both splits under MAIN_PATH.
train_filenames = get_filenames(os.path.join(MAIN_PATH, TRAIN_PATH))
test_filenames = get_filenames(os.path.join(MAIN_PATH, TEST_PATH))

# Raw (still serialized) record datasets; they are parsed in a later cell.
train_records = load_records(train_filenames)
test_records = load_records(test_filenames)

# TFrecords features

Train features: image, image_name, target

Test features: image, image_name

# Parse Tensors

In [None]:
# Create features description to build their shape and type signature
# `config` is just a short alias for tf.io.FixedLenFeature.
config = tf.io.FixedLenFeature

# Train records: scalar bytes image, scalar bytes name and scalar int64 target.
feature_train = {"image"     : config([], tf.string),
                 "image_name": config([], tf.string),
                 "target"    : config([], tf.int64),}

# Test records: same as train but without the "target" feature.
feature_test = {"image_name" : config([], tf.string),
                "image"      : config([], tf.string),}


# The image feature is stored as encoded JPEG bytes, so it has to be decoded
def decode_image(raw):
    """Decode JPEG bytes into a float32 RGB image scaled to the 0..1 range.

    Args:
        raw: scalar string tensor holding the JPEG-encoded image.

    Returns:
        A float32 tensor of shape ``(*IMAGE_SIZE, 3)``.
    """

    # Force three channels: with the default (channels=0) a grayscale JPEG
    # decodes to a single channel and the reshape below would fail.
    decoded = tf.io.decode_jpeg(raw, channels=3)
    image = tf.image.resize(decoded, size=IMAGE_SIZE)
    # Pin the static shape so downstream ops know the exact dimensions.
    image = tf.reshape(image, shape=(*IMAGE_SIZE, 3))
    image = tf.cast(image, tf.float32) / 255. # normalize to 0..1 value
    return image


# Turn one serialized example into the tensors the pipeline consumes
def read_tfrecords(example_single, features):
    """Parse a single serialized TFRecord example.

    Args:
        example_single: one serialized example from the record dataset.
        features: feature description dict used for parsing.

    Returns:
        ``(image, target)`` when ``features`` has a truthy "target" entry
        (train data), otherwise ``(image_name, image)`` (test data).
    """

    record = tf.io.parse_single_example(example_single, features)
    image = decode_image(record.get("image"))

    # Test dataset: no target described, so hand back the name with the image
    if not features.get("target"):
        return record.get("image_name"), image

    # Train dataset
    return image, record.get("target")

In [None]:
# Use functools.partial to bind the feature description for each split, so the
# resulting callables take only the serialized example (as Dataset.map expects).


# Parser for train data: yields (image, target)
parse_train = partial(read_tfrecords, features=feature_train)

# Parser for test data: yields (image_name, image)
parse_test = partial(read_tfrecords, features=feature_test)


# Map the parsers over the raw record datasets
train_dataset = train_records.map(parse_train)
test_dataset = test_records.map(parse_test)

In [None]:
# Sanity check: print the array shapes of the first parsed training example.
# x is the (image, target) tuple produced by parse_train.
for x in train_dataset.take(1):
    print("Image Shape {.shape}".format(x[0].numpy()))
    print("Target Shape {.shape}".format(x[1].numpy()))

# EDA
Leaf Categories:

'0': 'Cassava Bacterial Blight (CBB)'      <br/>
'1': 'Cassava Brown Streak Disease (CBSD)' <br/>
'2': 'Cassava Green Mottle (CGM)'          <br/>
'3': 'Cassava Mosaic Disease (CMD)'        <br/>
'4': 'Healthy'                             <br/>

### Class Distribution

In [None]:
# Read csv file
train_csv = pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")

# Bar chart of how many rows train.csv has for each value of "label".
sns.countplot(x="label", data=train_csv)
plt.title("Label Distribution");

In [None]:
# Filter dataset based on labels: one lazily filtered dataset per class (0-4).
# The variable names follow the label-to-disease list in the EDA section.
bacterial_blight = train_dataset.filter(lambda image, label: label == 0)
brown_disease    = train_dataset.filter(lambda image, label: label == 1)
green_mottle     = train_dataset.filter(lambda image, label: label == 2)
mosaic_disease   = train_dataset.filter(lambda image, label: label == 3)
healthy          = train_dataset.filter(lambda image, label: label == 4)

In [None]:
def plot_image(rows, cols, dataset):
    """Draw the first ``rows * cols`` (image, label) pairs of ``dataset`` in a grid.

    Each subplot shows the image with its label as the title and the axes hidden.
    """
    plt.figure(figsize=(3 * cols, rows * 3))
    samples = dataset.take(rows * cols)
    # Subplot positions are 1-based, hence start=1.
    for position, (image, label) in enumerate(samples, start=1):
        plt.subplot(rows, cols, position)
        plt.imshow(image.numpy())
        plt.title(label.numpy())
        plt.axis("off")

### Cassava Bacterial Blight

In [None]:
# Show the first five label-0 samples in a single row.
plot_image(1, 5, bacterial_blight)

### Cassava Brown Streak Disease

In [None]:
# Show the first five label-1 samples in a single row.
plot_image(1, 5, brown_disease)

### Cassava Green Mottle

In [None]:
# Show the first five label-2 samples in a single row.
plot_image(1, 5, green_mottle)

### Cassava Mosaic Disease

In [None]:
# Show the first five label-3 samples in a single row.
plot_image(1, 5, mosaic_disease)

### Healthy

In [None]:
# Show the first five label-4 samples in a single row.
plot_image(1, 5, healthy)

# Image Augmentation

### Rotation, Shift, Zoom, Shear

In [None]:
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Build the 3x3 matrix that combines rotation, shear, zoom and shift.

    Every argument is concatenated along axis 0 with shape-[1] constants and
    the result reshaped to [3, 3], so each one must be a shape-[1] float
    tensor.

    Args:
        rotation: rotation angle in degrees.
        shear: shear angle in degrees.
        height_zoom, width_zoom: zoom factors; their reciprocals go on the
            diagonal of the zoom matrix.
        height_shift, width_shift: translation entries of the shift matrix.

    Returns:
        The product (rotation @ shear) @ (zoom @ shift) as a 3x3 tensor.
    """
    # returns 3x3 transform matrix which transforms indices
        
    # CONVERT DEGREES TO RADIANS
    rotation = math.pi * rotation / 180.
    shear = math.pi * shear / 180.
    
    # ROTATION MATRIX
    c1 = tf.math.cos(rotation)
    s1 = tf.math.sin(rotation)
    one = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')
    rotation_matrix = tf.reshape( tf.concat([c1,s1,zero, -s1,c1,zero, zero,zero,one],axis=0),[3,3] )
        
    # SHEAR MATRIX
    c2 = tf.math.cos(shear)
    s2 = tf.math.sin(shear)
    shear_matrix = tf.reshape( tf.concat([one,s2,zero, zero,c2,zero, zero,zero,one],axis=0),[3,3] )    
    
    # ZOOM MATRIX
    zoom_matrix = tf.reshape( tf.concat([one/height_zoom,zero,zero, zero,one/width_zoom,zero, zero,zero,one],axis=0),[3,3] )
    
    # SHIFT MATRIX
    shift_matrix = tf.reshape( tf.concat([one,zero,height_shift, zero,one,width_shift, zero,zero,one],axis=0),[3,3] )
    
    return K.dot(K.dot(rotation_matrix, shear_matrix), K.dot(zoom_matrix, shift_matrix))

In [None]:
def rot_shift_zoom_shear(image, DIM = IMAGE_SIZE[0]):
    """Apply a random rotation, shear, zoom and shift to one square image.

    The transform parameters are drawn from normal distributions, combined
    into a single matrix by ``get_mat``, and the output is built by looking up
    the source pixel for every destination pixel (integer indices, clipped to
    the image bounds).

    Args:
        image: a single image indexable as [row, col, channel] with DIM rows
            and DIM columns (not a batch).
        DIM: side length of the image; defaults to IMAGE_SIZE[0].

    Returns:
        The transformed image reshaped to [DIM, DIM, 3].
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted

    # 1 for odd DIM, 0 for even DIM; used to adjust the lower clip bound below
    XDIM = DIM % 2
    
    # Random parameters: angles in degrees, zoom centred on 1.0, shifts scaled by 16
    rot = 15. * tf.random.normal([1],dtype='float32')
    shr = 5. * tf.random.normal([1],dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    w_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    h_shift = 16. * tf.random.normal([1],dtype='float32') 
    w_shift = 16. * tf.random.normal([1],dtype='float32') 
  
    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred on the image middle; z is the homogeneous 1.
    x = tf.repeat( tf.range(DIM//2,-DIM//2,-1), DIM )
    y = tf.tile( tf.range(-DIM//2,DIM//2),[DIM] )
    z = tf.ones([DIM*DIM],dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m,tf.cast(idx,dtype='float32'))
    idx2 = K.cast(idx2,dtype='int32')
    # Clip so the lookups below stay inside the image
    idx2 = K.clip(idx2,-DIM//2+XDIM+1,DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert the centred coordinates back to array row/column indices
    idx3 = tf.stack( [DIM//2-idx2[0,], DIM//2-1+idx2[1,]] )
    d = tf.gather_nd(image,tf.transpose(idx3))
        
    return tf.reshape(d,[DIM,DIM,3])

### CutMix
from <a href="https://www.kaggle.com/cdeotte/cutmix-and-mixup-on-gpu-tpu">here</a>

In [None]:
def onehot(image,label):
    """Return the image untouched, paired with its label one-hot encoded over N_CLASS classes."""
    encoded_label = tf.one_hot(label, N_CLASS)
    return image, encoded_label

In [None]:
def cutmix(image, label, PROBABILITY = 1.0):
    """Apply CutMix to a batch: paste a square patch from a random batch mate.

    For every position j in the batch a partner image k, a patch centre and a
    patch width are drawn at random; the patch region of image j is replaced
    by the same region of image k and the labels are blended accordingly.

    Args:
        image: batch of images indexable as [n, row, col, channel]. The loop
            and the final reshape require exactly AUG_BATCH images of size
            IMAGE_SIZE[0] x IMAGE_SIZE[0] x 3.
        label: either rank-1 class indices (one-hot encoded here) or an
            already one-hot / soft label matrix with one row per image.
        PROBABILITY: chance, per image, that a patch is actually pasted.

    Returns:
        (images, labels) reshaped to (AUG_BATCH, DIM, DIM, 3) and
        (AUG_BATCH, N_CLASS).
    """
    # input image - is a batch of images of size [n,dim,dim,3] not a single image of [dim,dim,3]
    # output - a batch of images with cutmix applied
    DIM = IMAGE_SIZE[0]
    CLASSES = N_CLASS
    
    imgs = []; labs = []
    for j in range(AUG_BATCH):
        # DO CUTMIX WITH PROBABILITY DEFINED ABOVE
        # P is 1 when CutMix is applied to this image and 0 otherwise
        P = tf.cast( tf.random.uniform([],0,1)<=PROBABILITY, tf.int32)
        # CHOOSE RANDOM IMAGE TO CUTMIX WITH
        k = tf.cast( tf.random.uniform([],0,AUG_BATCH),tf.int32)
        # CHOOSE RANDOM LOCATION
        x = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        y = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        b = tf.random.uniform([],0,1) # this is beta dist with alpha=1.0
        # Multiplying by P collapses the patch to width 0 when CutMix is skipped
        WIDTH = tf.cast( DIM * tf.math.sqrt(1-b),tf.int32) * P
        # Patch bounds, clipped to the image
        ya = tf.math.maximum(0,y-WIDTH//2)
        yb = tf.math.minimum(DIM,y+WIDTH//2)
        xa = tf.math.maximum(0,x-WIDTH//2)
        xb = tf.math.minimum(DIM,x+WIDTH//2)
        # MAKE CUTMIX IMAGE
        # Rows ya:yb are rebuilt as [left of j | patch from k | right of j],
        # then stacked between the untouched rows above and below.
        one = image[j,ya:yb,0:xa,:]
        two = image[k,ya:yb,xa:xb,:]
        three = image[j,ya:yb,xb:DIM,:]
        middle = tf.concat([one,two,three],axis=1)
        img = tf.concat([image[j,0:ya,:,:],middle,image[j,yb:DIM,:,:]],axis=0)
        imgs.append(img)
        # MAKE CUTMIX LABEL
        # NOTE(review): `a` uses the nominal WIDTH, not the clipped patch area
        # (yb-ya)*(xb-xa), so for patches cut off by the border the label
        # weight is larger than the pasted area's share of the image.
        a = tf.cast(WIDTH*WIDTH/DIM/DIM,tf.float32)
        if len(label.shape)==1:
            lab1 = tf.one_hot(label[j],CLASSES)
            lab2 = tf.one_hot(label[k],CLASSES)
        else:
            lab1 = label[j,]
            lab2 = label[k,]
        labs.append((1-a)*lab1 + a*lab2)
            
    # RESHAPE HACK SO TPU COMPILER KNOWS SHAPE OF OUTPUT TENSOR (maybe use Python typing instead?)
    image2 = tf.reshape(tf.stack(imgs),(AUG_BATCH,DIM,DIM,3))
    label2 = tf.reshape(tf.stack(labs),(AUG_BATCH,CLASSES))
    return image2,label2

### Augmentation

In [None]:
def rotshift_aug(image, label):
    """Per-example augmentation: randomly transform the image, one-hot the label.

    The rotation/shift/zoom/shear transform is applied when a uniform draw is
    at most 0.3; the label is always one-hot encoded over N_CLASS classes.

    Returns:
        (float32 image, one-hot label).
    """
    prob = tf.random.uniform(shape=[], minval=0.0, maxval=1.0)
    # Tensor-valued condition: relies on the function being traced (e.g. by
    # Dataset.map) so the `if` becomes a graph conditional.
    if prob <= 0.3:
        image = rot_shift_zoom_shear(image)    
    
    label = tf.one_hot(label, N_CLASS)    
    return tf.cast(image, tf.float32), label


def cutmix_aug(image, label):
    """Per-batch augmentation: apply CutMix when a uniform draw is at most 0.5.

    When CutMix is skipped the labels are returned unchanged.
    NOTE(review): both branches must yield labels of the same shape/dtype, so
    this presumably expects already one-hot float labels — confirm with caller.

    Returns:
        (float32 image batch, label batch).
    """
    prob = tf.random.uniform(shape=[], minval=0.0, maxval=1.0)
    # Tensor-valued condition; see the note above about matching branch outputs.
    if prob <= 0.5: 
        image, label = cutmix(image, label,)
    
    return tf.cast(image, tf.float32), label    

# Augmented Pictures

In [None]:
# # Show augmented images
# rows, cols = 2, 5
# rows = min(rows, AUG_BATCH//cols)

# augmented_element = train_dataset.batch(BATCH_SIZE).map(augmentation)

# for (img,label) in augmented_element:
#     plt.figure(figsize=(15,int(15*rows/cols)))
#     for j in range(rows * cols):
#         plt.subplot(rows, cols, j+1)
#         plt.axis('off')
#         plt.imshow(img[j,])
#     plt.show();
#     break

# Modeling

In [None]:
def filter_layers(model, layer_name):
    """Freeze every layer whose name does not match ``layer_name``.

    ``layer_name`` is used as a regular expression and searched for anywhere
    in each layer's name. Non-matching layers get ``trainable = False``;
    matching layers are left untouched. The model is modified in place and
    also returned.
    """
    pattern = re.compile(r"{name}".format(name=layer_name))

    # Layers to freeze for transfer learning: those the pattern does not hit.
    frozen_layers = (layer for layer in model.layers
                     if pattern.search(layer.name) is None)
    for layer in frozen_layers:
        layer.trainable = False

    return model

In [None]:
# dense_model = filter_layers(dense_net, "conv5_block")

In [None]:
# Keyword arguments for the EfficientNet backbone: no classification head,
# global average pooling on top, and no weights loaded by the constructor
# (they are loaded from a local .h5 file below).
params = {"include_top": False, 
          "pooling": "avg", 
          "input_shape": (*IMAGE_SIZE, 3),
          "weights": None}

loss = "categorical_crossentropy"
# NOTE: this string is overwritten by the Adamax instance created inside the
# strategy scope below before it is ever used.
optimizer = "adam"
metric = "categorical_accuracy"

# Build and compile the model inside the distribution strategy's scope.
with strategy.scope():
    
    # Load model
    # NOTE: despite its name, `dense_net` holds an EfficientNet backbone
    # (B0 on TPU, B2 otherwise).
    if DEVICE == "TPU":
        dense_net = efn.EfficientNetB0(**params)
        dense_net.load_weights("../input/pretrained-cnn-weights/efficientnet-b0_256_weights.h5")
    else:
        dense_net = efn.EfficientNetB2(**params)
        dense_net.load_weights("../input/pretrained-cnn-weights/efficientnet-b2_256_weights.h5")
    
    # Build layers
    # backbone -> dropout -> softmax over N_CLASS classes
    input_image = keras.Input(shape=[*IMAGE_SIZE, 3])
    dense_nets = dense_net(input_image)
    dropout = keras.layers.Dropout(0.3)(dense_nets)
    output = keras.layers.Dense(N_CLASS, activation="softmax")(dropout)
    
    # Initialize Model
    base_model = keras.models.Model(input_image, output)
    
    # Model Optimizer
    optimizer = keras.optimizers.Adamax(learning_rate=1e-3)

    base_model.compile(loss=loss, optimizer=optimizer, metrics=[metric])
    
    
# Set initial weights
# Snapshot taken before any training; restored after each fold in the training loop.
initial_weights = base_model.get_weights()

# KFold on Tf.Dataset
Inspired by the approach described <a href="https://stackoverflow.com/questions/59669413/what-is-the-canonical-way-to-split-tf-dataset-into-test-and-validation-subsets">here</a>.

In [None]:
# Fold Train Dataset
# KFold is created without shuffling and run over the indices 0..TRAIN_SIZE-1;
# `splits` is a list of (train_indices, validation_indices) pairs, one per fold.
kfold = KFold(n_splits=N_SPLITS)
splits = list(kfold.split(np.arange(TRAIN_SIZE)))

In [None]:
def kfold_dataset(dataset: tf.data.Dataset, fold_splits):
    """
    Splits a dataset of type tf.data.Dataset into a training and validation dataset 
    using KFold splits.

    The validation fold is taken to be the contiguous index range
    [min(val_indices), max(val_indices)], which holds for KFold without
    shuffling. Every element whose position falls inside that range goes to
    validation and every other element goes to training, so the two outputs
    are disjoint.
    
    @param dataset: the input dataset to split.
    @param fold_splits: one (train_indices, val_indices) pair produced by the
                        KFold .split() method.
    @return: a tuple of two tf.data.Datasets as (training, validation)
    """
    
    _, val_fold = fold_splits # only the validation indices are needed
    
    val_min = np.min(val_fold) # first validation index
    val_max = np.max(val_fold) # last validation index
    
    indexed = dataset.enumerate()

    # Explicit TF logical ops: Python `and`/`or` on tensors only work if the
    # predicate happens to be rewritten by autograph.
    def in_validation(index, data):
        return tf.logical_and(index >= val_min, index <= val_max)

    # Strict bounds so the first and last validation examples do not leak
    # into the training split.
    def in_training(index, data):
        return tf.logical_or(index < val_min, index > val_max)

    # Filter on the position, then drop the enumeration index again.
    train_dataset = indexed.filter(in_training).map(lambda index, data: data)
    validation_dataset = indexed.filter(in_validation).map(lambda index, data: data)

    return train_dataset, validation_dataset

# Training

In [None]:
# Train steps
# Steps per epoch are derived from the training-index count of the first fold
# and reused for every fold in the training loop.
train, val = splits[0]
TRAIN_STEPS = len(train) // BATCH_SIZE

In [None]:
# Train the same compiled model once per fold, saving the best checkpoint and
# its weights, then restore the initial weights before the next fold.
for fold, split in enumerate(splits):
    
    print("Fold {}".format(fold))
    model_name = "model_fold_{}.h5".format(fold)
    weights_name = "weights_fold_{}.h5".format(fold)
    
    train_split, val_split = kfold_dataset(train_dataset, split)
    
    
    # Training pipeline: per-example augmentation, then batching, then the
    # batch-level CutMix. Repeating before batching means no partial batch is
    # produced, which cutmix needs (it indexes exactly AUG_BATCH images).
    train_input = (train_split.repeat()                                  # note: repeat then map
                            .map(rotshift_aug, num_parallel_calls=AUTO)
                            .batch(BATCH_SIZE)
                            .map(cutmix_aug, num_parallel_calls=AUTO)
                            .prefetch(AUTO))

    # Validation data is only one-hot encoded and batched (no augmentation).
    val_input = val_split.map(onehot).batch(BATCH_SIZE)
    
    # Keep only the checkpoint with the best validation metric for this fold.
    model_checkpoint = keras.callbacks.ModelCheckpoint(
                                            filepath=model_name,
                                            monitor="val_" + metric,
                                            save_best_only=True)
    
    base_model.fit(train_input, 
                     epochs=EPOCHS,
                     steps_per_epoch=TRAIN_STEPS,
                     validation_data=val_input,
                     callbacks = [model_checkpoint])
    
    # Save weights
    # Reload the best checkpoint of this fold and store just its weights.
    best_model = keras.models.load_model(model_name)
    best_model.save_weights(weights_name)
    
    # Reset weights for every fold :D
    # NOTE(review): only the model weights are restored; the optimizer object
    # created at compile time is reused across folds — confirm whether its
    # state should be reset as well.
    base_model.set_weights(initial_weights)

# Predictions

In [None]:
# Test Dataset
test_input = test_dataset.batch(BATCH_SIZE)

# Test Images and Names
# Each test element is (image_name, image); split it into two datasets.
# NOTE(review): the two datasets are iterated separately, so this assumes the
# record order is the same on both passes — confirm.
test_images = test_input.map(lambda image_name, image: image)
# Names are re-batched into batches of up to 15000 entries so they can be
# pulled out in a single step later.
test_names = test_input.map(lambda image_name, image: image_name).unbatch().batch(15000)

# Define weights
weights = []

# Get all fold weights
for index in range(N_SPLITS):
    base_model.load_weights("weights_fold_{}.h5".format(index))
    fold_weight = base_model.get_weights()
    weights.append(fold_weight)
    
# Average of all weights
# zip(*weights) groups the corresponding weight array from every fold; each
# group is averaged element-wise across folds.
w = []
for weight in zip(*weights):
    w.append(np.mean(weight, axis=0))

In [None]:
# Set average weight
base_model.set_weights(w)

# Predictions
# Class probabilities per test image, then the index of the most likely class.
probability = base_model.predict(test_images)
prediction = np.argmax(probability, axis=1)
# Only the first batch of names (at most 15000) is read and converted to str.
image_ids = next(iter(test_names)).numpy().astype("U")

# Submission

In [None]:
# Build the submission straight from the predicted ids and labels instead of
# overwriting the columns of sample_submission.csv: that file's row count
# need not match the number of test records, in which case the column
# assignment would fail. The column names and order match the sample file.
submission = pd.DataFrame({"image_id": image_ids, "label": prediction})

submission.to_csv("submission.csv", index=False)

In [None]:
# Display the submission table as the cell output.
submission

### Things that worked:
- Using DenseNet201, DenseNet169 and EfficientNetB0
- RotShift and CutMix Aug
- KFold 5

### Things that didn't work:
- GridMask Aug or any Blockout Aug

### Models Note:
- On the leaderboard (LB), however, DenseNet still wins (0.86 LB) versus EfficientNetB0 (0.83 LB)
- EfficientNetB0 performs better than DenseNet
- DenseNet201 and DenseNet169 didn't differ much (both reach 0.84 CV and 0.86 LB)


### Optimizers Note:
- AdaMax converges faster than Adam (in 5 epochs or fewer)
- Adam performs well on this dataset (but needs more than 5 epochs to converge)
- RMSProp performs worse than Adam (it gets stuck in a suboptimal minimum)
- Adagrad performs worse at the start of training here

### Additional Note:
- RotShift and CutMix augmentation each produce the same result when applied individually, but applying them together increases the LB score by 0.001