Install the Keras Applications and EfficientNet pip packages, which are needed to load the EfficientNet models.
Keras Applications is a dependency of EfficientNet, so it is installed first.

In [None]:
# Install both packages from local wheel files under /kaggle/input (quiet mode).
# Keras_Applications is installed before efficientnet because efficientnet depends on it.
!pip install -q '/kaggle/input/birdcall-identification-submission-custom/Keras_Applications-1.0.8-py3-none-any.whl'
!pip install -q '/kaggle/input/birdcall-identification-submission-custom/efficientnet-1.1.0-py3-none-any.whl'

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import efficientnet.tfkeras as efn
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

In [None]:
# Height and width (pixels) every decoded test image is reshaped to
IMG_HEIGHT = 600
IMG_WIDTH = 800

# Upper bound of the random square crop taken during test time augmentation
IMG_SIZE = 600
# Side length (pixels) of the square model input; augmented images are resized to this
IMG_TARGET_SIZE = 512
# Number of colour channels per image
N_CHANNELS = 3
# Number of classes predicted by the final Dense layer
N_LABELS = 5
# Number of per-fold weight files that are ensembled at prediction time
N_FOLDS = 5

# Number of images per batch of the test dataset
BATCH_SIZE = 16

# Sentinel that lets tf.data pick parallelism and prefetch buffer sizes
AUTO = tf.data.experimental.AUTOTUNE

# ImageNet mean and standard deviation, used for normalizing images
IMAGENET_MEAN = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
IMAGENET_STD = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)

# Model

In [None]:
def get_model(fold):
    """Build the EfficientNetB4 classifier and load the weights stored for `fold`.

    Args:
        fold: fold identifier, interpolated into the weights file name.

    Returns:
        A `tf.keras.Sequential` model consisting of the EfficientNetB4 backbone
        (without its top) followed by dropout, global average pooling, dropout
        and a softmax Dense layer with N_LABELS outputs.
    """
    # start from a clean Keras state so earlier models do not linger in memory
    tf.keras.backend.clear_session()

    backbone = efn.EfficientNetB4(
        weights=None,
        include_top=False,
        input_shape=(IMG_TARGET_SIZE, IMG_TARGET_SIZE, N_CHANNELS),
    )

    # every backbone layer is marked trainable, except BatchNormalization layers
    for backbone_layer in reversed(backbone.layers):
        is_batch_norm = isinstance(backbone_layer, tf.keras.layers.BatchNormalization)
        backbone_layer.trainable = not is_batch_norm

    # classification head; the layer order must match the saved weights
    head = [
        tf.keras.layers.Dropout(0.45),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(0.45),
        tf.keras.layers.Dense(N_LABELS, activation='softmax', dtype=tf.float32),
    ]
    model = tf.keras.Sequential([backbone, *head])

    weights_path = f'/kaggle/input/cassava-leaf-disease-prediction/model_fold_{fold}_weights.h5'
    model.load_weights(weights_path)

    return model

# Test Dataset

In [None]:
@tf.function
def decode_tfrecord_test(file_path):
    """Read one JPEG test image from disk and return it together with its file name.

    Despite the function name no TFRecord is involved: `file_path` points to a
    JPEG file that is read and decoded directly.

    Args:
        file_path: scalar string tensor holding the path of the image file.

    Returns:
        image: float32 tensor of shape [IMG_HEIGHT, IMG_WIDTH, N_CHANNELS] with
            the decoded pixel values (no normalisation is applied here).
        image_id: scalar string tensor, the last '/'-separated part of `file_path`.
    """
    raw_bytes = tf.io.read_file(file_path)
    # Request a fixed channel count so a grayscale JPEG is expanded to
    # N_CHANNELS channels instead of breaking the reshape below.
    image = tf.io.decode_jpeg(raw_bytes, channels=N_CHANNELS)

    # pin the static shape; every test image is expected to be IMG_HEIGHT x IMG_WIDTH
    image = tf.reshape(image, [IMG_HEIGHT, IMG_WIDTH, N_CHANNELS])
    image = tf.cast(image, tf.float32)

    # get image id (file name without its directory)
    image_id = tf.strings.split(file_path, '/')[-1]

    return image, image_id

In [None]:
def get_test_dataset():
    """Create the batched tf.data pipeline over all test JPEG files.

    Returns:
        A `tf.data.Dataset` yielding `(images, image_ids)` batches of up to
        BATCH_SIZE elements, as produced by `decode_tfrecord_test`. Element
        order is not guaranteed because determinism is switched off.
    """
    # allow elements to be produced out of order
    options = tf.data.Options()
    options.experimental_deterministic = False

    file_pattern = '/kaggle/input/cassava-leaf-disease-classification/test_images/*.jpg'

    return (
        tf.data.Dataset.list_files(file_pattern)
        .with_options(options)
        .map(decode_tfrecord_test, num_parallel_calls=AUTO)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )

In [None]:
def show_first_test_batch():
    """Print summary statistics of the first test batch and plot its first image.

    The first image is min-max scaled to [0, 1] for plotting only; the dataset
    itself is not modified.
    """
    # log info of the batch and of the first test image
    imgs, imgs_ids = next(iter(get_test_dataset()))
    # astype returns a copy, so the in-place scaling below does not touch `imgs`
    img = imgs[0].numpy().astype(np.float32)

    print(f'imgs.shape: {imgs.shape}, imgs.dtype: {imgs.dtype}, imgs_ids.shape: {imgs_ids.shape}, imgs_ids.dtype: {imgs_ids.dtype}')
    print('img mean: {:.3f}, img std {:.3f}, img min: {:.3f}, img max: {:.3f}'.format(img.mean(), img.std(), img.min(), img.max()))
    print(f'imgs_id: {imgs_ids[0]}')

    # Min-max scale to [0, 1] for imshow. Subtracting the minimum is correct for
    # both negative and positive minima (adding abs(min) only works for min <= 0).
    img -= img.min()
    peak = img.max()
    # a constant image has peak 0 after the shift; skip the division in that case
    if peak > 0:
        img /= peak

    plt.imshow(img)
    plt.show()

show_first_test_batch()

# Predictions

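8x Test Time Augmentation (TTA) is applied for each fold, resulting in 40 predictions for each image. The augmentations used are a random horizontal crop offset, horizontal/vertical flip, transpose and random zoom (a random square crop of 75%-100% of the image that is resized to the model input size).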

In [None]:
@tf.function
def chance(x, y):
    """Return a boolean scalar tensor that is True in x out of y cases.

    A random integer is drawn uniformly from [0, y) and compared against x,
    so for 0 <= x <= y the result is True with probability x / y.
    """
    draw = tf.random.uniform(shape=[], minval=0, maxval=y, dtype=tf.int32)
    return tf.math.less(draw, x)

@tf.function
def tta_images(image):
    """Apply one random test time augmentation to a single image.

    Steps: random horizontal window, random horizontal/vertical flip, random
    transpose, random square crop, resize to the model input size and ImageNet
    normalisation.

    Args:
        image: image tensor covering at least [IMG_HEIGHT, IMG_WIDTH, N_CHANNELS];
            pixel values are assumed to be in the 0..255 range (they are divided
            by 255 below) - TODO confirm against the caller.

    Returns:
        Tensor of shape [IMG_TARGET_SIZE, IMG_TARGET_SIZE, N_CHANNELS], scaled
        by 1/255 and normalised with IMAGENET_MEAN / IMAGENET_STD.
    """
    # cut a square IMG_HEIGHT x IMG_HEIGHT window at a random horizontal offset
    # (maxval is exclusive for integer dtypes)
    offset = tf.random.uniform(shape=(), minval=0, maxval=IMG_WIDTH-IMG_HEIGHT, dtype=tf.int64)
    image = tf.slice(image, [0, offset, 0], [IMG_HEIGHT, IMG_HEIGHT, N_CHANNELS])

    # random flip image horizontally
    image = tf.image.random_flip_left_right(image)
    # random flip image vertically
    image = tf.image.random_flip_up_down(image)

    # random transpose (the image is square at this point, so the shape is kept)
    if chance(1,2):
        image = tf.image.transpose(image)

    # random crop between 75%-100%
    crop_size = tf.random.uniform(shape=(), minval=IMG_SIZE*0.75, maxval=IMG_SIZE)
    # random_crop expects an integer size; cast explicitly instead of relying on
    # an implicit float -> int conversion of the size list
    crop_size = tf.cast(crop_size, tf.int32)
    image = tf.image.random_crop(image, [crop_size, crop_size, N_CHANNELS])

    # resize the crop to the model input size
    image = tf.image.resize(image, [IMG_TARGET_SIZE, IMG_TARGET_SIZE])

    # scale to [0, 1] and normalise with the ImageNet statistics
    image /= 255.0
    image = (image - IMAGENET_MEAN) / IMAGENET_STD

    return image

@tf.function
def predict_tta(model, image, tta=8):
    """Predict a single image with test time augmentation.

    Args:
        model: callable model, invoked as `model(batch, training=False)`.
        image: a single image tensor as accepted by `tta_images`.
        tta: number of augmented copies to predict on.

    Returns:
        The model outputs of all `tta` augmented copies summed over the batch axis.
    """
    # stack `tta` copies of the image and augment each copy independently
    batch = tf.repeat(tf.expand_dims(image, axis=0), tta, axis=0)
    augmented = tf.map_fn(tta_images, batch)

    # sum the per-copy outputs into one prediction vector
    outputs = model(augmented, training=False)
    return tf.math.reduce_sum(outputs, axis=0)

Predictions are performed per fold and saved in a dictionary, mapping an image_id to class probabilities (softmax output). This approach is chosen as it is insensitive to the order of images. The class probabilities for each fold are summed and the index of the maximum probability is chosen as the predicted class.

In [None]:
# image_id -> class predictions summed over all TTA copies and all folds
preds_dict = dict()

for fold in range(N_FOLDS):
    model = get_model(fold)
    for imgs, image_ids in tqdm(get_test_dataset(), desc=f'fold {fold}'):
        # image ids arrive as bytes; convert to str so they can be used as dict keys
        for img, image_id in zip(imgs, image_ids.numpy().astype(str)):
            pred = predict_tta(model, img)
            if image_id in preds_dict:
                preds_dict[image_id] += pred
            else:
                preds_dict[image_id] = pred

# Ensemble prediction: the class with the highest summed probability wins.
# Rows are collected in a list and turned into a DataFrame once, since
# DataFrame.append is quadratic in a loop and no longer exists in pandas 2.x.
records = []
for idx, (image_id, preds) in enumerate(preds_dict.items()):
    # log the summed predictions of the first image as a sanity check
    if idx == 0:
        print(f'image {image_id} predictions:{preds}')

    label = int(np.argmax(preds))
    records.append({ 'image_id': image_id, 'label': label })

# passing `columns` keeps the expected header even when no test images were found
submission = pd.DataFrame(records, columns=['image_id', 'label'])
submission.to_csv('./submission.csv', index=False)

In [None]:
# Render the submission table (image_id and predicted label) for a visual check
display(submission)