I'll attempt to make a decent prediction with the fewest lines of simple code.

In [None]:
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from functools import partial

# Let tf.data choose parallelism / prefetch buffer sizes on its own.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Training hyper-parameters.
BATCH_SIZE = 32
EPOCHS = 20

# Side lengths the images are resized to before entering the model.
IMAGE_SIZE = [128, 128]

# Class names are simply the label indices "0".."4" as strings.
CLASSES = list(map(str, range(5)))

# Locally stored ResNet50 weights without the classification head.
WEIGHTS_FILE = (
    "../input/resnet50/"
    "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5"
)

In [None]:
def decode_image(image):
    """Turn an encoded JPEG string into a small float32 image tensor.

    The bytes are decoded as a 3-channel JPEG, cast to float32 and divided
    by 255, given the static shape [512, 512, 3], and finally resized to
    IMAGE_SIZE with nearest-neighbour sampling.
    """
    pixels = tf.image.decode_jpeg(image, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255.0
    # The records store 512x512 RGB images; pin that shape explicitly.
    full_size = tf.reshape(scaled, [512, 512, 3])
    # Shrink the image so training runs faster.
    return tf.image.resize(full_size, IMAGE_SIZE, method='nearest')

def read_tfrec(example, labeled):
    """Parse one serialized record into an (image, second_item) pair.

    Args:
        example: serialized record as read from the TFRecord dataset.
        labeled: when true the record is expected to carry an int64
            "target" feature; otherwise a string "image_name" feature.

    Returns:
        (image, label) with the label cast to int32 when `labeled` is true,
        else (image, image_name). The image comes from `decode_image`.
    """
    extra_key = "target" if labeled else "image_name"
    extra_dtype = tf.int64 if labeled else tf.string
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        extra_key: tf.io.FixedLenFeature([], extra_dtype),
    }
    parsed = tf.io.parse_example(example, feature_spec)
    image = decode_image(parsed['image'])
    if not labeled:
        return image, parsed['image_name']
    return image, tf.cast(parsed['target'], tf.int32)

In [None]:
# A single training shard, used for the quick parser sanity check in this cell.
PATH = (
    "../input/cassava-leaf-disease-classification/"
    + "train_tfrecords/ld_train00-1338.tfrec"
)

def load_dataset(filenames, labeled=True, ordered=False):
    """Build an unbatched dataset of parsed records from TFRecord files.

    Args:
        filenames: TFRecord path(s) handed to `tf.data.TFRecordDataset`.
        labeled: forwarded to `read_tfrec`; selects (image, label) versus
            (image, image_name) elements.
        ordered: when false, the dataset options switch off deterministic
            ordering; when true the default options are kept.

    Returns:
        A `tf.data.Dataset` whose elements are produced by `read_tfrec`.
    """
    read_options = tf.data.Options()
    if not ordered:
        read_options.experimental_deterministic = False

    parse_record = partial(read_tfrec, labeled=labeled)
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
    return records.with_options(read_options).map(
        parse_record, num_parallel_calls=AUTOTUNE
    )
    
# Quick sanity check of the parser on a few records from one shard.
# load_dataset defaults to labeled=True, so each element is (image, label):
# the second item is the integer target, not an image id.
for image, label in load_dataset(PATH).take(3):
    print(f"Image: {image.shape}, {image.dtype}, "
          f"min {tf.reduce_min(image)}, max {tf.reduce_max(image)};")
    print(f"label: {label.shape}, {label.dtype}, {label}")

In [None]:
from sklearn.model_selection import train_test_split
INPUT_PATH = "../input/cassava-leaf-disease-classification/"

# Split the training shards 65% / 35% into train and validation sets.
# The shard list is sorted first: random_state only makes the split
# reproducible if the input order is itself stable, and the order returned
# by glob should not be relied upon.
TRAINING_FILENAMES, VALID_FILENAMES = train_test_split(
    sorted(tf.io.gfile.glob(INPUT_PATH + "train_tfrecords/*")),
    test_size=0.35, random_state=42,
)

# INPUT_PATH already ends with "/", so no extra separator is added here.
TEST_FILENAMES = sorted(tf.io.gfile.glob(
    INPUT_PATH + "test_tfrecords/*"
))

In [None]:
def get_train_dataset():
    """Return the training pipeline: labeled records from TRAINING_FILENAMES,
    repeated indefinitely, shuffled with a 2048-element buffer, grouped into
    batches of BATCH_SIZE and prefetched.
    """
    return (
        load_dataset(TRAINING_FILENAMES, labeled=True)
        .repeat()
        .shuffle(2048)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )

def get_valid_dataset(ordered=False):
    """Return the validation pipeline: labeled records from VALID_FILENAMES,
    repeated indefinitely, batched by BATCH_SIZE and prefetched.

    Args:
        ordered: forwarded to `load_dataset`.
    """
    records = load_dataset(VALID_FILENAMES, labeled=True, ordered=ordered)
    return records.repeat().batch(BATCH_SIZE).prefetch(AUTOTUNE)

def get_test_dataset(ordered=False):
    """Return the test pipeline: unlabeled (image, image_name) records from
    TEST_FILENAMES, batched by BATCH_SIZE and prefetched (no repeat).

    Args:
        ordered: forwarded to `load_dataset`.
    """
    records = load_dataset(TEST_FILENAMES, labeled=False, ordered=ordered)
    return records.batch(BATCH_SIZE).prefetch(AUTOTUNE)

In [None]:
import re
def count_data_items(tfrec_filenames):
    """Sum the item counts embedded in TFRecord file names.

    Each name is expected to contain "-<digits>." (for example
    "ld_train00-1338.tfrec" contributes 1338); the first such occurrence in
    the string is used.

    Args:
        tfrec_filenames: iterable of file name / path strings.

    Returns:
        The total as a NumPy int64; 0 for an empty iterable.

    Raises:
        ValueError: if a name contains no "-<digits>." count.
    """
    # Compiled once instead of once per file name. `\d+` (rather than `\d*`)
    # keeps an empty digit run such as "-." from being picked up as a count.
    count_pattern = re.compile(r"-(\d+)\.")
    num_items = []
    for filename in tfrec_filenames:
        found = count_pattern.search(filename)
        if found is None:
            raise ValueError(
                f"no item count of the form '-<digits>.' in {filename!r}"
            )
        num_items.append(int(found.group(1)))
    # An explicit integer dtype keeps the empty case integral
    # (np.sum([]) would otherwise be the float 0.0).
    return np.sum(num_items, dtype=np.int64)

In [None]:
# Per-split image totals, derived from the counts in the shard file names.
NUM_TRAIN_IMAGES, NUM_VALID_IMAGES, NUM_TEST_IMAGES = map(
    count_data_items,
    (TRAINING_FILENAMES, VALID_FILENAMES, TEST_FILENAMES),
)

print(
    f"Dataset: train:{NUM_TRAIN_IMAGES}, valid:{NUM_VALID_IMAGES}, "
    f"test:{NUM_TEST_IMAGES}"
)

In [None]:
# Wrap the stock ResNet50 input preprocessing so it can sit inside the model.
img_adjust_layer = tf.keras.layers.Lambda(
    keras.applications.resnet50.preprocess_input
)

# Headless ResNet50 backbone initialised from the local weights file.
resnet_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights=WEIGHTS_FILE,
)

# Walk the layers in order: everything before "conv4_block1_out" is frozen,
# that layer and every layer after it is left trainable.
switch_trainable = False
for layer in resnet_model.layers:
    switch_trainable = switch_trainable or layer.name == "conv4_block1_out"
    layer.trainable = switch_trainable

# Re-wrap the backbone so that its output is taken at "conv4_block5_out".
resnet_model = keras.Model(
    outputs=resnet_model.get_layer("conv4_block5_out").output,
    inputs=resnet_model.input,
)

resnet_model.summary()

In [None]:
# Learning rate starts at 1e-4 and is multiplied by 0.9 every 10000
# optimizer steps (exponential decay schedule).
lr_scheduler = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4, 
    decay_steps=10000, 
    decay_rate=0.9)

# Full classifier: input -> batch norm -> ResNet50 preprocessing ->
# truncated ResNet50 backbone -> global average pooling -> dense head.
# NOTE(review): decode_image already divides pixels by 255, while the
# ResNet50 preprocess_input is presumably meant for 0-255 inputs; confirm
# that this combination (with the BatchNormalization in front) is intended.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[*IMAGE_SIZE, 3]),
    tf.keras.layers.BatchNormalization(renorm=True),
    img_adjust_layer,
    resnet_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(300, activation='relu'),
    # One softmax output per entry in CLASSES.
    tf.keras.layers.Dense(len(CLASSES), activation='softmax')  
])

# Sparse loss/metric: labels are integer class indices, not one-hot vectors.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_scheduler, epsilon=0.001),
    loss='sparse_categorical_crossentropy',  
    metrics=['sparse_categorical_accuracy']
)
model.summary()

In [None]:
train_dataset = get_train_dataset()
valid_dataset = get_valid_dataset()

# Both datasets repeat forever, so the number of batches per pass has to be
# given explicitly; a trailing partial batch is not counted.
STEPS_PER_EPOCH = NUM_TRAIN_IMAGES // BATCH_SIZE
VALID_STEPS = NUM_VALID_IMAGES // BATCH_SIZE

# Stop early after a quarter of EPOCHS without improvement and roll back to
# the best weights seen.
callbacks = [
    keras.callbacks.EarlyStopping(
        patience=int(EPOCHS/4),
        restore_best_weights=True,
    )
]

history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=valid_dataset,
    validation_steps=VALID_STEPS,
    callbacks=callbacks,
)
model.save("model.h5")

In [None]:
# One row per epoch, one column per logged metric.
history_frame = pd.DataFrame(history.history)

# Loss curves, then accuracy curves (training vs. validation).
history_frame[['loss', 'val_loss']].plot()
history_frame[['sparse_categorical_accuracy', 'val_sparse_categorical_accuracy']].plot();

In [None]:
def to_float32(image, label):
    """Cast `image` to float32 and pass `label` through unchanged.

    Mapped over the test dataset before prediction.
    """
    as_float = tf.cast(image, dtype=tf.float32)
    return as_float, label

In [None]:
# Unordered test pipeline re-batched to 20 images, plus an iterator over it.
# NOTE(review): neither name is read by the prediction or submission code
# visible here; they are kept in case another cell relies on them.
testing_dataset = get_test_dataset()
testing_dataset = testing_dataset.unbatch().batch(20)
test_batch = iter(testing_dataset)

# Ordered test pipeline: images and image names must come out in the same
# order on every pass so predictions can later be paired with their ids.
test_ds = get_test_dataset(ordered=True)
test_ds = test_ds.map(to_float32)

print('Computing predictions...')
# Keep only the images for inference. (The former
# `test_images_ds = testing_dataset` assignment was overwritten immediately
# and has been dropped.)
test_images_ds = test_ds.map(lambda image, idnum: image)
probabilities = model.predict(test_images_ds)
# Most probable class index per image.
predictions = np.argmax(probabilities, axis=-1)
print(predictions)

In [None]:
print('Generating submission.csv file...')
# Second pass over the ordered test dataset, this time keeping only the names.
test_ids_ds = test_ds.map(lambda image, idnum: idnum).unbatch()
# Pull every id in a single batch. The batch is sized from the predictions
# themselves, so the two columns always have the same length even if the
# count encoded in the shard file names is off.
test_ids = next(iter(test_ids_ds.batch(len(predictions)))).numpy().astype('U')
# The Cassava competition's submission columns are `image_id,label`.
np.savetxt(
    'submission.csv',
    np.rec.fromarrays([test_ids, predictions]),
    fmt=['%s', '%d'],
    delimiter=',',
    header='image_id,label',
    comments='',
)
!head submission.csv