In [1]:
import os
import numpy as np
import pandas as pd
import joblib

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from tensorflow.keras.applications import EfficientNetB3
from randomcutout_tf2_3 import RandomCutout

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

LOG MESSAGE
1. Version 8:
    * Train effnetB3
    * 12 epochs
    * image size 224
    * preprocessing layer (0.2)
    * random cutout (32)
2. Version 10:
    * Retrain effnetB3
    * 7 epochs
    * image size 512
    * preprocessing layer (0.4)
    * random cutout (96)

In [2]:
# Switch Keras to the 'mixed_float16' dtype policy for every layer created
# after this cell (the final Dense layer in create_model overrides it with float32).
from tensorflow.keras.mixed_precision import experimental as mixed_precision

policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

In [3]:
def set_seed(SEED):
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    TensorFlow's global generator, and exports ``PYTHONHASHSEED``.

    Args:
        SEED: integer seed shared by all generators.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    # PYTHONHASHSEED is read at interpreter start-up, so this only influences
    # Python processes launched after this call, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(SEED)
    # Previously missing: anything relying on the stdlib generator was unseeded.
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)

In [4]:
# --- Locations: label CSV, image folder, on-disk tf.data cache file, output folder
CSV_DIR = '../input/cassava-leaf-disease-classification/train.csv'
IMG_DIR = '../input/cassava-leaf-disease-classification/train_images'
CACHE_DIR = '/kaggle/dump.tfcache'
SAVE_DIR = './'

# --- Warm start: when RETRAIN is set, the training loop loads
#     WEIGHT_DIR + <fold index> + '.h5' into each freshly built model
RETRAIN = True
WEIGHT_DIR = '../input/cassava-efficientnetb3/ModelB3v1_'

# --- Debug switches: DEBUG shrinks the data and trains a single epoch,
#     DEBUG_FUNC enables the input-pipeline smoke test
DEBUG = False
DEBUG_FUNC = False
EPOCHS = 1 if DEBUG else 7

# --- Hyper-parameters
SEED = 2021
CLASS = 5
BATCH_SIZE = 8
IMG_SIZE = 512
DROPOUT_RATE = 0.4
BUFFER_SIZE = 1024

# --- Derived shapes
TARGET_SIZE = (IMG_SIZE, IMG_SIZE)
INPUT_SHAPE = TARGET_SIZE + (3,)

AUTOTUNE = tf.data.experimental.AUTOTUNE
set_seed(SEED)

## Dataset

In [5]:
# Load the label table and attach the full image path of every sample.
df = pd.read_csv(CSV_DIR)
df['path'] = IMG_DIR + '/' + df.image_id
# df['label'] = df['label'].astype(str)
if DEBUG:
    # Keep 0.5% of every class; a fixed random_state makes re-running this
    # cell yield the same subset instead of depending on global RNG state.
    df = df.groupby('label', group_keys=False).apply(
        lambda x: x.sample(frac=0.005, random_state=SEED))
if DEBUG_FUNC:
    # The pipeline smoke-test cell only checks DEBUG_FUNC, so the split must
    # not be hidden behind DEBUG as well (that raised NameError on X_train
    # when DEBUG_FUNC was enabled on its own).
    X_train, X_valid = train_test_split(df, test_size=0.1, random_state=SEED, stratify=df['label'])

# Class distribution of the (possibly sub-sampled) frame
df.groupby('label')['path'].count()

label
0     1087
1     2189
2     2386
3    13158
4     2577
Name: path, dtype: int64

In [6]:
def create_dataset(X_train, X_valid):
    """Wrap the train and validation frames as (path, label) tf.data datasets.

    Args:
        X_train: frame exposing ``path`` and ``label`` columns for training.
        X_valid: frame exposing ``path`` and ``label`` columns for validation.

    Returns:
        Tuple ``(train_dataset, valid_dataset)`` of unbatched datasets.
    """
    def _as_dataset(frame):
        # One element per row: (image path, label)
        return tf.data.Dataset.from_tensor_slices((frame.path.values,
                                                   frame.label.values))

    return _as_dataset(X_train), _as_dataset(X_valid)

def load_image(image_path, label):
    """Read a JPEG file, decode it to 3 channels and take a random crop.

    Args:
        image_path: path of the JPEG file to read.
        label: label of the image; returned unchanged.

    Returns:
        Tuple ``(image, label)`` where ``image`` is an
        ``IMG_SIZE x IMG_SIZE x 3`` random crop of the decoded picture.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    crop_shape = [IMG_SIZE, IMG_SIZE, 3]
    cropped = tf.image.random_crop(decoded, size=crop_shape)

    # If CategoricalCrossentropy is used instead, one-hot encode the label:
    # label = tf.one_hot(label, CLASS, dtype='uint8')
    return cropped, label

def improve_performance(ds, batch_size=BATCH_SIZE):
    """Cache, shuffle, repeat, batch and prefetch a dataset for training.

    The cache lives on disk at ``CACHE_DIR``. A file cache persists between
    pipelines: if files for that path already exist, tf.data reads them back
    instead of re-evaluating ``ds``. Because this function is called once per
    cross-validation fold with the same path, leftovers from an earlier call
    would make later folds train on the earlier fold's samples. Stale cache
    files are therefore removed before the new cache is declared.

    Do not call this while another pipeline built on ``CACHE_DIR`` is still
    being iterated.

    Args:
        ds: unbatched ``tf.data.Dataset`` of (image, label) elements.
        batch_size: number of elements per batch.

    Returns:
        The infinitely repeating, batched and prefetched dataset.
    """
    # Local import: `glob` is not part of the notebook's import cell.
    import glob

    # tf.data writes several files sharing the CACHE_DIR prefix (index, data
    # shards, lock file), so match on the prefix rather than the exact name.
    for stale_file in glob.glob(glob.escape(CACHE_DIR) + '*'):
        if os.path.isfile(stale_file):
            os.remove(stale_file)

    # comment if we use CategoricalCrossentropy (improvement gone)
    ds = ds.cache(CACHE_DIR)
    ds = ds.shuffle(buffer_size=BUFFER_SIZE)
    ds = ds.repeat()
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds


def create_dataset_batch(X_train, X_valid):
    """Build the batched training and validation input pipelines.

    Args:
        X_train: frame with ``path`` and ``label`` columns for training.
        X_valid: frame with ``path`` and ``label`` columns for validation.

    Returns:
        Tuple ``(train_ds_batch, valid_ds_batch)``. Both repeat indefinitely,
        so callers must bound them with explicit step counts.
    """
    path_train, path_valid = create_dataset(X_train, X_valid)

    # Decode and crop the images in parallel
    image_train = path_train.map(load_image, num_parallel_calls=AUTOTUNE)
    image_valid = path_valid.map(load_image, num_parallel_calls=AUTOTUNE)

    # Training gets the full cache/shuffle/prefetch treatment...
    batched_train = improve_performance(image_train, BATCH_SIZE)
    # ...validation is only repeated and batched
    repeated_valid = image_valid.repeat()
    batched_valid = repeated_valid.batch(BATCH_SIZE)
    return batched_train, batched_valid

In [7]:
# Optional smoke test of the input pipeline: build both batched datasets from
# the debug split and pull a single training batch for inspection.
if DEBUG_FUNC:
    # X_train / X_valid come from the data-loading cell's debug branch
    train_ds_batch, valid_ds_batch = create_dataset_batch(X_train, X_valid)
    image_batch, label_batch = next(iter(train_ds_batch))

## Model

In [8]:
# Data Augmentation in GPU
# Augmentation pipeline expressed as Keras layers so it runs inside the model.
# Layers are applied in the order they are added.
data_augmentation = keras.Sequential()
# (disabled) layers.experimental.preprocessing.RandomCrop(height=IMG_SIZE, width=IMG_SIZE)
data_augmentation.add(
    layers.experimental.preprocessing.RandomFlip(mode='horizontal_and_vertical'))
data_augmentation.add(
    layers.experimental.preprocessing.RandomZoom(height_factor=(-0.4, 0)))
data_augmentation.add(
    layers.experimental.preprocessing.RandomRotation(factor=0.4))
data_augmentation.add(
    layers.experimental.preprocessing.RandomContrast(factor=0.4))
# Custom augmentation layer imported from randomcutout_tf2_3
data_augmentation.add(RandomCutout(mask=(96, 96)))

In [9]:
def create_model():
    """Build and compile the EfficientNetB3 classifier.

    The network is: input -> ``data_augmentation`` -> EfficientNetB3 (no top,
    ImageNet weights) -> global average pooling -> dropout -> softmax over
    ``CLASS`` outputs. It is compiled with Adam on a cosine-decay learning
    rate and sparse categorical cross-entropy.

    Returns:
        The compiled ``keras.Model``.
    """
    # Reset Keras global state before building
    tf.keras.backend.clear_session()

    # Backbone
    backbone = EfficientNetB3(input_shape=INPUT_SHAPE,
                              include_top=False,
                              weights='imagenet',
                              drop_connect_rate=DROPOUT_RATE)
    # Mean and variance of dataset (obtain using adapt function)
    normalization = backbone.get_layer('normalization')
    normalization.mean.assign([0.42580798, 0.4968467 , 0.3084383])
    normalization.variance.assign([0.05642978, 0.05777279, 0.05193274])

    # Classification head on top of the augmented input
    inputs = layers.Input(shape=INPUT_SHAPE, name='input')
    x = data_augmentation(inputs)
    x = backbone(x)
    x = layers.GlobalAveragePooling2D(name='last_global_avg_pooling')(x)
    x = layers.Dropout(DROPOUT_RATE, name='last_dropout')(x)
    # Explicit float32 output dtype on the prediction layer
    outputs = layers.Dense(CLASS, activation='softmax', name='predictions', dtype='float32')(x)
    model = keras.Model(inputs=inputs, outputs=outputs)

    # NOTE(review): the schedule length is derived from the whole dataframe,
    # while the training loop sizes an epoch from the fold's training subset,
    # so the decay may not complete within EPOCHS — confirm this is intended.
    batches_per_pass = int(round(len(df)/BATCH_SIZE))
    lr_schedule = tf.keras.experimental.CosineDecay(initial_learning_rate=1e-5,
                                                    decay_steps=batches_per_pass*EPOCHS,
                                                    alpha=0.3)

    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
                  metrics=['sparse_categorical_accuracy'])
    return model

# Build and compile one model up front and print its layer summary.
# (The training loop builds a fresh model for every fold.)
model = create_model()
model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 512, 512, 3)]     0         
_________________________________________________________________
sequential (Sequential)      (None, 512, 512, 3)       0         
_________________________________________________________________
efficientnetb3 (Functional)  (None, 16, 16, 1536)      10783535  
_________________________________________________________________
last_global_avg_pooling (Glo (None, 1536)              0         
_________________________________________________________________
last_dropout (Dropout)       (None, 1536)              0         
_________________________________________________________________
predictions (Dense)          (None, 5)                 7685      
Total params: 10,791,220

## Training

In [10]:
# Stratified K-fold training: for every fold a fresh model is built, optionally
# warm-started from saved weights, trained, and its best checkpoint (by
# val_loss) written to SAVE_DIR. Per-fold histories and validation indices
# are accumulated and re-saved after each fold.
tf.keras.backend.clear_session()
N_SPLITS = 5
oof = {}           # fold -> validation row positions in df
loss = {}          # fold -> per-epoch training loss
val_loss = {}      # fold -> per-epoch validation loss
accuracy = {}      # fold -> per-epoch training accuracy
val_accuracy = {}  # fold -> per-epoch validation accuracy
MODEL_NAME = 'ModelB3v2_'

fold = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
for _fold, (train_idx, valid_idx) in enumerate(fold.split(df['path'], df['label'])):
    train_set = df.iloc[train_idx]
    valid_set = df.iloc[valid_idx]
    # Both datasets repeat forever, so the epoch length is fixed explicitly
    steps_per_epoch = len(train_set)//BATCH_SIZE
    validation_steps = len(valid_set)//BATCH_SIZE


    print('FOLD {}'.format(_fold))
    print('Train size: {0}, Valid size: {1}'.format(len(train_set), len(valid_set)))
    print('Train class: {}'.format(train_set.groupby('label')['image_id'].count().to_dict()))
    print('Valid class: {}'.format(valid_set.groupby('label')['image_id'].count().to_dict()))

    train_ds_batch, valid_ds_batch = create_dataset_batch(train_set, valid_set)
    model = create_model()
    if RETRAIN:
        # Continue from the weights saved for this fold
        model.load_weights(WEIGHT_DIR+str(_fold)+'.h5')

    checkpoint = callbacks.ModelCheckpoint(SAVE_DIR+MODEL_NAME+str(_fold)+'.h5',
                                           monitor='val_loss',
                                           verbose=1,
                                           save_best_only=True,
                                           save_weights_only=False)

    history_fold = model.fit(train_ds_batch,
                             epochs=EPOCHS,
                             steps_per_epoch=steps_per_epoch,
                             validation_data=valid_ds_batch,
                             validation_steps=validation_steps,
                             callbacks=[checkpoint])
    print('.'*80)
    # Save useful metrics and valid_idx per fold
    loss[_fold] = history_fold.history['loss']
    val_loss[_fold] = history_fold.history['val_loss']
    accuracy[_fold] = history_fold.history['sparse_categorical_accuracy']
    val_accuracy[_fold] = history_fold.history['val_sparse_categorical_accuracy']
    oof[_fold] = valid_idx
    # Save metrics for further analysis (rewritten after every fold so an
    # interrupted run still leaves the completed folds on disk)
    print('Saving metrics...')
    metrics = (loss, val_loss, accuracy, val_accuracy, oof)
    # Context manager closes the file; the previous bare open() leaked one
    # handle per fold and left flushing to garbage collection.
    with open(SAVE_DIR+'metrics.pkl', 'wb') as metrics_file:
        joblib.dump(metrics, metrics_file)
    print()

FOLD 0
Train size: 17117, Valid size: 4280
Train class: {0: 869, 1: 1751, 2: 1909, 3: 10527, 4: 2061}
Valid class: {0: 218, 1: 438, 2: 477, 3: 2631, 4: 516}
Epoch 1/7
Epoch 00001: val_loss improved from inf to 0.37067, saving model to ./ModelB3v2_0.h5
Epoch 2/7
Epoch 00002: val_loss did not improve from 0.37067
Epoch 3/7
Epoch 00003: val_loss improved from 0.37067 to 0.36919, saving model to ./ModelB3v2_0.h5
Epoch 4/7
Epoch 00004: val_loss improved from 0.36919 to 0.36668, saving model to ./ModelB3v2_0.h5
Epoch 5/7
Epoch 00005: val_loss did not improve from 0.36668
Epoch 6/7
Epoch 00006: val_loss did not improve from 0.36668
Epoch 7/7
Epoch 00007: val_loss did not improve from 0.36668
................................................................................
Saving metrics...

FOLD 1
Train size: 17117, Valid size: 4280
Train class: {0: 869, 1: 1751, 2: 1909, 3: 10527, 4: 2061}
Valid class: {0: 218, 1: 438, 2: 477, 3: 2631, 4: 516}
Epoch 1/7
Epoch 00001: val_loss improved from inf

In [11]:
# # Save metrics for further analysis
# metrics = (loss, val_loss, accuracy, val_accuracy, oof)
# joblib.dump(metrics, open(SAVE_DIR+'metrics2.pkl', 'wb'))