## Imports

In [None]:
!pip install efficientnet

In [None]:
import tensorflow as tf, re, math
import tensorflow_addons as tfa

from tensorflow.keras import layers, optimizers, losses, metrics, callbacks, initializers
from tensorflow.keras import Sequential, Model, Input

import efficientnet.tfkeras as efn

import os
import sys
import glob
import json
import gc

from functools import partial

import random
import numpy as np
import pandas as pd 

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder

from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
plt.style.use("ggplot")
%matplotlib inline

import seaborn as sns

import cv2
from PIL import Image

# Log framework and interpreter versions so a run can be reproduced later.
print(f'tensorflow version: {tf.__version__}')
print(f'python version: {sys.version}')

def seed_everything(seed):
    """Seed every random number source this notebook uses.

    Sets TensorFlow's global seed, NumPy's legacy global generator, Python's
    `random` module and the `PYTHONHASHSEED` environment variable.

    Args:
        seed: integer seed shared by all generators.
    """
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


seed_everything(42)

## TPU&GPU Check

In [None]:
# Try to resolve a TPU cluster; when the resolver raises ValueError, fall back
# to TensorFlow's default distribution strategy.
try:
    TPU = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection. No parameters necessary if TPU_NAME environment variable is set. On Kaggle this is always the case.
    print('Running on TPU ', TPU.master())
except ValueError:
    # The message says "GPU", but this branch runs for every non-TPU runtime
    # (the default strategy selected below also covers CPU).
    print('Running on GPU')
    TPU = None

if TPU:
    # Connect to the cluster and initialise the TPU system before creating the strategy.
    tf.config.experimental_connect_to_cluster(TPU)
    tf.tpu.experimental.initialize_tpu_system(TPU)
    # NOTE(review): newer TensorFlow releases expose this class as
    # tf.distribute.TPUStrategy -- confirm against the installed version.
    strategy = tf.distribute.experimental.TPUStrategy(TPU)
else:
    strategy = tf.distribute.get_strategy() # default distribution strategy in Tensorflow. Works on CPU and single GPU.

# Number of replicas kept in sync by the strategy; the global batch size is scaled by it.
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

# Sentinel passed to tf.data calls (num_parallel_calls / prefetch) further down.
AUTO = tf.data.experimental.AUTOTUNE

In [None]:
# Read the image -> cultivar mapping and drop incomplete rows in one chain.
train = (
    pd.read_csv('../input/sorghum-id-fgvc-9/train_cultivar_mapping.csv')
    .dropna()
    .reset_index(drop=True)
)
display(train.head())

## Parameters

In [None]:
# --- Input geometry ---------------------------------------------------------
IMG_SIZE = 512
N_CHANNELS = 3
INPUT_SHAPE = (IMG_SIZE, IMG_SIZE, N_CHANNELS)

# --- Cross-validation -------------------------------------------------------
N_SPLITS = 5
val_fold = 0  # index of the fold held out for validation

# --- Training schedule ------------------------------------------------------
N_SAMPLES = train.shape[0]
N_EPOCHS = 15
BATCH_SIZE_BASE = 16                     # per-replica batch size
BATCH_SIZE = REPLICAS * BATCH_SIZE_BASE  # global batch size across replicas

# --- Augmentation -----------------------------------------------------------
CUTOUT = True

# --- ImageNet channel statistics (normalisation constants) -------------------
IMAGENET_MEAN = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
IMAGENET_STD = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)

# --- Misc -------------------------------------------------------------------
AUTO = tf.data.experimental.AUTOTUNE
EPS = tf.keras.backend.epsilon()

print(f'N_SAMPLES: {N_SAMPLES}, BATCH_SIZE: {BATCH_SIZE}')

## Train CSV

In [None]:
from kaggle_datasets import KaggleDatasets
# The GCS path of any attached dataset can be looked up with KaggleDatasets().get_gcs_path(...).
# That lookup sometimes fails with an error on large datasets (look at Version 1),
# so the call below is left commented out and the bucket path is hard-coded instead.
#GCS_DS_PATH = KaggleDatasets().get_gcs_path('sorghum-id-fgvc-9') 

# NOTE(review): presumably the value get_gcs_path returned for this dataset at
# some point -- confirm the bucket is still valid before running.
GCS_DS_PATH = 'gs://kds-61791860f9a09f446243cd1f60e8ed71b3743aa148e9c9092cdbdea3'

In [None]:
# Full GCS location of every training image.
train["image_path"] = train["image"].map(lambda image: GCS_DS_PATH + '/train_images/' + image)

# Integer-encode the cultivar names and keep lookup tables for both directions.
encoder = LabelEncoder()
train["cultivar_id"] = encoder.fit_transform(train["cultivar"])
ids2labels = dict(enumerate(encoder.classes_))
labels2ids = {name: idx for idx, name in ids2labels.items()}

# Tag each row with the index of the stratified fold in which it is held out.
skf = StratifiedKFold(n_splits=N_SPLITS)
for fold_idx, (_, holdout_rows) in enumerate(skf.split(X=train, y=train.cultivar)):
    train.loc[holdout_rows, "kfold"] = fold_idx

train.head()

## Number of Labels

In [None]:
# Number of distinct cultivars; used as the width of the model's softmax layer.
N_CULTIVAR = train['cultivar'].nunique()
print(f'N_CULTIVAR: {N_CULTIVAR}')

## Augmentation

In [None]:
def data_augment(image, label): 
    """Randomly augment one training image; the label is returned unchanged.

    When `CUTOUT` is enabled, two successive uniform draws must both exceed
    0.5 for a square patch of side ``int(IMG_SIZE * 0.4)`` to be zeroed out.
    Afterwards a random horizontal flip and hue / saturation / brightness
    jitter are always applied.

    Args:
        image: image tensor. The cutout mask is reshaped to
            (IMG_SIZE, IMG_SIZE, 1) and multiplied in, so the image is
            presumably (IMG_SIZE, IMG_SIZE, channels) as produced by
            `decode_image` -- TODO confirm for other callers.
        label: passed through untouched.

    Returns:
        Tuple ``(augmented_image, label)``.
    """
    if tf.random.uniform([])>0.5 and CUTOUT:
        N_CUTOUT = 1
        for cutouts in range(N_CUTOUT):
            if tf.random.uniform([])>0.5:
                DIM = IMG_SIZE
                CUTOUT_LENGTH = int(DIM * 0.4)
                # x1 is the patch offset along axis 0, x2 the offset along axis 1
                x1 = tf.cast( tf.random.uniform([],0,DIM-CUTOUT_LENGTH),tf.int32)
                x2 = tf.cast( tf.random.uniform([],0,DIM-CUTOUT_LENGTH),tf.int32)
                # Build a (DIM, CUTOUT_LENGTH) strip: zeros, a block of ones, zeros
                filter_ = tf.concat([tf.zeros((x1,CUTOUT_LENGTH)),tf.ones((CUTOUT_LENGTH,CUTOUT_LENGTH)),tf.zeros((DIM-x1-CUTOUT_LENGTH,CUTOUT_LENGTH))],axis=0)
                # Pad the strip with zero columns on both sides up to (DIM, DIM)
                filter_ = tf.concat([tf.zeros((DIM,x2)),filter_,tf.zeros((DIM,DIM-x2-CUTOUT_LENGTH))],axis=1)
                # Invert: 0 inside the patch, 1 elsewhere; trailing axis of size 1 broadcasts over channels
                cutout = tf.reshape(1-filter_,(DIM,DIM,1))
                image = cutout*image
                
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_hue(image, 0.01)
    image = tf.image.random_saturation(image, 0.70, 1.30)
    # NOTE(review): no clipping follows the brightness jitter, so values may
    # leave the [0, 1] range produced by decode_image -- confirm this is intended.
    image = tf.image.random_brightness(image, 0.10)
    return image, label

## Dataset Utils

In [None]:
def decode_image(filename, label=None, image_size=IMG_SIZE):
    """Load a JPEG file and return it as a square float image scaled by 1/255.

    Args:
        filename: path of the JPEG file to read.
        label: arbitrary companion value, returned unchanged.
        image_size: side length the image is resized to.

    Returns:
        Tuple ``(image, label)`` with `image` as float32.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(pixels, [image_size, image_size])
    scaled = tf.cast(resized, tf.float32) / 255.0
    # ImageNet mean/std normalisation is intentionally left disabled here.
    return scaled, label

def view_image(ds):
    """Display up to 20 images from the first batch of `ds` in a 4x5 grid.

    The number of panels is capped at the size of the batch, so a batch with
    fewer than 20 samples (e.g. BATCH_SIZE of 16 on a single replica) is shown
    in full instead of raising an IndexError.

    Args:
        ds: iterable yielding ``(image, label)`` batches whose elements
            expose ``.numpy()``.
    """
    image, label = next(iter(ds)) # extract 1 batch from the dataset
    image = image.numpy()
    label = label.numpy()

    n_rows, n_cols = 4, 5
    n_shown = min(n_rows * n_cols, len(image))

    fig = plt.figure(figsize=(22, 22))
    for i in range(n_shown):
        ax = fig.add_subplot(n_rows, n_cols, i + 1, xticks=[], yticks=[])
        ax.imshow(image[i])
        ax.set_title(f"Label: {label[i]}")

## Train and Valid Dataset

In [None]:
# Rows whose fold index equals `val_fold` form the validation set; all others train.
is_val_row = (train.kfold % N_SPLITS) == val_fold

train_df = train[~is_val_row].reset_index(drop=True)
val_df = train[is_val_row].reset_index(drop=True)

In [None]:
# Training input pipeline.
# Shuffling happens on the lightweight (path, label) pairs *before* decoding,
# so the buffer can span the whole training fold (a full uniform shuffle)
# without holding decoded 512x512 float images in memory. It also precedes
# `repeat()`, so every sample is seen once per pass before any is repeated.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_df.image_path, train_df.cultivar_id))
    .shuffle(len(train_df))
    .repeat()
    .map(decode_image, num_parallel_calls=AUTO)
    .map(data_augment, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Validation pipeline: decode only -- no augmentation, shuffling or repeat.
_valid_pairs = tf.data.Dataset.from_tensor_slices((val_df.image_path, val_df.cultivar_id))
valid_dataset = (
    _valid_pairs
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

In [None]:
# Visual sanity check of one augmented training batch.
view_image(train_dataset)

In [None]:
# Visual sanity check of one (un-augmented) validation batch.
view_image(valid_dataset)

## Model

In [None]:
def get_model():
    """Build and compile the EfficientNet-B3 cultivar classifier.

    The network is created inside `strategy.scope()`. Backbone features are
    pooled with both global average and global max pooling, concatenated,
    regularised with dropout and fed to a softmax layer of `N_CULTIVAR` units.
    As a side effect, JIT compilation is enabled through
    `tf.config.optimizer.set_jit(True)`.

    Returns:
        The compiled Keras model.
    """
    tf.config.optimizer.set_jit(True)

    with strategy.scope():
        inputs = Input(INPUT_SHAPE, name='image', dtype=tf.float32)
        features = efn.EfficientNetB3(weights='noisy-student', include_top=False)(inputs)

        # Both pooled descriptors are combined; either one alone could be used instead.
        pooled_avg = layers.GlobalAveragePooling2D()(features)
        pooled_max = layers.GlobalMaxPooling2D()(features)
        embedding = layers.Concatenate()([pooled_avg, pooled_max])

        regularised = layers.Dropout(0.25)(embedding)
        probabilities = layers.Dense(
            N_CULTIVAR, activation="softmax", name='predictions'
        )(regularised)

        classifier = Model(inputs=inputs, outputs=probabilities)

        # AdamW with a tiny weight decay; sparse cross-entropy keyed by the
        # output layer's name; top-1 accuracy as the tracked metric.
        classifier.compile(
            optimizer=tfa.optimizers.AdamW(weight_decay=1e-8),
            loss={'predictions': losses.SparseCategoricalCrossentropy()},
            metrics=[metrics.SparseTopKCategoricalAccuracy(k=1, name='top1acc')],
        )

    return classifier

In [None]:
# Build and compile the classifier under the selected distribution strategy.
model = get_model()

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

## Learning Rate Scheduler

In [None]:
class CosineScheduler():
    """Cosine learning-rate decay with an optional linear warm-up.

    For ``epoch < warmup_steps`` the rate rises linearly from
    `warmup_begin_lr` towards `base_lr`. From `warmup_steps` up to
    `max_update` it follows half a cosine wave from `base_lr` down to
    `final_lr`. Past `max_update` it stays at `final_lr`.

    The returned value depends only on `epoch`, never on previous calls, so
    the scheduler can be queried in any order.

    Args:
        max_update: epoch at which the decay reaches `final_lr`.
        base_lr: peak learning rate (reached at the end of warm-up).
        final_lr: learning rate at and after `max_update`.
        warmup_steps: number of warm-up epochs (0 disables warm-up).
        warmup_begin_lr: learning rate at epoch 0 of the warm-up.
    """

    def __init__(self, max_update, base_lr=0.01, final_lr=0,
               warmup_steps=0, warmup_begin_lr=0):
        self.base_lr_orig = base_lr
        self.max_update = max_update
        self.final_lr = final_lr
        self.warmup_steps = warmup_steps
        self.warmup_begin_lr = warmup_begin_lr
        self.max_steps = self.max_update - self.warmup_steps
        # Last rate produced by the cosine phase; defined up front so reading
        # the attribute never fails.
        self.base_lr = base_lr

    def get_warmup_lr(self, epoch):
        """Return the linearly interpolated warm-up rate for `epoch`.

        Only meaningful for ``epoch < warmup_steps`` (which implies
        ``warmup_steps > 0``); `__call__` guarantees that precondition.
        """
        increase = (self.base_lr_orig - self.warmup_begin_lr) \
                       * float(epoch) / float(self.warmup_steps)
        return self.warmup_begin_lr + increase

    def __call__(self, epoch):
        """Return the learning rate for the given (0-based) `epoch`."""
        if epoch < self.warmup_steps:
            return self.get_warmup_lr(epoch)
        if self.max_steps <= 0:
            # The decay phase has zero (or negative) length: treat it as
            # already finished instead of dividing by zero.
            cosine = -1.0
        else:
            # Clamp so epochs beyond `max_update` stay at the end of the curve.
            step = min(epoch, self.max_update) - self.warmup_steps
            cosine = math.cos(math.pi * step / self.max_steps)
        self.base_lr = self.final_lr + (
            self.base_lr_orig - self.final_lr) * (1 + cosine) / 2
        return self.base_lr

# Three warm-up epochs from 1e-4 to 1e-3, then cosine decay back to 1e-4.
scheduler = CosineScheduler(
    max_update=N_EPOCHS,
    base_lr=1e-3,
    final_lr=1e-4,
    warmup_steps=3,
    warmup_begin_lr=1e-4,
)

# Preview the learning rate that will be used for each epoch.
plt.plot(list(map(scheduler, range(N_EPOCHS))))
plt.show()

## Callbacks

In [None]:
# Keep only the weights of the epoch with the lowest *validation* loss.
# `fit` is given validation data, so `val_loss` is logged every epoch; tracking
# the training loss instead would select the checkpoint that fits the training
# fold best rather than the one that generalises best.
model_checkpoint_callback = callbacks.ModelCheckpoint(
    'model_best.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True
)

model_checkpoint_callback.set_model(model)

# Set the optimizer's learning rate from `scheduler` at the start of each epoch.
learning_rate_callback = callbacks.LearningRateScheduler(scheduler, verbose=1)

## Train

In [None]:
# Number of full batches in one pass over the training fold (remainder dropped).
STEPS_PER_EPOCH = train_df.shape[0] // BATCH_SIZE

STEPS_PER_EPOCH

In [None]:
# Train on the repeating training pipeline, evaluating on the validation fold,
# with the learning-rate schedule and checkpointing attached.
history = model.fit(
    train_dataset,
    epochs=N_EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=valid_dataset,
    callbacks=[learning_rate_callback, model_checkpoint_callback],
)

## Training History


In [None]:
def plot_history_metric(metric, f_best=np.argmax, yscale='linear'):
    """Plot a training metric per epoch, plus its validation counterpart if logged.

    Reads the module-level `history` object returned by `model.fit`. The best
    epoch of each curve is highlighted with a marker.

    Args:
        metric: key in `history.history`, e.g. 'loss' or 'top1acc'.
        f_best: function returning the index of the best value in a sequence
            (`np.argmax` for scores, `np.argmin` for losses).
        yscale: matplotlib y-axis scale name.
    """
    y_train = history.history[metric]
    # Use the number of epochs actually recorded, so the axis stays correct
    # even when training stopped before the configured epoch count.
    n_epochs = len(y_train)
    x = np.arange(1, n_epochs + 1)
    plt.figure(figsize=(20, 8))
    # Training curve
    plt.plot(x, y_train, color='tab:blue', lw=3, label='train')
    plt.title(f'Training {metric}', fontsize=24, pad=10)
    plt.ylabel(metric, fontsize=20, labelpad=10)
    plt.xlabel('epoch', fontsize=20, labelpad=10)
    # Ticks at epoch 1 and at every 5th epoch; epochs are numbered from 1
    plt.xticks([1] + np.arange(5, n_epochs + 1, 5).tolist(), fontsize=16)
    plt.yticks(fontsize=16)
    plt.yscale(yscale)

    # Training best marker (index is 0-based, the x axis is 1-based)
    x_best = f_best(y_train)
    y_best = y_train[x_best]
    plt.scatter(x_best + 1, y_best, color='purple', s=100, marker='o', label=f'train best: {y_best:.4f}')

    if f'val_{metric}' in history.history:
        y_val = history.history[f'val_{metric}']
        # Validation curve
        plt.plot(x, y_val, color='tab:orange', lw=3, label='validation')
        # Validation best marker
        x_best = f_best(y_val)
        y_best = y_val[x_best]
        plt.scatter(x_best + 1, y_best, color='red', s=100, marker='o', label=f'validation best: {y_best:.4f}')

    plt.grid()
    plt.legend(prop={'size': 18})
    plt.show()

In [None]:
# Loss curves; lower is better, so the best epoch is the argmin.
plot_history_metric('loss', f_best=np.argmin)

In [None]:
# Top-1 accuracy curves; higher is better, so the best epoch is the argmax.
plot_history_metric('top1acc', f_best=np.argmax)

## Prediction

In [None]:
# Restore the weights saved by the checkpoint callback before running inference.
model.load_weights('./model_best.h5')


def predict_on_batch(images):
    """Run the model on one batch of images in inference mode.

    Args:
        images: batch of images accepted by `model`.

    Returns:
        The model output for the batch.
    """
    outputs = model(images, training=False)
    return outputs

In [None]:
# The sample submission lists every test file name; attach each file's GCS path.
test_df = pd.read_csv('../input/sorghum-id-fgvc-9/sample_submission.csv').assign(
    image_path=lambda frame: frame["filename"].map(
        lambda image: GCS_DS_PATH + '/test/' + image
    )
)

test_df.head()

In [None]:
# Test pipeline: the file name travels through decode_image's `label` slot so
# each prediction can be matched back to its file.
_test_pairs = tf.data.Dataset.from_tensor_slices((test_df.image_path, test_df.filename))
test_dataset = (
    _test_pairs
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

In [None]:
# Predicted cultivar names and the matching file names, filled batch by batch.
predictions = []
file_names = []

with strategy.scope():
    for batch_images, batch_names in tqdm(test_dataset):
        # Most probable class index for every image in the batch
        class_ids = np.argmax(predict_on_batch(batch_images), axis=1)
        # Map class indices back to cultivar names; file names arrive as bytes
        predictions.extend(ids2labels[class_id] for class_id in class_ids)
        file_names.extend(name.decode() for name in batch_names.numpy())

## Submission

In [None]:
# Assemble the submission table: one predicted cultivar per test file.
sub = pd.DataFrame(dict(filename=file_names, cultivar=predictions))

sub.to_csv("submission.csv", index=False)
display(sub.head(5))