Original author: Rakka Alhazimi

To do:
1. Read tfrec file
2. Image Augmentation
3. Build Neural Network
4. Train Model
5. Create Submission

In [None]:
!pip install -U efficientnet

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras import backend as K
from efficientnet import keras as efn

from kaggle_datasets import KaggleDatasets

import re, math

In [None]:
tf.__version__

# Detect TPU

In [None]:
# Pick a tf.distribute strategy for the available hardware and record how
# many replicas it keeps in sync.
try:
    # TPU detection. No parameters necessary if TPU_NAME environment variable is set.
    # On Kaggle this is always the case.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  
    print('Running on TPU ', tpu.master())
except ValueError:
    # A ValueError from the resolver is treated as "no TPU available".
    tpu = None

if tpu:
    print("tpu")
    # Connect to the TPU cluster and initialise it before creating the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # Default distribution strategy in TensorFlow.
    # Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

# Replica count; used below to scale the global batch size.
REPLICAS = strategy.num_replicas_in_sync

print("REPLICAS: ", REPLICAS)

# Initial Parameter

In [None]:
# Global batch size: 16 images per replica.
BATCH_SIZE = 16 * REPLICAS
EPOCHS = 20

# Image size; also selects the "tfrecords-jpeg-{0}x{1}" folder and the
# fixed shape every decoded image is reshaped to.
IMAGE_SIZE = (512, 512)

# Hard-coded image counts per split.
# NOTE(review): presumably these match the competition data — confirm if the dataset changes.
TRAIN_IMG_NUM = 12753
VAL_IMG_NUM = 3712
TEST_IMG_NUM = 7382

# Number of batches per epoch (integer division drops the partial batch).
STEPS_PER_EPOCHS = TRAIN_IMG_NUM // BATCH_SIZE
# Same, when training on the train and validation splits together.
STEPS_PER_EPOCHS_FULL = (TRAIN_IMG_NUM + VAL_IMG_NUM) // BATCH_SIZE

# Shorthand for tf.data's autotune setting, used for parallel map/prefetch.
AUTO = tf.data.experimental.AUTOTUNE

# Grid-mask settings; apply_grid_mask forwards them to GridMask.
AugParams = {
    'd1' : 100,     # lower bound of the random grid period d
    'd2': 160,      # upper bound passed as maxval when drawing d
    'rotate' : 45,  # scale (degrees) of the normally distributed mask rotation
    'ratio' : 0.5   # masked stripe width as a fraction of d
}

# Read TFRecord File

You can learn more about TFRecord from [here](https://www.tensorflow.org/tutorials/load_data/tfrecord#tfrecords_format_details)

## Get Filenames

In [None]:
# With TPU enable, we can only read filenames through Google Cloud Storage
GCS_DATA_PATH = KaggleDatasets().get_gcs_path()


def get_filenames(path):
    """Return every file under the dataset's GCS bucket that matches `path`.

    Args:
        path: glob pattern relative to GCS_DATA_PATH (starts with "/").

    Returns:
        List of matching file paths as returned by tf.io.gfile.glob.
    """
    pattern = GCS_DATA_PATH + path
    return tf.io.gfile.glob(pattern)

# One list of .tfrec paths per split; the folder name is derived from IMAGE_SIZE
# (e.g. "tfrecords-jpeg-512x512" for the current setting).
train_filenames = get_filenames("/tfrecords-jpeg-{0}x{1}/train/*.tfrec".format(*IMAGE_SIZE))
val_filenames = get_filenames("/tfrecords-jpeg-{0}x{1}/val/*.tfrec".format(*IMAGE_SIZE))
test_filenames = get_filenames("/tfrecords-jpeg-{0}x{1}/test/*.tfrec".format(*IMAGE_SIZE))

## Load Dataset
To open the tfrec files, we pass the list of filenames to the **tf.data.TFRecordDataset** class.

In [None]:
# Create dataset from multiple filenames.
def load_dataset(filenames, ordered=True):
    """Create a raw TFRecord dataset that reads several files in parallel.

    Args:
        filenames: list of .tfrec file paths to read.
        ordered: when False, deterministic ordering is switched off so records
            may be produced in any order (faster); when True the default
            options are left untouched.

    Returns:
        A tf.data.TFRecordDataset of serialized examples with the ordering
        options applied.
    """
    read_options = tf.data.Options()
    if not ordered:
        read_options.experimental_deterministic = False  # disable order, more speed

    # Let tf.data pick the number of parallel readers (AUTO is the notebook's
    # AUTOTUNE constant) instead of spelling out the magic value -1.
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)

    return dataset.with_options(read_options)


# Train/validation records may arrive in any order; the test records keep a
# fixed order (ordered=True).
train_records = load_dataset(train_filenames, False)
val_records = load_dataset(val_filenames, False)
test_records = load_dataset(test_filenames, True)

### Identify tfrecord features
Take one sample from training dataset and parse with tf.train.Example()

In [None]:
# for raw_record in train_records.take(1):
#     example = tf.train.Example()
#     example.ParseFromString(raw_record.numpy())
#     print(example)

## Read raw bytes string
The contents of a tfrec file are serialized byte strings, so we need to parse each record back into its declared features.

In [None]:
def decode_image(raw):
    """Decode a JPEG byte string into a normalized RGB image tensor.

    Args:
        raw: scalar string tensor holding the encoded JPEG bytes.

    Returns:
        float32 tensor of shape [*IMAGE_SIZE, 3] with values scaled to 0..1.
    """
    # Ask for exactly three channels: without it a grayscale JPEG decodes to a
    # single channel and the fixed-size reshape below fails.
    decoded = tf.io.decode_jpeg(raw, channels=3)
    image = tf.cast(decoded, tf.float32) / 255.  # normalize to 0..1 value
    image = tf.reshape(image, [*IMAGE_SIZE, 3])  # static size required for TPU

    return image

In [None]:
from functools import partial

# Write all known features here, in format "feature": "type"
# `config` is just a short alias for tf.io.FixedLenFeature.
config = tf.io.FixedLenFeature

# Labelled records (train / validation): class label, image id, JPEG bytes.
feature_train = {"class": config([], tf.int64),
                 "id"   : config([], tf.string),
                 "image": config([], tf.string),}

# Test records carry no "class" entry.
feature_test = {"id"   : config([], tf.string),
                "image": config([], tf.string),}


def read_tfrecord(example_single, features):
    """Parse one serialized example into an (image, target) pair.

    Args:
        example_single: one serialized record from the TFRecord dataset.
        features: feature spec used for parsing; when it has a "class" entry
            the record is treated as labelled.

    Returns:
        (image, label) for labelled specs, otherwise (image, id).
    """
    record = tf.io.parse_single_example(example_single, features)
    image = decode_image(record.get("image"))

    # Test data has no class/label, so the id is returned in its place.
    is_labelled = bool(features.get("class"))
    target = record.get("class") if is_labelled else record.get("id")

    return image, target


# Use functools.partial to pre-bind the feature spec, so each parser takes
# only the serialized example and can be passed straight to Dataset.map.

# Parser for train and validation data: yields (image, label)
parse_train = partial(read_tfrecord, features=feature_train) 

# Parser for test data: yields (image, id)
parse_test = partial(read_tfrecord, features=feature_test)  

* tf.io.FixedLenFeature : a class to configure the incoming feature
* first arg             : shape [] means single element
* second arg            : tf.int64 means dtype


In [None]:
# Clean dataset, ready to be trained or modified first
train_dataset = train_records.map(parse_train, num_parallel_calls=AUTO)
val_dataset = val_records.map(parse_train, num_parallel_calls=AUTO)
# Decode the test split in parallel as well, like train/val. A parallel map
# still yields elements in input order under the default (deterministic)
# options, which test_records keeps, so ids and predictions stay aligned.
test_dataset = test_records.map(parse_test, num_parallel_calls=AUTO)

In [None]:
train_dataset

# Plot Random Images

## Create function

In [None]:
def plot_random_image(rows, cols, dataset, shuffle=5000):
    """Draw a rows x cols grid of shuffled samples from `dataset`.

    Args:
        rows: number of grid rows.
        cols: number of grid columns.
        dataset: dataset yielding (image, label) pairs; the second element is
            used as the subplot title.
        shuffle: shuffle buffer size applied before taking the samples.
    """
    plt.figure(figsize=(3 * cols, rows * 3))
    samples = dataset.shuffle(shuffle).take(rows * cols)

    # Subplot positions are 1-based.
    for position, (image, label) in enumerate(samples, start=1):
        plt.subplot(rows, cols, position)
        plt.imshow(image.numpy())
        plt.title(label.numpy())
        plt.axis("off")

## Train Dataset

In [None]:
# plot_random_image(2, 5, train_dataset)

## Validation Dataset

In [None]:
# plot_random_image(2, 5, val_dataset)

## Test Dataset

In [None]:
# plot_random_image(2, 5, test_dataset)

# Image Augmentation

## Rotation, Shift, Zoom, Shear

In [None]:
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Build the 3x3 matrix used to transform (homogeneous) pixel indices.

    Every argument is concatenated with shape-[1] float32 constants, so each
    one is expected to be a shape-[1] float32 tensor (the caller in this
    notebook passes `tf.random.normal([1])` draws).

    Args:
        rotation: rotation angle in degrees.
        shear: shear angle in degrees.
        height_zoom: zoom factor; its reciprocal becomes the first diagonal entry.
        width_zoom: zoom factor; its reciprocal becomes the second diagonal entry.
        height_shift: offset placed in the last column of the first row.
        width_shift: offset placed in the last column of the second row.

    Returns:
        3x3 tensor equal to (rotation . shear) . (zoom . shift).
    """
        
    # CONVERT DEGREES TO RADIANS
    rotation = math.pi * rotation / 180.
    shear = math.pi * shear / 180.
    
    # ROTATION MATRIX
    c1 = tf.math.cos(rotation)
    s1 = tf.math.sin(rotation)
    # Shape-[1] constants so they can be concatenated with the shape-[1] inputs.
    one = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')
    rotation_matrix = tf.reshape( tf.concat([c1,s1,zero, -s1,c1,zero, zero,zero,one],axis=0),[3,3] )
        
    # SHEAR MATRIX
    c2 = tf.math.cos(shear)
    s2 = tf.math.sin(shear)
    shear_matrix = tf.reshape( tf.concat([one,s2,zero, zero,c2,zero, zero,zero,one],axis=0),[3,3] )    
    
    # ZOOM MATRIX
    zoom_matrix = tf.reshape( tf.concat([one/height_zoom,zero,zero, zero,one/width_zoom,zero, zero,zero,one],axis=0),[3,3] )
    
    # SHIFT MATRIX
    shift_matrix = tf.reshape( tf.concat([one,zero,height_shift, zero,one,width_shift, zero,zero,one],axis=0),[3,3] )
    
    # Compose the four transforms into a single matrix.
    return K.dot(K.dot(rotation_matrix, shear_matrix), K.dot(zoom_matrix, shift_matrix))



In [None]:
def rot_shift_zoom_shear(image, DIM = IMAGE_SIZE[0]):
    """Randomly rotate, shear, zoom and shift a single square image.

    Args:
        image: one image of size [DIM, DIM, 3] (not a batch of [b, DIM, DIM, 3]).
        DIM: side length of the image; defaults to IMAGE_SIZE[0].

    Returns:
        Tensor of shape [DIM, DIM, 3] in which every destination pixel holds
        the value of the source pixel it maps to under the random transform.
    """

    # 1 for odd DIM, 0 for even DIM; only used to adjust the lower clip bound.
    XDIM = DIM % 2
    
    # Random parameters, all drawn from a standard normal and rescaled:
    # rotation/shear in degrees, zoom around 1.0, shift in pixels.
    rot = 15. * tf.random.normal([1],dtype='float32')
    shr = 5. * tf.random.normal([1],dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    w_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    h_shift = 16. * tf.random.normal([1],dtype='float32') 
    w_shift = 16. * tf.random.normal([1],dtype='float32') 
  
    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred on the image; z = 1 makes them homogeneous.
    x = tf.repeat( tf.range(DIM//2,-DIM//2,-1), DIM )
    y = tf.tile( tf.range(-DIM//2,DIM//2),[DIM] )
    z = tf.ones([DIM*DIM],dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m,tf.cast(idx,dtype='float32'))
    idx2 = K.cast(idx2,dtype='int32')
    # Clamp out-of-range coordinates to the border; for even DIM this keeps
    # both gather indices below within 0..DIM-1.
    idx2 = K.clip(idx2,-DIM//2+XDIM+1,DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Shift the centred coordinates back to array (row, column) indices.
    idx3 = tf.stack( [DIM//2-idx2[0,], DIM//2-1+idx2[1,]] )
    d = tf.gather_nd(image,tf.transpose(idx3))
        
    return tf.reshape(d,[DIM,DIM,3])

## Grid Mask

In [None]:
def transform(image, inv_mat, image_shape):
    """Warp `image` by looking up, for each output pixel, its source pixel.

    Args:
        image: tensor indexed as [row, column, channel].
        inv_mat: 3x3 matrix mapping centred output coordinates (x, y, 1) to
            centred source coordinates.
        image_shape: (h, w, c) of `image`; used as static Python values.

    Returns:
        Tensor of shape [h, w, c]; output pixels whose source falls outside
        the image are left at 0.
    """

    h, w, c = image_shape
    cx, cy = w//2, h//2

    # Output pixel grid, centred on the image centre, in homogeneous form.
    new_xs = tf.repeat( tf.range(-cx, cx, 1), h)
    new_ys = tf.tile( tf.range(-cy, cy, 1), [w])
    new_zs = tf.ones([h*w], dtype=tf.int32)

    # Map to source coordinates and shift back to array coordinates.
    old_coords = tf.matmul(inv_mat, tf.cast(tf.stack([new_xs, new_ys, new_zs]), tf.float32))
    old_coords_x, old_coords_y = tf.round(old_coords[0, :] + w//2), tf.round(old_coords[1, :] + h//2)

    # Flag every pixel whose source lies outside the image ...
    clip_mask_x = tf.logical_or(old_coords_x<0, old_coords_x>w-1)
    clip_mask_y = tf.logical_or(old_coords_y<0, old_coords_y>h-1)
    clip_mask = tf.logical_or(clip_mask_x, clip_mask_y)

    # ... and keep only the in-bounds source/destination pairs.
    old_coords_x = tf.boolean_mask(old_coords_x, tf.logical_not(clip_mask))
    old_coords_y = tf.boolean_mask(old_coords_y, tf.logical_not(clip_mask))
    new_coords_x = tf.boolean_mask(new_xs+cx, tf.logical_not(clip_mask))
    new_coords_y = tf.boolean_mask(new_ys+cy, tf.logical_not(clip_mask))

    old_coords = tf.cast(tf.stack([old_coords_y, old_coords_x]), tf.int32)
    new_coords = tf.cast(tf.stack([new_coords_y, new_coords_x]), tf.int64)
    rotated_image_values = tf.gather_nd(image, tf.transpose(old_coords))
    rotated_image_channel = list()
    # Scatter the gathered values into a dense [h, w] plane, one channel at a
    # time; positions without a value default to 0.
    for i in range(c):
        vals = rotated_image_values[:,i]
        sparse_channel = tf.SparseTensor(tf.transpose(new_coords), vals, [h, w])
        rotated_image_channel.append(tf.sparse.to_dense(sparse_channel, default_value=0, validate_indices=False))

    # Stacked as [c, h, w]; move channels last.
    return tf.transpose(tf.stack(rotated_image_channel), [1,2,0])

def random_rotate(image, angle, image_shape):
    """Rotate `image` by a random angle.

    The rotation angle in degrees is `angle` multiplied by a standard normal
    draw, so `angle` acts as a scale rather than a hard limit.

    Args:
        image: tensor indexed as [row, column, channel].
        angle: scale (degrees) of the random rotation.
        image_shape: (h, w, c) of `image`, forwarded to `transform`.

    Returns:
        The rotated image as produced by `transform`.
    """

    def get_rotation_mat_inv(angle):
        """Return the 3x3 matrix for `angle` (degrees, shape-[1] tensor) that is passed to `transform`."""
        # transform to radian
        angle = math.pi * angle / 180

        cos_val = tf.math.cos(angle)
        sin_val = tf.math.sin(angle)
        one = tf.constant([1], tf.float32)
        zero = tf.constant([0], tf.float32)

        rot_mat_inv = tf.concat([cos_val, sin_val, zero,
                                     -sin_val, cos_val, zero,
                                     zero, zero, one], axis=0)
        rot_mat_inv = tf.reshape(rot_mat_inv, [3,3])

        return rot_mat_inv
    angle = float(angle) * tf.random.normal([1],dtype='float32')
    rot_mat_inv = get_rotation_mat_inv(angle)
    return transform(image, rot_mat_inv, image_shape)


def GridMask(image_height, image_width, d1, d2, rotate_angle=1, ratio=0.5):
    """Create a randomly placed, randomly rotated grid mask.

    A square mask of side `hh` (the image diagonal, rounded up to an even
    number) is built with row stripes and column stripes; it is 0 where a
    row stripe and a column stripe overlap and 1 elsewhere. The mask is then
    rotated and its centre is cropped to the image size.

    Args:
        image_height: height of the mask to return.
        image_width: width of the mask to return.
        d1: minval used when drawing the grid period `d`.
        d2: maxval used when drawing the grid period `d`.
        rotate_angle: rotation scale in degrees, forwarded to `random_rotate`.
        ratio: stripe width as a fraction of `d`.

    Returns:
        Tensor of shape [image_height, image_width, 1] holding 0/1 values.
    """

    h, w = image_height, image_width
    # Side of the working square: the image diagonal, made even.
    hh = int(np.ceil(np.sqrt(h*h+w*w)))
    hh = hh+1 if hh%2==1 else hh
    # Grid period and stripe width (d * ratio, rounded to the nearest integer).
    d = tf.random.uniform(shape=[], minval=d1, maxval=d2, dtype=tf.int32)
    l = tf.cast(tf.cast(d,tf.float32)*ratio+0.5, tf.int32)

    # Random offsets of the grid along each axis.
    st_h = tf.random.uniform(shape=[], minval=0, maxval=d, dtype=tf.int32)
    st_w = tf.random.uniform(shape=[], minval=0, maxval=d, dtype=tf.int32)

    # Start with the stripe one period before the origin ...
    y_ranges = tf.range(-1 * d + st_h, -1 * d + st_h + l)
    x_ranges = tf.range(-1 * d + st_w, -1 * d + st_w + l)

    # ... then append one stripe of width l per period across the square.
    for i in range(0, hh//d+1):
        s1 = i * d + st_h
        s2 = i * d + st_w
        y_ranges = tf.concat([y_ranges, tf.range(s1,s1+l)], axis=0)
        x_ranges = tf.concat([x_ranges, tf.range(s2,s2+l)], axis=0)

    # Drop stripe positions outside the square. The same combined mask is
    # applied to both lists, so an entry is dropped when either its x or its
    # y position is out of range.
    x_clip_mask = tf.logical_or(x_ranges <0 , x_ranges > hh-1)
    y_clip_mask = tf.logical_or(y_ranges <0 , y_ranges > hh-1)
    clip_mask = tf.logical_or(x_clip_mask, y_clip_mask)

    x_ranges = tf.boolean_mask(x_ranges, tf.logical_not(clip_mask))
    y_ranges = tf.boolean_mask(y_ranges, tf.logical_not(clip_mask))

    # Pair every stripe position with all hh positions along the other axis.
    hh_ranges = tf.tile(tf.range(0,hh), [tf.cast(tf.reduce_sum(tf.ones_like(x_ranges)), tf.int32)])
    x_ranges = tf.repeat(x_ranges, hh)
    y_ranges = tf.repeat(y_ranges, hh)

    y_hh_indices = tf.transpose(tf.stack([y_ranges, hh_ranges]))
    x_hh_indices = tf.transpose(tf.stack([hh_ranges, x_ranges]))

    # Dense masks that are 0 on the stripes and 1 (the default value) elsewhere.
    y_mask_sparse = tf.SparseTensor(tf.cast(y_hh_indices, tf.int64),  tf.zeros_like(y_ranges), [hh, hh])
    y_mask = tf.sparse.to_dense(y_mask_sparse, 1, False)

    x_mask_sparse = tf.SparseTensor(tf.cast(x_hh_indices, tf.int64), tf.zeros_like(x_ranges), [hh, hh])
    x_mask = tf.sparse.to_dense(x_mask_sparse, 1, False)

    # The sum is 0 only where both masks are 0, i.e. where stripes cross.
    mask = tf.expand_dims( tf.clip_by_value(x_mask + y_mask, 0, 1), axis=-1)

    # Rotate the oversized square, then crop its centre to the image size.
    mask = random_rotate(mask, rotate_angle, [hh, hh, 1])
    mask = tf.image.crop_to_bounding_box(mask, (hh-h)//2, (hh-w)//2, image_height, image_width)

    return mask

def apply_grid_mask(image, image_shape):
    """Multiply `image` by a random grid mask built from AugParams.

    Args:
        image: image tensor to mask.
        image_shape: (height, width, channels) of `image`.

    Returns:
        The image with the masked cells set to zero.
    """
    height, width = image_shape[0], image_shape[1]
    grid = GridMask(height,
                    width,
                    AugParams['d1'],
                    AugParams['d2'],
                    AugParams['rotate'],
                    AugParams['ratio'])

    # The mask has a single channel; replicate it for 3-channel images.
    is_three_channel = image_shape[-1] == 3
    if is_three_channel:
        grid = tf.concat([grid] * 3, axis=-1)

    float_grid = tf.cast(grid, tf.float32)
    return image * float_grid

## Random Blockout

In [None]:
def random_blockout(img, sl=0.1, sh=0.2, rl=0.4):
    """Zero out one randomly sized, randomly placed rectangle of the image.

    The rectangle's side lengths are drawn between sqrt(area * sl * rl) and
    sqrt(area * sh / rl), the upper bound being capped at the image size,
    where area = IMAGE_SIZE[0] * IMAGE_SIZE[1].

    Args:
        img: image tensor of size [IMAGE_SIZE[0], IMAGE_SIZE[1], 3].
        sl: lower area fraction used for the minimum side length.
        sh: upper area fraction used for the maximum side length.
        rl: aspect-ratio factor applied to both bounds.

    Returns:
        The image with the rectangle set to zero, in the dtype of `img`.
    """
    height, width, channels = IMAGE_SIZE[0], IMAGE_SIZE[1], 3
    full_area = tf.cast(height * width, tf.float32)

    # Bounds for the rectangle's side lengths.
    min_side = tf.cast(tf.round(tf.sqrt(full_area * sl * rl)), tf.int32)
    max_side = tf.cast(tf.round(tf.sqrt(full_area * sh / rl)), tf.int32)

    max_patch_h = tf.minimum(max_side, height)
    max_patch_w = tf.minimum(max_side, width)

    patch_h = tf.random.uniform(shape=[], minval=min_side, maxval=max_patch_h, dtype=tf.int32)
    patch_w = tf.random.uniform(shape=[], minval=min_side, maxval=max_patch_w, dtype=tf.int32)

    # All-zero patch that will become the blocked-out region of the mask.
    patch = tf.zeros(shape=[patch_h, patch_w, channels])
    patch = tf.cast(patch, tf.uint8)

    # Random vertical placement: rows of ones above and below the patch.
    free_h = height - patch_h
    top = tf.random.uniform(shape=[], minval=0, maxval=free_h, dtype=tf.int32)
    bottom = free_h - top

    # Random horizontal placement: columns of ones left and right of the patch.
    free_w = width - patch_w
    left = tf.random.uniform(shape=[], minval=0, maxval=free_w, dtype=tf.int32)
    right = free_w - left

    # Pad the patch with ones up to the full image size, giving a 0/1 mask.
    paddings = [[0,0],[top, bottom], [left, right], [0,0]]
    keep_mask = tf.pad([patch], paddings, constant_values=1)
    keep_mask = tf.squeeze(keep_mask, axis=0)
    blocked = tf.multiply(tf.cast(img,tf.float32), tf.cast(keep_mask, tf.float32))

    return tf.cast(blocked, img.dtype)

In [None]:
def augmentation(image, label):
    """Apply at most one randomly chosen augmentation to a training sample.

    A uniform draw in [0, 1) picks, with equal probability, between grid
    mask, rotate/shift/zoom/shear, random blockout, or no change.

    Args:
        image: image tensor of size [*IMAGE_SIZE, 3].
        label: passed through unchanged.

    Returns:
        (image cast to float32, label).
    """
    draw = tf.random.uniform(shape=[], minval=0.0, maxval=1.0)

    if draw <= 0.25:
        image = apply_grid_mask(image, (*IMAGE_SIZE, 3))
    elif draw <= 0.50:
        image = rot_shift_zoom_shear(image)
    elif draw <= 0.75:
        image = random_blockout(image)
    # draw > 0.75: the image is left as it is

    return tf.cast(image, tf.float32), label

In [None]:
# Show augmented images: each row is one training example, each column an
# independent random augmentation of it.
rows, cols = 4, 6
plt.figure(figsize=(2.9 * cols, rows * 2.9))

# zip with range(rows) stops after `rows` training examples.
for row, element in zip(range(rows), train_dataset):
    # Repeat the single example endlessly so it can be augmented `cols` times.
    one_element = tf.data.Dataset.from_tensors(element).repeat()
    for col, (image, _) in zip(range(cols), one_element.map(augmentation, ).as_numpy_iterator()):
        plt.subplot(rows, cols, row * cols + col + 1)
        plt.axis('off')
        plt.imshow(image)

# Learning Rate Scheduler

In [None]:
lr_start = 5e-4   # initial learning rate (also given to the optimizer)
lr_min = 5e-6     # floor applied by scheduler
# NOTE(review): epoch_decay is not referenced anywhere in the visible notebook — confirm before removing.
epoch_decay = 4

def scheduler(epoch, lr):
    """Decay the learning rate by a factor of exp(-0.2), never below lr_min.

    Args:
        epoch: epoch index; accepted for the callback signature but not used.
        lr: current learning rate.

    Returns:
        The new learning rate wrapped in tf.constant.
    """
    decayed = lr * tf.math.exp(-0.2)
    floored = max(lr_min, decayed)
    return tf.constant(floored)

# Plot learning rate scheduler
# Replay the schedule for EPOCHS steps, feeding each result back in as the
# next epoch's learning rate.
lr_show = []
previous = lr_start
for i in range(EPOCHS):
    current = scheduler(i, previous)
    lr_show.append(current)
    previous = current

# scheduler returns tensors; convert them to plain numbers for plotting.
lr_show = list(map(lambda x: x.numpy(), lr_show))

plt.plot(range(EPOCHS), lr_show);

# Image Classification with Pretrained CNN

## Train model with only training data

In [None]:
# Training pipeline: endless stream of augmented samples, shuffled with a
# 2048-element buffer, batched and prefetched.
train_input = (
    train_dataset
        .repeat()
        .map(augmentation, num_parallel_calls=AUTO)
        .shuffle(2048)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
)

# Validation pipeline: no augmentation; the batches are cached.
val_input = (
    val_dataset
        .batch(BATCH_SIZE)
        .cache()
)

# Test pipeline: batches of (image, id) pairs.
test_input = (
    test_dataset.batch(BATCH_SIZE)
    .prefetch(AUTO)
                 
)

In [None]:
# Params for Pretrained CNN; shared by create_dnet and create_efn.
# The backbones are built without their top classifier and with global
# average pooling.
# NOTE(review): "classes" presumably has no effect when include_top is False — confirm.
params = {
    "include_top":False, 
    "input_shape":[*IMAGE_SIZE, 3], 
    "classes":104,
    "pooling":"avg"}

def create_dnet():
    """Build a DenseNet201 backbone from `params` and freeze most of it.

    Every layer whose name does not contain "conv5_block" is marked
    non-trainable (transfer learning); the other layers are left as created.

    Returns:
        The DenseNet201 model.
    """
    backbone = keras.applications.DenseNet201(**params)
    keep_trainable = re.compile(r"conv5_block")

    frozen = [layer for layer in backbone.layers
              if keep_trainable.search(layer.name) is None]
    for layer in frozen:
        layer.trainable = False

    return backbone

def create_efn():
    """Build an EfficientNetB6 backbone from `params` and freeze most of it.

    Every layer whose name does not contain "block7" is marked non-trainable
    (transfer learning); the other layers are left as created.

    Returns:
        The EfficientNetB6 model.
    """
    backbone = efn.EfficientNetB6(**params)
    keep_trainable = re.compile(r"block7")

    frozen = [layer for layer in backbone.layers
              if keep_trainable.search(layer.name) is None]
    for layer in frozen:
        layer.trainable = False

    return backbone

# Merge two pretrained CNN

In [None]:
# Build neural network using pretrained CNN
# Labels are integer class ids, hence the "sparse" loss and metric.
loss = "sparse_categorical_crossentropy"
metric = "sparse_categorical_accuracy"

# Create and compile the model inside the distribution strategy's scope.
with strategy.scope():

    efficient = create_efn()
    dense_net = create_dnet()

    # Feed the same input to both backbones. From here on `efficient` and
    # `dense_net` hold the backbones' output tensors, not the models.
    input_image = keras.Input(shape=[*IMAGE_SIZE, 3])
    efficient = efficient(input_image)
    dense_net = dense_net(input_image)

    dropout_1 = keras.layers.Dropout(0.3)(efficient)
    dropout_2 = keras.layers.Dropout(0.3)(dense_net)
    
    # Join both feature vectors and classify into the 104 classes.
    concat = keras.layers.concatenate([dropout_1, dropout_2])
    
    dense_2 = keras.layers.Dense(104, activation="softmax")(concat)

    merged_model = keras.Model(input_image, dense_2)

    merged_model.compile(optimizer=keras.optimizers.Adam(lr_start), 
                         loss=loss,
                         metrics=[metric])
    
    # Alias, not a copy: both names refer to the same model object.
    full_model = merged_model

In [None]:
# Write a diagram of the merged model to merged.png.
keras.utils.plot_model(merged_model, "merged.png")

In [None]:
# Setup Callbacks
# Early stopping watches the *training* accuracy (`metric`), not a validation
# metric, with a patience of 5 epochs.
early_stop = keras.callbacks.EarlyStopping(monitor=metric, patience=5)

# Keep only the best model so far in best_model.h5. It is created watching the
# validation accuracy; the final training cell switches it to `metric`.
check_point = keras.callbacks.ModelCheckpoint(
                                    filepath="best_model.h5",
                                    monitor="val_sparse_categorical_accuracy",
                                    save_best_only=True)

# Applies `scheduler` to obtain each epoch's learning rate.
learning_schedule = keras.callbacks.LearningRateScheduler(scheduler)


callbacks_list = [early_stop, check_point, learning_schedule]

In [None]:
# # Fit with training data, validate with validation data

# history = merged_model.fit(train_input,
#                             epochs=EPOCHS,
#                             steps_per_epoch=STEPS_PER_EPOCHS,
#                             callbacks=callbacks_list,
#                             validation_data=val_input,)

In [None]:
# train_acc = history.history["sparse_categorical_accuracy"] 
# val_acc = history.history["val_sparse_categorical_accuracy"]
# epochs = list(range(EPOCHS))

# plt.figure()
# plt.plot(epochs, train_acc, label="training acc")
# plt.plot(epochs, val_acc, label="validaiton acc")
# plt.legend();

## Utilize Both Train and Validation Dataset

In [None]:
 # Same pipeline as train_input, but fed with the train and validation splits
 # concatenated.
 # NOTE(review): `full_input` below starts with a stray leading space; this
 # presumably still runs as a notebook cell but would be an IndentationError
 # in a plain .py export — confirm and remove.
 full_input = (
    train_dataset.concatenate(val_dataset)
        .repeat()
        .map(augmentation, num_parallel_calls=AUTO)
        .shuffle(2048)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
)

In [None]:
# No validation data is passed to this fit, so point the checkpoint at the
# training metric ("sparse_categorical_accuracy") instead of the val_ metric.
# This mutates the existing callback object in place.
check_point.monitor = metric

# Train on the combined train + validation pipeline.
history = full_model.fit(full_input,
                         epochs=EPOCHS,
                         steps_per_epoch=STEPS_PER_EPOCHS_FULL,
                         callbacks=callbacks_list,
        )

In [None]:
# Reload the checkpoint written by the ModelCheckpoint callback.
best_model = keras.models.load_model("./best_model.h5")

In [None]:
# Copy the checkpointed weights into the model that was built inside
# strategy.scope().
best_weights = best_model.get_weights()

# use merged or full model, the weight will be same anyway
# (full_model is an alias of merged_model)
merged_model.set_weights(best_weights)

In [None]:
merged_model

# Test Time Augmentation (TTA)

In [None]:
def aug_gridmask(image):
    """TTA variant: apply a random grid mask and return the image as float32."""
    masked = apply_grid_mask(image, (*IMAGE_SIZE, 3))
    return tf.cast(masked, tf.float32)

def aug_rot(image):
    """TTA variant: apply the random rotate/shift/zoom/shear and return float32."""
    warped = rot_shift_zoom_shear(image)
    return tf.cast(warped, tf.float32)

def aug_blockout(image):
    """TTA variant: zero out a random rectangle and return the image as float32."""
    blocked = random_blockout(image)
    return tf.cast(blocked, tf.float32)

def aug_none(image):
    """TTA variant: return the image unchanged."""
    return image

# Default set of test-time augmentations averaged by predict_tta.
aug_list = [aug_gridmask, aug_rot, aug_blockout, aug_none]

In [None]:
def predict_tta(model, test_image, aug_list=aug_list):
    """Predict with test-time augmentation and average the results.

    Args:
        model: model whose `predict` is called once per augmentation.
        test_image: batched dataset of images; it is unbatched, augmented
            per image and re-batched with BATCH_SIZE.
        aug_list: functions mapping one image to one augmented image.

    Returns:
        Element-wise mean of the per-augmentation predictions.
    """
    def _predict_with(aug):
        # Augment image by image, then restore the batching for predict.
        augmented = test_image.unbatch().map(aug).batch(BATCH_SIZE)
        return model.predict(augmented)

    per_aug_probs = [_predict_with(aug) for aug in aug_list]

    return np.mean(per_aug_probs, axis=0)

In [None]:
# Split the batched (image, id) test pipeline into two streams. Each stream
# re-reads test_input, so ids and predictions line up only because the test
# records are read in a fixed order (load_dataset(test_filenames, True)).
test_image = test_input.map(lambda image, _id: image, num_parallel_calls=AUTO)
# Re-batch all ids into a single batch of TEST_IMG_NUM and convert it to a
# NumPy array of unicode strings.
test_id = test_input.map(lambda image, _id: _id).unbatch().batch(TEST_IMG_NUM)
image_id = next(iter(test_id)).numpy().astype("U")

# probabilities = merged_model.predict(test_image)
# Average the class probabilities over the TTA variants, then pick the most
# likely class for each image.
probabilities = predict_tta(merged_model, test_image)
predictions = np.argmax(probabilities, axis=-1)

In [None]:
# One row per test image: its id and the predicted class index.
submission = pd.DataFrame({"id": image_id, "label": predictions})
submission.to_csv("submission.csv", index=False)

In [None]:
submission