<b>This is the training notebook for the image encoder.</b>

In [None]:
#basics
import numpy as np
import pandas as pd
import os
import sys
import sklearn
import tensorflow as tf
from tensorflow import keras
import math

#modeling
from keras import backend as K
from keras.models import Model
from keras.layers import *
import tensorflow_addons as tfa
from sklearn.model_selection import train_test_split
import transformers

#image preprocessing
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from PIL import Image
import cv2

#visualization
import matplotlib.pyplot as plt
import seaborn as sns

#warnings
import warnings
warnings.filterwarnings('ignore')

SEED = 42
# Seed the NumPy and TensorFlow global RNGs as well: SEED alone is only passed
# to train_test_split, while triplet sampling (np.random), dataset shuffling
# and image augmentation (tf.random) would otherwise differ on every run.
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Read the three competition CSV files in one pass: training metadata,
# test metadata and the sample submission.
train, test, sub = map(pd.read_csv, (
    '../input/shopee-product-matching/train.csv',
    '../input/shopee-product-matching/test.csv',
    '../input/shopee-product-matching/sample_submission.csv',
))

# 1. Exploratory Data Analysis

In [None]:
# Print column dtypes and non-null counts of the training frame.
train.info()

All rows are non-null: no column contains missing values.

In [None]:
# Number of rows per label group, largest groups first. After reset_index()
# the size column is named 0, which is what the KDE plot reads.
group_sizes = train.groupby('label_group').size()
count_by_label = group_sizes.sort_values(ascending=False).reset_index()
sns.kdeplot(data=count_by_label, x=0)

Most products have between 1 and 15 duplicates (rows sharing the same `label_group`).

Every label has at least 2 duplicates. Therefore, we can form image triplets and text triplets (anchor, positive, negative).

In [None]:
# Directories holding the competition JPEG files.
train_jpg_directory = '../input/shopee-product-matching/train_images'
test_jpg_directory = '../input/shopee-product-matching/test_images'

# Prefix every image file name with its directory and store the result in a
# new `img_path` column of the corresponding frame.
train_image_path = [os.path.join(train_jpg_directory, img) for img in train.image]
train['img_path'] = train_image_path

test_image_path = [os.path.join(test_jpg_directory, img) for img in test.image]
test['img_path'] = test_image_path

In [None]:
# Display the per-label group sizes (column 0), sorted from largest to smallest.
count_by_label

In [None]:
def display_multiple_img(images_paths, rows, cols):
    """Show the images found at ``images_paths`` on a ``rows`` x ``cols`` grid.

    Parameters
    ----------
    images_paths : iterable of str
        Paths of the image files to display. Paths beyond ``rows * cols`` are
        ignored because the grid has no slot left for them.
    rows, cols : int
        Shape of the subplot grid.

    Files that OpenCV cannot read are not fatal: their slot is left without an
    image and is labelled so the failure stays visible.
    """
    # squeeze=False always returns a 2-D array of axes, so ravel() also works
    # for single-row, single-column and 1x1 grids.
    figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(16, 8), squeeze=False)
    axes = ax.ravel()

    # Hide the frame of every slot up front; unused slots then stay blank.
    for axis in axes:
        axis.set_axis_off()

    # zip() stops at the shorter sequence, so surplus paths are never loaded.
    for axis, image_path in zip(axes, images_paths):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; converting None would crash in cvtColor.
            axis.set_title('failed to load')
            continue
        # OpenCV loads pixels as BGR while matplotlib expects RGB.
        axis.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    plt.tight_layout()
    plt.show()

In [None]:
# Show up to the first 20 images of label group 994676122 on a 4x5 grid.
display_multiple_img(train.loc[train.label_group == 994676122, 'img_path'][:20], 4,5)

In [None]:
# Show the images of label group 2141883596 on a 1x2 grid.
display_multiple_img(train.loc[train.label_group == 2141883596, 'img_path'], 1,2)

In [None]:
# Drop rows whose `image` value repeats an earlier row (pandas keeps the first
# occurrence by default). The index is not reset here, so it may contain gaps.
train = train.drop_duplicates('image')

# 2. Image Embedding Encoder

## 1. Image Data Preprocessing

In [None]:
# Re-encode label_group as consecutive integers 0..n_groups-1, numbered in
# order of first appearance. `label_mapper` goes original -> integer and
# `label_mapper_inv` goes integer -> original.
original_labels = train['label_group'].unique()
label_codes = np.arange(len(original_labels))
label_mapper = dict(zip(original_labels, label_codes))
label_mapper_inv = dict(zip(label_codes, original_labels))
# NOTE: this overwrites the column, so running the cell a second time would
# try to map already-encoded values.
train['label_group'] = train['label_group'].map(label_mapper)

In [None]:
# Hold out 10% of the rows for validation (shuffled, seeded with SEED), then
# give both parts a fresh 0..n-1 index.
tr, val = train_test_split(train, test_size=0.1, shuffle=True, random_state=SEED)
tr = tr.reset_index(drop=True)
val = val.reset_index(drop=True)

The `create_batch` function below builds a batch of image triplets (anchor, positive, negative). It is included to illustrate what an input image triplet looks like.

In [None]:
# Shorthand for tf.data's autotune setting.
AUTO = tf.data.experimental.AUTOTUNE
# Target (height, width) for resized images and the batch size.
# NOTE: both IMAGE_SIZE and BATCH_SIZE are reassigned in a later cell.
IMAGE_SIZE = [384, 384]
BATCH_SIZE = 8

def _load_triplet_image(path, image_size):
    """Read the JPEG at ``path``, resize it to ``image_size`` and scale to [0, 1]."""
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, image_size)
    return image / 255


def create_batch(train, batch_size=16, image_size=(384, 384)):
    """Build one random batch of (anchor, positive, negative) image triplets.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame with an ``img_path`` column (JPEG file paths) and a
        ``label_group`` column. Rows are addressed by position, so the frame's
        index does not have to be 0..n-1.
    batch_size : int
        Number of triplets to generate.
    image_size : sequence of int
        Target size; only the first two entries (height, width) are used, so a
        (height, width, channels) shape is accepted too.

    Returns
    -------
    list of numpy.ndarray
        ``[x_anchors, x_positives, x_negatives]``, each of shape
        ``(batch_size, height, width, 3)`` with pixel values in [0, 1].

    Raises
    ------
    ValueError
        If ``train`` is empty, or if an anchor's label covers every row so
        that no negative example exists.
    """
    height, width = image_size[:2]
    target_size = (height, width)

    # Plain arrays give positional access, independent of the frame's index.
    paths = train.img_path.to_numpy()
    labels = train.label_group.to_numpy()
    n_rows = len(paths)
    if n_rows == 0:
        raise ValueError('create_batch needs a non-empty DataFrame')

    x_anchors = np.zeros((batch_size, height, width, 3))
    x_positives = np.zeros((batch_size, height, width, 3))
    x_negatives = np.zeros((batch_size, height, width, 3))

    for i in range(batch_size):
        # np.random.randint excludes the upper bound, so passing n_rows makes
        # every row (including the last one) eligible as anchor.
        anchor_idx = np.random.randint(0, n_rows)
        same_label = labels == labels[anchor_idx]

        # Prefer a positive that is a different image from the anchor; fall
        # back to the anchor itself only when its group has a single member.
        positive_candidates = np.where(same_label)[0]
        positive_candidates = positive_candidates[positive_candidates != anchor_idx]
        if len(positive_candidates) == 0:
            positive_idx = anchor_idx
        else:
            positive_idx = positive_candidates[
                np.random.randint(0, len(positive_candidates))]

        negative_candidates = np.where(~same_label)[0]
        if len(negative_candidates) == 0:
            raise ValueError('no negative example available: all rows share one label_group')
        negative_idx = negative_candidates[
            np.random.randint(0, len(negative_candidates))]

        x_anchors[i] = _load_triplet_image(paths[anchor_idx], target_size)
        x_positives[i] = _load_triplet_image(paths[positive_idx], target_size)
        x_negatives[i] = _load_triplet_image(paths[negative_idx], target_size)

    return [x_anchors, x_positives, x_negatives]

In [None]:
#example: draw the first anchor / positive / negative of a random batch side by side
batch = create_batch(tr)
fig = plt.figure(figsize = (12, 4))
panel_titles = ('anchor', 'positive', 'negative')
for position, (title, images) in enumerate(zip(panel_titles, batch), start=1):
    # subplot(1, 3, position) addresses the same slots as 131 / 132 / 133
    plt.subplot(1, 3, position)
    plt.imshow(images[0])
    plt.title(title)

### Data Generator for Semi-Hard Batch mining

In [None]:
# Input shape (height, width, channels) handed to the triplet dataset and to
# the model's Input layer.
# NOTE: this rebinds IMAGE_SIZE, which an earlier cell set to [384, 384].
IMAGE_SIZE = [512, 512, 3]
# Number of dataset rows per training batch (each row expands to K images).
BATCH_SIZE = 8

#src : https://www.kaggle.com/akensert/glret-triplet-semi-hard-loss-with-distributed-tf/notebook
#Data generator for tfa.losses.TripletSemiHardLoss
def _get_transform_matrix(rotation, shear, hzoom, wzoom, hshift, wshift):
    """Compose a 3x3 affine matrix from rotation, shear, zoom and shift.

    Parameters
    ----------
    rotation, shear : float32 tensors of shape [1]
        Angles in degrees.
    hzoom, wzoom : float32 tensors of shape [1]
        Zoom factors along the first and second image axis; the matrix uses
        their reciprocals.
    hshift, wshift : float32 tensors of shape [1]
        Translations along the first and second image axis.

    Returns
    -------
    tf.Tensor
        3x3 matrix equal to ``(rotation @ shear) @ (zoom @ shift)``.
    """

    def get_3x3_mat(lst):
        # lst holds nine shape-[1] tensors in row-major order
        return tf.reshape(tf.concat([lst],axis=0), [3,3])

    # convert degrees to radians (pi radians == 180 degrees; the previous
    # divisor of 360 halved every angle)
    rotation = math.pi * rotation / 180.
    shear    = math.pi * shear    / 180.

    one  = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')

    c1   = tf.math.cos(rotation)
    s1   = tf.math.sin(rotation)
    rot_mat = get_3x3_mat([c1,    s1,   zero ,
                           -s1,   c1,   zero ,
                           zero,  zero, one ])

    c2 = tf.math.cos(shear)
    s2 = tf.math.sin(shear)
    shear_mat = get_3x3_mat([one,  s2,   zero ,
                             zero, c2,   zero ,
                             zero, zero, one ])

    zoom_mat = get_3x3_mat([one/hzoom, zero,      zero,
                            zero,      one/wzoom, zero,
                            zero,      zero,      one])

    shift_mat = get_3x3_mat([one,  zero, hshift,
                             zero, one,  wshift,
                             zero, zero, one   ])
    return tf.matmul(
        tf.matmul(rot_mat, shear_mat),
        tf.matmul(zoom_mat, shift_mat)
        )
# zoom and shift images
def _spatial_transform(image,
                       rotation=3.0,
                       shear=2.0,
                       hzoom=8.0,
                       wzoom=8.0,
                       hshift=8.0,
                       wshift=8.0):
    """Apply a random rotation, shear, zoom and shift to ``image``.

    A random affine matrix is built with ``_get_transform_matrix`` and applied
    to a grid of pixel coordinates; the output is assembled by gathering the
    source pixel found at each transformed coordinate.

    Parameters
    ----------
    image : tensor
        Image whose first two axes are height and width. The result is
        reshaped to ``[height, width, 3]``, so three channels are expected.
    rotation, shear : float
        Multipliers applied to a standard-normal sample to obtain the angles.
    hzoom, wzoom : float
        Divisors of a standard-normal sample; the zoom factor is
        ``1 + sample / zoom``, so larger values mean weaker zoom.
    hshift, wshift : float
        Multipliers applied to a standard-normal sample to obtain the shifts.

    Returns
    -------
    tensor
        Transformed image of shape ``[height, width, 3]``.
    """

    # sizes of the first two axes of the image
    ydim = tf.gather(tf.shape(image), 0)
    xdim = tf.gather(tf.shape(image), 1)
    # parity of each dimension (1 when odd); used below in the clipping bounds
    xxdim = xdim % 2
    yxdim = ydim % 2

    # random rotation, shear, zoom and shift
    rotation = rotation * tf.random.normal([1], dtype='float32')
    shear = shear * tf.random.normal([1], dtype='float32')
    hzoom = 1.0 + tf.random.normal([1], dtype='float32') / hzoom
    wzoom = 1.0 + tf.random.normal([1], dtype='float32') / wzoom
    hshift = hshift * tf.random.normal([1], dtype='float32')
    wshift = wshift * tf.random.normal([1], dtype='float32')

    m = _get_transform_matrix(
        rotation, shear, hzoom, wzoom, hshift, wshift)

    # origin pixels: coordinates of every pixel, expressed relative to the
    # image centre, stacked with a row of ones as homogeneous coordinates
    y = tf.repeat(tf.range(ydim//2, -ydim//2,-1), xdim)
    x = tf.tile(tf.range(-xdim//2, xdim//2), [ydim])
    z = tf.ones([ydim*xdim], dtype='int32')
    idx = tf.stack([y, x, z])
    
    # destination pixels: transform the coordinates, then cast back to int32
    idx2 = tf.matmul(m, tf.cast(idx, dtype='float32'))
    idx2 = tf.cast(idx2, dtype='int32')
    # clip to origin pixels range
    idx2y = tf.clip_by_value(idx2[0,], -ydim//2+yxdim+1, ydim//2)
    idx2x = tf.clip_by_value(idx2[1,], -xdim//2+xxdim+1, xdim//2)
    idx2 = tf.stack([idx2y, idx2x, idx2[2,]])

    # apply destinations pixels to image: convert the centred coordinates back
    # to array indices and gather the corresponding source pixels
    idx3 = tf.stack([ydim//2-idx2[0,], xdim//2-1+idx2[1,]])
    d = tf.gather_nd(image, tf.transpose(idx3))
    image = tf.reshape(d, [ydim, xdim, 3])
    return image

#changing brightness of image
def _pixel_transform(image,
                     saturation_delta=0.3,
                     contrast_delta=0.1,
                     brightness_delta=0.2):
    """Randomly jitter saturation, contrast and brightness, in that order.

    Saturation and contrast factors are drawn from ``[1 - delta, 1 + delta]``;
    ``brightness_delta`` is passed to ``tf.image.random_brightness`` as its
    maximum delta.
    """
    saturation_range = (1 - saturation_delta, 1 + saturation_delta)
    contrast_range = (1 - contrast_delta, 1 + contrast_delta)

    jittered = tf.image.random_saturation(image, *saturation_range)
    jittered = tf.image.random_contrast(jittered, *contrast_range)
    return tf.image.random_brightness(jittered, brightness_delta)

#flipping images
def _random_fliplr(image, p=0.25):
    """Reverse ``image`` along axis 1 (left-right) with probability ``p``."""

    def _mirrored():
        return tf.reverse(image, [1])

    def _unchanged():
        return image

    # One uniform sample in [0, 1) decides whether this image is mirrored.
    should_mirror = tf.math.less(tf.random.uniform(()), p)
    return tf.cond(should_mirror, _mirrored, _unchanged)
###########################################Triplet Data Generator##################################
def preprocess_input(image, target_size, augment=True):
    """Resize ``image`` to ``target_size`` and return it as float32 in [0, 1].

    The image is resized bilinearly and cast to uint8. When ``augment`` is
    true, the spatial transform, the random left-right flip and the pixel
    jitter are applied in that order before the final float conversion.
    """
    resized = tf.image.resize(image, target_size, method='bilinear')
    pixels = tf.cast(resized, tf.uint8)

    if augment:
        for transform in (_spatial_transform, _random_fliplr, _pixel_transform):
            pixels = transform(pixels)

    return tf.cast(pixels, tf.float32) / 255.

def create_triplet_dataset(df, training, batch_size, input_size, K, augment=True):
    '''
    Build a tf.data pipeline of (images, labels) batches for
    tfa.losses.TripletSemiHardLoss.

    df: dataset containing label and img_path. Each img_path value is split
        on ',' so one row may list several images of the same label. A row
        with fewer than K paths is padded by repeating its own paths, so a
        row holding a single path yields K copies of that image which differ
        only through augmentation.
    training: when true, the rows are shuffled before sampling.
    batch_size: number of rows combined into one batch; every batch therefore
        holds up to batch_size * K images.
    input_size: (height, width, channels) of the produced images; the first
        two entries are used as the resize target.
    K : number of images which have same label
        (inside this function the name shadows the Keras backend alias K).
    augment: whether preprocess_input applies random augmentation. Defaults
        to True, which matches the previously hard-coded behaviour.

    Returns a dataset of (images, labels) where images has shape
    (-1, *input_size) and labels has shape (-1,).
    '''
    def sample_input(image_paths, label, K):
        # Turn one row into exactly K image paths that share `label`.
        image_paths = tf.strings.split(image_paths, sep=',')
        labels = tf.tile([label], [K,])
        if K-len(image_paths) > 0:
            # Fewer than K paths: shuffle, then append repeats until K remain.
            image_paths = tf.random.shuffle(image_paths)
            for i in tf.range(K-len(image_paths)):
                image_paths = tf.concat(
                    [image_paths, [tf.gather(image_paths, i)]], axis=0)
            return image_paths, labels
        # K or more paths: keep a random subset of size K.
        idx = tf.argsort(tf.random.uniform(tf.shape(image_paths)))
        idx = tf.gather(idx, tf.range(K))
        image_paths = tf.gather(image_paths, idx)
        return image_paths, labels

    def read_image(image_path):
        image = tf.io.read_file(image_path)
        return tf.image.decode_jpeg(image, channels=3)

    def reshape(x, y):
        # Merge the (rows, K) leading axes into one flat axis of images.
        x = tf.reshape(x, (-1, *input_size))
        y = tf.reshape(y, (-1,))
        return x, y
    @tf.autograph.experimental.do_not_convert # to silence warning
    def nested(x, y):
        # Load and preprocess the K images of one row, emitted as one group.
        return (tf.data.Dataset.from_tensor_slices((x, y))
                .map(lambda x, y: (read_image(x), y),
                    tf.data.experimental.AUTOTUNE)
                .map(lambda x, y: (preprocess_input(
                        x, input_size[:2], augment), y),
                     tf.data.experimental.AUTOTUNE)
                .batch(K))

    image_paths, labels = df.img_path, df.label_group
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    if training:
        # A buffer covering the whole frame gives a uniform shuffle; a fixed
        # 10_000 buffer only mixes rows locally on larger frames.
        dataset = dataset.shuffle(max(len(df), 1))
    dataset = dataset.map(
        lambda x, y: sample_input(x, y, K), tf.data.experimental.AUTOTUNE)
    dataset = dataset.flat_map(nested)
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(lambda x, y: reshape(x, y), tf.data.experimental.AUTOTUNE)
    return dataset

* 1. The data generator above randomly selects `batch_size` images per batch (8 for training and 16 for validation in this notebook), typically from different categories.
* 2. It then generates K augmented images for each selected category, so a batch contains K * `batch_size` images.
* 3. The `TripletSemiHardLoss` class automatically performs semi-hard mining within the batch and selects the image triplets.

In [None]:
# Columns the triplet pipeline reads from each frame.
triplet_columns = ['img_path', 'label_group']
# Settings shared by the training and the validation pipeline.
shared_dataset_args = dict(training = True, input_size = IMAGE_SIZE, K = 4)

train_img_data = create_triplet_dataset(tr[triplet_columns],
                                        batch_size = BATCH_SIZE,
                                        **shared_dataset_args)

val_img_data = create_triplet_dataset(val[triplet_columns],
                                      batch_size = 16,
                                      **shared_dataset_args)

## 2. Triplet Xception Network for Fine-Tuning with a Triplet Loss Objective

In [None]:
def triplet_net():
    """Build and compile the image embedding model.

    An ImageNet-pretrained Xception backbone (all layers frozen) feeds a
    concatenation of global max- and average-pooled features into a
    300-unit linear layer named 'linear_transformation'. The model is compiled
    with the semi-hard triplet loss (L2 distance, margin 5.0) and Adam.
    """
    image_input = Input(shape = IMAGE_SIZE, dtype = tf.float32)

    backbone = keras.applications.Xception(include_top = False,
                                           weights = 'imagenet',
                                           input_shape = IMAGE_SIZE)
    # Freeze every backbone layer so that only the new head is trained at first.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    feature_map = backbone(image_input)
    max_pooled = GlobalMaxPool2D()(feature_map)
    avg_pooled = GlobalAvgPool2D()(feature_map)
    pooled = Concatenate(axis = -1)([max_pooled, avg_pooled])

    embedding = Dense(300, name = 'linear_transformation')(pooled)

    model = Model(image_input, embedding)
    model.compile(loss = tfa.losses.TripletSemiHardLoss(margin=5.0, distance_metric='L2'),
                  optimizer = keras.optimizers.Adam(learning_rate=0.0001),
                  metrics = tfa.losses.TripletSemiHardLoss(margin=5.0, distance_metric='L2'))

    return model

In [None]:
# Build the compiled encoder and print its layer-by-layer summary.
image_model = triplet_net()
image_model.summary()

In [None]:
# Stop once val_loss has not improved for 5 epochs and roll the model back to
# its best weights, so the next stage does not start from a worse epoch.
earlystop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                          patience=5,
                                          restore_best_weights=True)
# Keep the checkpoint with the lowest validation loss on disk.
model_check = keras.callbacks.ModelCheckpoint(    
        filepath="./image_model.h5", 
        monitor='val_loss',  
        save_best_only=True)


# train_img_data is a finite dataset. When steps_per_epoch is given, Keras
# keeps drawing from the same iterator across epochs, so without repeat() the
# data runs out after about one epoch and training is interrupted early.
history = image_model.fit(train_img_data.repeat(),
                          verbose=1,
                          steps_per_epoch = len(tr)//BATCH_SIZE,
                          validation_data = val_img_data,
                          validation_steps = len(val)//16,
                          epochs=20,
                          callbacks = [earlystop, model_check])

In [None]:
#unfreeze the Xception backbone and fine-tune the whole encoder
# The backbone is the only nested Keras model among image_model's layers, so it
# is located by type. A positional lookup such as layers[3] resolves to one of
# the pooling layers here, which leaves the backbone frozen.
backbone = next(
    (layer for layer in image_model.layers if isinstance(layer, keras.Model)),
    None)
if backbone is None:
    raise ValueError('image_model contains no nested backbone model to unfreeze')
# Setting trainable on the nested model also re-enables all of its layers.
backbone.trainable = True

#recompile model (required for the trainable change to take effect), using a
#smaller learning rate for fine-tuning
image_model.compile(loss = tfa.losses.TripletSemiHardLoss(margin=5.0, distance_metric='L2'),
                  optimizer = keras.optimizers.Adam(learning_rate=0.00002),
                  metrics = tfa.losses.TripletSemiHardLoss(margin=5.0, distance_metric='L2'))


# Stop after 5 epochs without val_loss improvement and restore the best
# weights, so the model kept in memory is the best fine-tuned one.
earlystop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                          patience=5,
                                          restore_best_weights=True)

model_check = keras.callbacks.ModelCheckpoint(    
        filepath="./image_model_finetune.h5", 
        monitor='val_loss',  
        save_best_only=True)


# repeat() keeps the finite training dataset from running out after the first
# epoch when steps_per_epoch is specified.
# NOTE(review): validation_steps is 10 here but len(val)//16 in the first
# stage; confirm the shorter validation pass is intentional.
history = image_model.fit(train_img_data.repeat(),
                          verbose=1,
                          steps_per_epoch = len(tr)//BATCH_SIZE,
                          validation_data = val_img_data,
                          validation_steps = 10,
                          epochs=20,
                          callbacks = [earlystop, model_check])

# 3. Saving Image Encoder for Inference

In [None]:
# Write the model currently held in memory to an HDF5 file for later inference.
image_model.save('image_encoder_Xception.h5')