In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import random
import sys
import sklearn as sk
from IPython.display import Image, display
from pathlib import Path
from tqdm import tqdm
import pickle
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import gc
import math
import re
import joblib
import scipy.sparse

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import applications
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import optimizers
from tensorflow.keras import metrics
from tensorflow.keras import Model
from tensorflow.keras.models import model_from_json

import efficientnet.tfkeras as efn

In [2]:
# tf.data autotune sentinel; passed to map() and prefetch() in get_dataset
AUTO = tf.data.experimental.AUTOTUNE

# Number of images per batch produced by get_dataset
BATCH_SIZE = 8
# Spatial size images are resized to in decode_image, and the model input size
IMAGE_SIZE = [512, 512]
# NOTE(review): SEED and VERBOSE are not referenced by any visible cell — confirm they are still needed
SEED = 42
VERBOSE = 1
# Number of classes of the ArcMarginProduct head built in get_image_embeddings
N_CLASSES = 11014

In [3]:
# True  -> read_dataset loads train.csv and train_images
# False -> read_dataset loads test.csv and test_images
GET_CV = True
# When True (and GET_CV is True) read_dataset concatenates the training frame with itself
CHECK_SUB = False

In [4]:
# Function to read our dataset
def read_dataset(data_dir = '../Dataset'):
    '''
    Load the metadata table and build the path of every image it lists.

    Behaviour is driven by the module-level flags GET_CV and CHECK_SUB:
      * GET_CV true  -> reads `<data_dir>/train.csv`; images live under
        `<data_dir>/train_images/`. If CHECK_SUB is also true the frame is
        concatenated with itself (rows doubled) and re-indexed from 0.
      * GET_CV false -> reads `<data_dir>/test.csv`; images live under
        `<data_dir>/test_images/`.

    Args:
        data_dir: directory holding the CSV files and the image folders.
            Defaults to '../Dataset', the location that used to be hard-coded.

    Returns:
        (df, image_paths): the loaded DataFrame and a Series of path strings
        built from its 'image' column (same index as df).
    '''
    # Normalise so that both 'data' and 'data/' produce 'data/train.csv'
    data_dir = str(data_dir).rstrip('/')
    split = 'train' if GET_CV else 'test'

    df = pd.read_csv(f'{data_dir}/{split}.csv')
    if GET_CV and CHECK_SUB:
        # ignore_index=True gives the same 0..n-1 index as concat + reset_index(drop=True)
        df = pd.concat([df, df], axis = 0, ignore_index = True)

    image_paths = f'{data_dir}/{split}_images/' + df['image']
    return df, image_paths

# Turn raw JPEG bytes into a resized, rescaled float image
def decode_image(image_data):
    '''
    Decode JPEG bytes into a 3-channel image, resize it to IMAGE_SIZE and
    divide the pixel values by 255.

    Returns a float32 tensor.
    '''
    pixels = tf.image.decode_jpeg(image_data, channels = 3)
    resized = tf.image.resize(pixels, IMAGE_SIZE)
    return tf.cast(resized, tf.float32) / 255.0

# Load one image file from disk and decode it
def read_image(image):
    '''Read the file at path `image` and return the result of decode_image on its bytes.'''
    raw_bytes = tf.io.read_file(image)
    return decode_image(raw_bytes)

# Build the tf.data input pipeline over a collection of image paths
def get_dataset(image):
    '''
    Return a tf.data.Dataset that yields batches of decoded images.

    Every element of `image` (a path) is passed through read_image with
    AUTO parallelism, grouped into batches of BATCH_SIZE, and prefetched.
    '''
    return (
        tf.data.Dataset.from_tensor_slices(image)
        .map(read_image, num_parallel_calls = AUTO)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )

In [5]:
class ArcMarginProduct(tf.keras.layers.Layer):
    '''
    Additive angular margin (ArcFace-style) classification head.

    Called on `[features, labels]`. It computes the cosine between the
    L2-normalised features and the L2-normalised class weight columns,
    replaces the cosine at the label's class by the margin-adjusted value,
    and multiplies everything by the scale `s`.

    Reference:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
            blob/master/src/modeling/metric_learning.py
    '''
    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False,
                 ls_eps=0.0, **kwargs):
        '''
        Args:
            n_classes: number of output classes (columns of the weight matrix).
            s: factor the output is multiplied by.
            m: angular margin, in radians.
            easy_margin: if True, apply the margin only where cosine > 0.
            ls_eps: label-smoothing amount applied to the one-hot targets;
                0 disables it.
            **kwargs: forwarded to tf.keras.layers.Layer.
        '''
        super().__init__(**kwargs)

        # Constructor arguments, kept verbatim so get_config can return them
        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.easy_margin = easy_margin
        self.ls_eps = ls_eps

        # cos(m) / sin(m) for the angle-addition formula used in call()
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        # Threshold on the cosine and the penalty used when it is not exceeded
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        '''Return the base layer config extended with this layer's constructor arguments.'''
        return {
            **super().get_config(),
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        }

    def build(self, input_shape):
        '''Create the (feature_dim, n_classes) weight matrix; input_shape[0] is the feature shape.'''
        feature_shape = input_shape[0]
        super().build(feature_shape)

        self.W = self.add_weight(
            name='W',
            shape=(int(feature_shape[-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        '''Return the scaled, margin-adjusted cosine logits for `[features, labels]`.'''
        features, labels = inputs
        labels = tf.cast(labels, dtype=tf.int32)

        # Cosine between each normalised feature row and each normalised class column
        cosine = tf.matmul(
            tf.math.l2_normalize(features, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        sine = tf.math.sqrt(1.0 - tf.math.pow(cosine, 2))

        # cos(theta + m) = cos(theta) * cos(m) - sin(theta) * sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)

        target_mask = tf.cast(
            tf.one_hot(labels, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            target_mask = (1 - self.ls_eps) * target_mask + self.ls_eps / self.n_classes

        # Margin-adjusted value at the target class, plain cosine elsewhere
        logits = (target_mask * phi) + ((1.0 - target_mask) * cosine)
        return logits * self.s

In [20]:
def get_image_embeddings(image_paths,
                         weights_path = '../EffNetB1_Weights/EfficientNetB3_224_42.h5',
                         chunk = 5000):
    '''
    Compute one embedding per image with the EfficientNetB3 backbone.

    The full graph (backbone -> global average pooling -> ArcMarginProduct ->
    softmax) is rebuilt so that the checkpoint can be loaded, then a second
    model sharing the same layers is cut at the pooling output and used for
    prediction.

    Args:
        image_paths: sized, sliceable collection of image file paths
            (e.g. a list or a pandas Series).
        weights_path: checkpoint loaded into the full graph.
            NOTE(review): the default directory is named EffNetB1 and the
            file name contains 224, while the model built here is
            EfficientNetB3 with an IMAGE_SIZE input — confirm this is the
            intended checkpoint.
        chunk: number of images handed to a single model.predict call.

    Returns:
        (model, image_embeddings): the embedding model (image -> pooled
        features) and the concatenated embeddings, one row per input path.

    Raises:
        ValueError: if image_paths is empty or chunk is not positive.
    '''
    n_images = len(image_paths)
    if n_images == 0:
        raise ValueError('image_paths is empty; there is nothing to embed')
    if chunk <= 0:
        raise ValueError(f'chunk must be a positive integer, got {chunk!r}')

    margin = ArcMarginProduct(
                n_classes = N_CLASSES,
                s = 30,
                m = 0.5,
                name='head/arc_margin',
                dtype='float32'
                )

    inp = tf.keras.layers.Input(shape = (*IMAGE_SIZE, 3), name = 'inp1')
    label = tf.keras.layers.Input(shape = (), name = 'inp2')
    x = efn.EfficientNetB3(weights = None, include_top = False)(inp)
    pooled = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = margin([pooled, label])

    output = tf.keras.layers.Softmax(dtype='float32')(x)

    # The full graph only exists so the checkpoint can be loaded into it
    full_model = tf.keras.models.Model(inputs = [inp, label], outputs = [output])
    full_model.load_weights(weights_path)

    # Cut at the pooling output by tensor reference instead of layers[-4],
    # which silently depends on the order Keras lists the layers in.
    model = tf.keras.models.Model(inputs = inp, outputs = pooled)

    # Predict chunk by chunk. The chunk count is derived from the argument,
    # not from the global `df` the original code consulted.
    embeds = []
    for start in range(0, n_images, chunk):
        image_dataset = get_dataset(image_paths[start:start + chunk])
        embeds.append(model.predict(image_dataset))

    image_embeddings = np.concatenate(embeds)

    print(f'Our image embeddings shape is {image_embeddings.shape}')
    del embeds
    gc.collect()

    return model, image_embeddings

In [21]:
def get_text_embeddings(df, max_features = 15500):
    '''
    Fit a binary TF-IDF vectoriser on the 'title' column and embed every title.

    Args:
        df: DataFrame with a 'title' text column.
        max_features: vocabulary size cap passed to TfidfVectorizer.

    Returns:
        (fitted_tfidf, text_embeddings): the fitted TfidfVectorizer and the
        document-term matrix it produces for df['title'].
    '''
    fitted_tfidf = TfidfVectorizer(stop_words = 'english', binary = True, max_features = max_features)
    # fit_transform learns the vocabulary and vectorises in a single pass;
    # calling fit() and then transform() analysed every title twice.
    text_embeddings = fitted_tfidf.fit_transform(df['title'])
    return fitted_tfidf, text_embeddings

In [22]:
# Load the metadata frame and the matching image paths (train or test, depending on GET_CV)
df, image_paths = read_dataset()

In [23]:
# Build the embedding model and run it over every image path
model, image_embeddings = get_image_embeddings(image_paths)

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inp1 (InputLayer)               [(None, 512, 512, 3) 0                                            
__________________________________________________________________________________________________
efficientnet-b3 (Functional)    (None, None, None, 1 10783528    inp1[0][0]                       
__________________________________________________________________________________________________
global_average_pooling2d_3 (Glo (None, 1536)         0           efficientnet-b3[0][0]            
__________________________________________________________________________________________________
inp2 (InputLayer)               [(None,)]            0                                            
____________________________________________________________________________________________

In [34]:
# Inspect the embedding model returned by get_image_embeddings
model.summary()

Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inp1 (InputLayer)            [(None, 512, 512, 3)]     0         
_________________________________________________________________
efficientnet-b3 (Functional) (None, None, None, 1536)  10783528  
_________________________________________________________________
global_average_pooling2d_3 ( (None, 1536)              0         
Total params: 10,783,528
Trainable params: 10,696,232
Non-trainable params: 87,296
_________________________________________________________________


In [32]:
# Export the embedding model to the directory effnetb3_model_tf using the 'tf' save format
model.save('effnetb3_model_tf', save_format='tf')

INFO:tensorflow:Assets written to: effnetb3_model_tf\assets


In [None]:
# Persist the image embeddings (np.save appends '.npy' to the file name)
# NOTE(review): this cell has no execution count (In [None]) — confirm it was actually run
np.save('image_embeddings', image_embeddings)

In [45]:
# Fit TF-IDF on the titles with a vocabulary cap of 21500 instead of the function default (15500)
tfidf, title_embeddings = get_text_embeddings(df, max_features = 21500)

In [48]:
# Serialise the fitted vectoriser with joblib
joblib.dump(tfidf, 'title_tfidf.pkl')

['title_tfidf.pkl']

In [49]:
# Store the title embedding matrix in SciPy's sparse .npz format
scipy.sparse.save_npz('title_embeddings.npz', title_embeddings)