### Import all libraries

In [1]:
!pip install --quiet ../input/keras-efficientnet-whl/Keras_Applications-1.0.8-py3-none-any.whl
!pip install --quiet ../input/keras-efficientnet-whl/efficientnet-1.1.1-py3-none-any.whl

import sys
sys.path.append('../input/nfnets-keras')

import os
import math
import re
import random
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
import tensorflow.keras.backend as K
import efficientnet.keras as efn
import efficientnet
import itertools
import matplotlib
import scipy
import pandas as pd
import sklearn
from matplotlib import pyplot as plt
from datetime import datetime
from functools import partial
from kaggle_datasets import KaggleDatasets
import pickle
from collections import Counter
from sklearn.preprocessing import StandardScaler
## for bert language model
import transformers
from transformers import TFAutoModel, AutoTokenizer
from transformers import RobertaTokenizer, TFRobertaModel
from nfnet import NFNet, nfnet_params

import cudf
import cuml
import cupy
from cuml.feature_extraction.text import TfidfVectorizer
from cuml.neighbors import NearestNeighbors

from nltk.corpus import stopwords
import string
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

import gc

# Seed the Python, NumPy and TensorFlow random generators with one shared value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

### Memory adjustment

In [2]:
# Restrict TensorFlow to LIMIT GB of GPU RAM so that the rest of the card
# stays available to RAPIDS (cudf / cuml).
LIMIT = 5.0
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # memory_limit is passed as 1024*LIMIT, i.e. LIMIT GB expressed in MB.
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024*LIMIT)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        #print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Best effort: the error is only printed and the notebook keeps running.
        print(e)
print('We will restrict TensorFlow to max %iGB GPU RAM'%LIMIT)
# NOTE(review): the 16 below is a hard-coded total GPU memory in GB — confirm it matches the actual device.
print('then RAPIDS can use %iGB GPU RAM'%(16-LIMIT))

# Shared tf.data settings used by the dataset helpers further down.
AUTO = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 32
IMAGE_SIZE = (380, 380)

We will restrict TensorFlow to max 5GB GPU RAM
then RAPIDS can use 11GB GPU RAM


### Helper functions for text preprocessing

In [3]:
# Preprocessing function helper
# replace word that concatenate with other word
def remove_concatenate_2_words(text):
    """Delete every occurrence of the known glued-on filler words from ``text``.

    Args:
        text: the title string to clean.

    Returns:
        ``text`` with each listed word removed wherever it appears, including
        inside longer words.
    """
    glued_words = ('khusus',)
    cleaned = text
    for filler in glued_words:
        # Splitting on the filler and re-joining removes all of its occurrences.
        cleaned = ''.join(cleaned.split(filler))
    return cleaned

PUNCT_TO_REMOVE = string.punctuation
# Deletion table built once instead of on every call; the function is applied
# to every title in the dataset.
_PUNCT_TABLE = str.maketrans('', '', PUNCT_TO_REMOVE)
def remove_punctuation(text):
    """Return ``text`` with every character of ``string.punctuation`` removed.

    Args:
        text: the string to clean.

    Returns:
        A new string without ASCII punctuation characters.
    """
    return text.translate(_PUNCT_TABLE)

STOPWORDS_ID = set(stopwords.words('indonesian'))
STOPWORDS_EN = set(stopwords.words('english'))
def remove_stopwords(list_text):
    """Drop English and Indonesian stop words from a token list.

    Args:
        list_text: iterable of word tokens.

    Returns:
        A list with the tokens that appear in neither stop-word set, in their
        original order.
    """
    kept = []
    for token in list_text:
        if token in STOPWORDS_EN or token in STOPWORDS_ID:
            continue
        kept.append(token)
    return kept

# remove big number and split text that contains word and number
def remove_big_number(list_text):
    """Split tokens into digit / non-digit pieces and keep only selected numbers.

    Each token is split around its runs of digits. Non-numeric pieces are
    always kept. A numeric piece is kept (re-rendered with ``str(int(...))``)
    only when it is below 7000 and:

    * above 1000 and a multiple of 100, or
    * above 100 (up to 1000) and a multiple of 10, or
    * at most 100 and even.

    Args:
        list_text: iterable of string tokens.

    Returns:
        A flat list of the surviving pieces, in order.
    """
    words = []
    for w in list_text:
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        for item in re.split(r'(\d+)', w):
            if not item:
                # re.split yields '' around leading/trailing digit runs; those
                # are not real tokens and must not end up in the vocabulary.
                continue
            try:
                tmp = int(item)
            except ValueError:
                # Not a number: keep the text piece untouched.
                words.append(item)
                continue
            if tmp >= 7000:
                continue
            if tmp > 1000:
                keep = tmp % 100 == 0
            elif tmp > 100:
                keep = tmp % 10 == 0
            else:
                keep = tmp % 2 == 0
            if keep:
                words.append(str(tmp))
    return words

def remove_zero_val(list_text):
    """Return the tokens of ``list_text`` that are not exactly the string ``'0'``."""
    return list(filter(lambda token: token != '0', list_text))

# Marketing / shipping / location filler words that carry no product information.
# Split once into a frozenset so the list is not rebuilt on every call and
# membership checks are constant time. The double space in the literal is kept,
# so the empty string is part of the set exactly as before.
_COMMON_WORDS = frozenset("hari keren kere kw super baik jual jualan quality best free  kwalitas berkualitas kualitas bagus terbaik kembali dijamin beli gratis murah free diskon ongkir cek berkualitas original asli kualitas uang jaminan jamin terjamin buatan buat kirim wilayah luar kota jawa bali jakarta surabaya bulan month year day tahun hari harian anda your nikmat singapore malaysia indonesia vietnam thailand filipina bangkok jepang buy one get dapat dua two satu meriah kirim send pengiriman paket hemat uang kembali dapat guarantee buatan lokal dalam internasional karya termurah paling murah terbaik cheap murah biaya".split(' '))

def remove_common_words(list_text):
    """Drop generic filler words from a token list.

    Args:
        list_text: iterable of string tokens.

    Returns:
        A list of the tokens that are not in the common-word set, order preserved.
    """
    return [w for w in list_text if w not in _COMMON_WORDS]

def remove_strange_words(list_text):
    """Filter out a fixed set of noise tokens (repeated letters, 'thn', 'th', 'bln', ...).

    Args:
        list_text: iterable of string tokens.

    Returns:
        A list of the remaining tokens in their original order.
    """
    noise_tokens = ['aaa', 'aaaa', 'aaaaa', 'abc', 'abcd', 'bb', 'bbb', 'bbbb', 'ccc', 'cccc', 'thn', 'th', 'bln']
    survivors = []
    for token in list_text:
        if token in noise_tokens:
            continue
        survivors.append(token)
    return survivors

def text_vectorizer(max_features, max_len, vocab):
    """Build a small Keras model that turns raw strings into integer id sequences.

    Args:
        max_features: maximum vocab size (passed as ``max_tokens``).
        max_len: sequence length to pad the outputs to.
        vocab: strings the ``TextVectorization`` layer is adapted on.

    Returns:
        A ``Sequential`` model made of a string input of shape ``(1,)``
        followed by the adapted ``TextVectorization`` layer.
    """
    vectorizer = TextVectorization(
        max_tokens=max_features,
        output_mode='int',
        output_sequence_length=max_len,
    )

    # Learn the vocabulary from the supplied strings, 64 items per batch.
    corpus = tf.data.Dataset.from_tensor_slices(vocab)
    vectorizer.adapt(corpus.batch(64))

    wrapper = tf.keras.models.Sequential()
    wrapper.add(tf.keras.Input(shape=(1,), dtype=tf.string))
    wrapper.add(vectorizer)
    return wrapper

In [4]:
def utils_preprocess_text(text, flg_stemm=False, flg_lemm=True, lst_stopwords=None):
    """Normalize a piece of text into a cleaned, space-joined string.

    Steps: lowercase and strip, remove every character that is neither a word
    character nor whitespace, split on whitespace, optionally remove stop
    words, optionally stem and/or lemmatize, then drop '0' and a fixed list of
    noise tokens.

    Args:
        text: input value; it is converted with ``str()`` first.
        flg_stemm: when true, apply NLTK's Porter stemmer to every token.
        flg_lemm: when true, apply NLTK's WordNet lemmatizer to every token.
        lst_stopwords: optional iterable of stop-word collections; a token is
            removed when ``token in collection`` holds for any of them.
            NOTE(review): if a flat list of strings is passed, ``in`` does
            substring matching against each string — confirm what callers pass.

    Returns:
        The cleaned tokens joined with single spaces.
    """
    ## clean (convert to lowercase and remove punctuations and characters and then strip
    text = re.sub(r'[^\w\s]', '', str(text).lower().strip())

    ## Tokenize (convert from string to list)
    lst_text = text.split()

    ## remove Stopwords
    if lst_stopwords is not None:
        # Loop variable deliberately not called `stopwords`, to avoid shadowing
        # the nltk.corpus import of the same name.
        for stopword_group in lst_stopwords:
            lst_text = [word for word in lst_text if word not in stopword_group]

    ## Stemming (remove -ing, -ly, ...)
    if flg_stemm:
        # Imported here because the file only imports `nltk.corpus.stopwords`;
        # the bare name `nltk` used previously was never bound and raised NameError.
        from nltk.stem.porter import PorterStemmer
        ps = PorterStemmer()
        lst_text = [ps.stem(word) for word in lst_text]
        # (An Indonesian stemmer was sketched here originally but never enabled.)

    ## Lemmatisation (convert the word into root word)
    if flg_lemm:
        from nltk.stem.wordnet import WordNetLemmatizer
        lem = WordNetLemmatizer()
        lst_text = [lem.lemmatize(word) for word in lst_text]

    # remove_zero_val
    lst_text = [w for w in lst_text if w not in ['0']]

    # remove strange words
    strange_words = ['aaa', 'aaaa', 'aaaaa', 'abc', 'abcd', 'bb', 'bbb', 'bbbb', 'ccc', 'cccc', 'thn', 'th', 'bln']
    lst_text = [w for w in lst_text if w not in strange_words]

    ## back to string from list
    return " ".join(lst_text)

def string_escape(s, encoding='utf-8'):
    """Interpret backslash escape sequences embedded in ``s`` and decode the result.

    Args:
        s: text that contains literal escape sequences such as ``\\xc3\\xa9``.
        encoding: encoding used for the final decode of the unescaped bytes.

    Returns:
        The string obtained after unescaping and decoding.
    """
    # To bytes, as required by the 'unicode-escape' codec.
    raw_bytes = s.encode('latin1')
    # Resolve the escape sequences.
    unescaped = raw_bytes.decode('unicode-escape')
    # latin1 maps code points 0-255 one-to-one back to bytes.
    payload = unescaped.encode('latin1')
    # Decode with the text's real encoding.
    return payload.decode(encoding)

def regular_encode(texts, tokenizer, maxlen=512):
    """Tokenize a batch of texts into fixed-length id and attention-mask arrays.

    Args:
        texts: batch of strings handed to ``tokenizer.batch_encode_plus``.
        tokenizer: object exposing ``batch_encode_plus``.
        maxlen: length every sequence is padded and truncated to.

    Returns:
        Tuple ``(input_ids, attention_mask)`` of NumPy arrays built from the
        tokenizer output.
    """
    enc_di = tokenizer.batch_encode_plus(
        texts,
        return_attention_mask=True,
        return_token_type_ids=True,
        # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
        padding='max_length',
        # Without truncation, texts longer than `maxlen` keep their full length
        # and the rows no longer form a rectangular array.
        truncation=True,
        max_length=maxlen,
    )

    return np.array(enc_di['input_ids']), np.array(enc_di['attention_mask'])

### Title preprocessing (train vocab)

In [5]:
# Build the title vocabulary from the training data.
df = pd.read_csv('../input/shopee-product-matching/train.csv')

# Title cleaning expressed as one chained pipeline; the steps and their order
# are unchanged (remove_big_number stays disabled).
df['title'] = (
    df['title']
    .apply(string_escape)
    .apply(remove_concatenate_2_words)
    .str.lower()
    .apply(remove_punctuation)
    .apply(lambda x: str(x).split())
    .apply(remove_stopwords)
    .apply(remove_zero_val)
    .apply(remove_common_words)
    .apply(remove_strange_words)
    .apply(lambda x: list(np.unique(x)))
)

# title vocab: every distinct token that occurs in any cleaned training title
words = list(df['title'])
train_vocab = list(np.unique(np.concatenate(words)))

### Load test dataset

In [6]:
# GET_CV switches between scoring a held-out training fold and the real test set.
# CHECK_RAM (only used with GET_CV) loads the training CSV stacked on itself.
GET_CV = False
CHECK_RAM = False

if not GET_CV:
    df = pd.read_csv('../input/shopee-product-matching/test.csv')
elif CHECK_RAM:
    df = pd.read_csv('../input/shopee-product-matching/train.csv')
    df = pd.concat([df,df], axis=0)
else:
    df = pd.read_parquet('../input/shopee-tfrecords-380-gkf-four-folds/fold_3/unseen.parquet', engine='pyarrow')

### Preprocessing for MLP

In [7]:
################ for Image + MLP ################
# Clean the titles of the loaded dataset with the same steps, in the same order,
# as the train-vocabulary cell (remove_big_number stays disabled).
df['tmp'] = (
    df['title']
    .apply(string_escape)
    .apply(remove_concatenate_2_words)
    .str.lower()
    .apply(remove_punctuation)
    .apply(lambda x: str(x).split())
    .apply(remove_stopwords)
    .apply(remove_zero_val)
    .apply(remove_common_words)
    .apply(remove_strange_words)
    .apply(lambda x: list(np.unique(x)))
)

# title vocab: train vocabulary followed by the distinct tokens of this dataset
words = train_vocab + list(np.unique(np.concatenate(list(df['tmp']))))

# Text vectorizer
model = text_vectorizer(max_features = 25000, max_len = 100, vocab = words)
list_text = [' '.join(tokens) for tokens in df['tmp']]
title_vec = model.predict(list_text)
df['title_vec'] = list(title_vec)
# Placeholder columns; they are only set to None in this cell.
df['input_ids'] = None
df['att_mask'] = None

# Release the intermediates (note: train_vocab is deleted as well).
del words, model, list_text, title_vec, df['tmp'], train_vocab
gc.collect()
df.head()

Unnamed: 0,posting_id,image,image_phash,title,title_vec,input_ids,att_mask
0,test_2255846744,0006c8e5462ae52167402bac1c2e916e.jpg,ecc292392dc7687a,Edufuntoys - CHARACTER PHONE ada lampu dan mus...,"[21, 20, 13, 11, 7, 6, 2, 0, 0, 0, 0, 0, 0, 0,...",,
1,test_3588702337,0007585c4d0f932859339129f709bfdc.jpg,e9968f60d2699e2c,(Beli 1 Free Spatula) Masker Komedo | Blackhea...,"[24, 23, 22, 19, 17374, 18, 15, 10, 9, 3, 0, 0...",,
2,test_4015706929,0008377d3662e83ef44e1881af38b879.jpg,ba81c17e3581cabe,READY Lemonilo Mie instant sehat kuah dan goreng,"[17, 16, 14, 12, 8, 5, 4, 0, 0, 0, 0, 0, 0, 0,...",,


### Helper functions for datasets

In [8]:
def getMetric(col):
    """Create a row-wise F1 scorer comparing ``row.target`` with ``row[col]``.

    Args:
        col: key of the prediction collection inside each row.

    Returns:
        A function ``f1score(row)`` returning
        ``2 * |target ∩ prediction| / (len(target) + len(prediction))``.
    """
    def f1score(row):
        truth = row.target
        predicted = row[col]
        overlap = np.intersect1d(truth, predicted)
        return 2 * len(overlap) / (len(truth) + len(predicted))
    return f1score

def read_dataset(df):
    """Extract image paths and stacked model inputs from a dataframe.

    The image folder depends on the module-level ``GET_CV`` flag: train images
    when it is true, test images otherwise.

    Args:
        df: dataframe with ``image``, ``input_ids``, ``att_mask`` and
            ``title_vec`` columns.

    Returns:
        Tuple ``(image_paths, input_ids, att_mask, title_vec)``; the last three
        are ``np.stack`` results over the corresponding columns.
    """
    image_root = (
        '/kaggle/input/shopee-product-matching/train_images/'
        if GET_CV
        else '/kaggle/input/shopee-product-matching/test_images/'
    )
    image_paths = image_root + df['image']

    def stack_column(column):
        return np.stack(df[column], axis=0)

    input_ids = stack_column('input_ids')
    att_mask = stack_column('att_mask')
    title_vec = stack_column('title_vec')
    return image_paths, input_ids, att_mask, title_vec

def decode_image(image, img_size):
    """Decode JPEG bytes into a resized float image scaled by 1/255.

    Args:
        image: encoded JPEG content.
        img_size: target size passed to ``tf.image.resize`` and used for the
            final reshape.

    Returns:
        A float32 tensor reshaped to ``[*img_size, 3]``.
    """
    pixels = tf.image.decode_jpeg(image, channels=3)
    resized = tf.image.resize(pixels, img_size)
    scaled = tf.cast(resized, tf.float32) / 255.0
    target_shape = [*img_size, 3]
    return tf.reshape(scaled, target_shape)

# Function to read our test image and return image
def read_image(filename, title_vec):
    """Load one image file and pair it with an empty placeholder label.

    Args:
        filename: path of the image file to read.
        title_vec: title vector belonging to the same sample.

    Returns:
        ``(features, label)`` where ``features`` is the decoded image alone
        when the module-level ``ONLY_IMAGE`` flag is set, else
        ``(image, title_vec)``; ``label`` is an empty integer array.
    """
    raw_bytes = tf.io.read_file(filename)
    image = decode_image(raw_bytes, IMAGE_SIZE)
    if ONLY_IMAGE:
        return (image), np.empty((0), dtype=int)
    return (image, title_vec), np.empty((0), dtype=int)

# Function to get our dataset that read images
def get_dataset(image, title_vec):
    """Build the batched, prefetched ``tf.data`` pipeline for image inference.

    Args:
        image: image file paths.
        title_vec: title vectors aligned with ``image``.

    Returns:
        A dataset that maps every (path, title vector) pair through
        ``read_image``, batches by ``BATCH_SIZE`` and prefetches.
    """
    return (
        tf.data.Dataset.from_tensor_slices((image, title_vec))
        .map(read_image, num_parallel_calls = AUTO)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )

def get_text_dataset(input_ids, att_mask, title_vec):
    """Build the batched, prefetched ``tf.data`` pipeline for text-only inputs.

    Args:
        input_ids: token id arrays.
        att_mask: attention mask arrays aligned with ``input_ids``.
        title_vec: title vectors aligned with ``input_ids``.

    Returns:
        A dataset yielding ``((input_ids, att_mask, title_vec), ())`` elements,
        batched by ``BATCH_SIZE`` and prefetched.
    """
    def as_features_and_empty_label(ids, mask, vec):
        # The second element is an empty tuple standing in for the label.
        return (ids, mask, vec), ()

    samples = tf.data.Dataset.from_tensor_slices((input_ids, att_mask, title_vec))
    samples = samples.map(as_features_and_empty_label, num_parallel_calls = AUTO)
    return samples.batch(BATCH_SIZE).prefetch(AUTO)

### Import libraries for the deep learning models

In [9]:
from tensorflow.keras.models import Model, Sequential
import keras.backend as K
from keras.optimizers import SGD
from tensorflow.keras.layers import (
    Input, 
    Flatten, 
    Dense, 
    Dropout, 
    AveragePooling2D, 
    GlobalAveragePooling2D, 
    SpatialDropout2D, 
    BatchNormalization, 
    Activation, 
    Concatenate,
    Embedding,
    GlobalAveragePooling1D,
    Lambda
)

from keras.backend import sigmoid
def swish(x, beta = 1):
    """Swish activation: ``x * sigmoid(beta * x)``.

    Args:
        x: input tensor.
        beta: scale applied to ``x`` inside the sigmoid (default 1).

    Returns:
        The element-wise product of ``x`` and its sigmoid gate.
    """
    gate = sigmoid(beta * x)
    return (x * gate)

from keras.utils.generic_utils import get_custom_objects
from keras.layers import Activation

# Register the swish activation under the string key 'swish' so layers can
# request it by name, e.g. Activation('swish').
get_custom_objects().update({'swish': Activation(swish)})

from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, multiply, Reshape
from keras.layers import Bidirectional, GlobalMaxPool1D,Bidirectional, Conv1D, GlobalMaxPooling1D, Conv2D
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50, InceptionResNetV2, Xception

### Arc margin implementation

In [10]:
class ArcMarginProduct(keras.layers.Layer):
    '''
    Implements large margin arc distance.

    The layer takes ``[X, y]`` (features and integer class labels) and returns
    scaled cosine logits in which the entry of the labelled class has the
    angular margin ``m`` applied.

    Reference:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
            blob/master/src/modeling/metric_learning.py
    '''
    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False,
                 ls_eps=0.0, **kwargs):
        """Store the hyper-parameters and precompute the margin constants.

        Args:
            n_classes: number of classes (columns of the weight matrix).
            s: factor the final logits are multiplied by.
            m: angular margin.
            easy_margin: selects the fallback rule used in ``call``.
            ls_eps: label-smoothing epsilon; smoothing is applied when > 0.
            **kwargs: forwarded to ``keras.layers.Layer``.
        """
        super(ArcMarginProduct, self).__init__(**kwargs)

        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        # cos(theta + m) = cos(theta) * cos(m) - sin(theta) * sin(m)
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        # Threshold and offset used by the non-easy-margin branch in ``call``.
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        """Return the layer config extended with this layer's constructor arguments."""
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        """Create the weight matrix ``W`` of shape (feature_dim, n_classes).

        ``input_shape`` is indexed as ``input_shape[0]`` for the feature input,
        matching the ``[X, y]`` pair consumed by ``call``.
        """
        super(ArcMarginProduct, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        """Compute the margin-adjusted, scaled cosine logits.

        Args:
            inputs: pair ``(X, y)`` of features and class labels; ``y`` is cast
                to int32 before one-hot encoding.

        Returns:
            Tensor of logits multiplied by ``s``.
        """
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        # Cosine similarity between L2-normalised features and class weights.
        cosine = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        # Floating-point rounding can make cosine**2 exceed 1 by a tiny amount;
        # clamp so sqrt never receives a negative value (which would give NaN).
        sine = tf.math.sqrt(
            tf.clip_by_value(1.0 - tf.math.pow(cosine, 2), 0.0, 1.0)
        )
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes

        # Margin-adjusted value for the labelled class, plain cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

### Deep learning models (Image + MLP for title)

In [11]:
def effb1(weights='noisy-student'):
    """Assemble the EfficientNet-B1 image + title model with an auxiliary branch and ArcMargin head.

    Three feature branches are built and concatenated before the margin layer:
    the pooled backbone output, a title branch over 100 integer token ids, and
    an auxiliary branch that taps the backbone at ``block5b_activation``.
    Every backbone layer is marked non-trainable.

    Args:
        weights: forwarded to ``efn.EfficientNetB1`` as its ``weights`` argument.

    Returns:
        A Keras ``Model`` with inputs
        ``[model_image.input, aux_model.input, model_title.input, label]`` and a
        ``CLASSES``-way softmax output.

    Note:
        Reads the module-level ``IMAGE_SIZE`` and ``CLASSES``; ``CLASSES`` is
        not defined in the part of the notebook shown here.
    """
    efn1 = efn.EfficientNetB1(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))

    # Freeze the backbone.
    for layer in efn1.layers:
        layer.trainable = False

    # Image branch: backbone -> global pooling -> BN -> dropout -> 2400-d dense.
    model_image = Sequential([
        efn1,
        GlobalAveragePooling2D(name='effb1-pooling'),
        BatchNormalization(name='effb1_bn1'),
        Dropout(0.2),
        Dense(2400, name='effb1_dense1'),
        Activation('swish', name='effb1_act1'),
    ], name='effb1-img')

    # Auxiliary branch: intermediate backbone activation -> conv -> pooling -> 1024-d dense.
    eff_aux = Model(
        inputs = efn1.input, 
        outputs = efn1.get_layer('block5b_activation').output)
    aux_model = Sequential([
        eff_aux,
        Conv2D(128, kernel_size=(3, 3), activation='relu', name='aux-conv1'),
        BatchNormalization(name='aux-bn1'),
        GlobalAveragePooling2D(name='aux-pooling'),
        # Flatten(),
        Dropout(0.5),
        Dense(1024, name='aux-dense1'),
        Activation('swish'),
    ], name='aux-model')

    # Title branch: embedding of 100 token ids, flattened, then two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 70, input_length=100, name='title-embed'),
#         GlobalAveragePooling1D(name='title-pooling'),
        Flatten(name='title-flatten'),
        BatchNormalization(name='title-bn1'),
        Dropout(0.2),
        Dense(650, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
#         Dropout(0.1),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')

    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # Fuse image + title features, then append the auxiliary features.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    concatenate2 = Concatenate(name='concatenate2')([concatenate, aux_model.output])
    # Scalar class label consumed by the margin layer.
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate2, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)

    model = Model(inputs=[model_image.input, aux_model.input, model_title.input, label], outputs=output)
    return model

def effb1_512_v2(weights='noisy-student'):
    """Assemble the EfficientNet-B1 model with a Conv1D title branch, auxiliary branch and ArcMargin head.

    Same overall layout as ``effb1`` (frozen backbone, image branch, auxiliary
    branch tapping ``block5b_activation``), but the title branch uses a
    150-d embedding followed by a Conv1D and global max pooling.

    Args:
        weights: forwarded to ``efn.EfficientNetB1`` as its ``weights`` argument.

    Returns:
        A Keras ``Model`` with inputs
        ``[model_image.input, aux_model.input, model_title.input, label]`` and a
        ``CLASSES``-way softmax output.

    Note:
        Reads the module-level ``IMAGE_SIZE`` and ``CLASSES``; ``CLASSES`` is
        not defined in the part of the notebook shown here.
    """
    efn1 = efn.EfficientNetB1(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))

    # Freeze the backbone.
    for layer in efn1.layers:
        layer.trainable = False

    # Image branch: backbone -> global pooling -> BN -> dropout -> 2400-d dense.
    model_image = Sequential([
        efn1,
        GlobalAveragePooling2D(name='effb1-pooling'),
        BatchNormalization(name='effb1_bn1'),
        Dropout(0.2),
        Dense(2400, name='effb1_dense1'),
        Activation('swish', name='effb1_act1'),
    ], name='effb1-img')

    # Auxiliary branch: intermediate backbone activation -> conv -> pooling -> 1024-d dense.
    eff_aux = Model(
        inputs = efn1.input, 
        outputs = efn1.get_layer('block5b_activation').output)
    aux_model = Sequential([
        eff_aux,
        Conv2D(128, kernel_size=(3, 3), activation='relu', name='aux-conv1'),
        BatchNormalization(name='aux-bn1'),
        GlobalAveragePooling2D(name='aux-pooling'),
        Dropout(0.5),
        Dense(1024, name='aux-dense1'),
        Activation('swish'),
    ], name='aux-model')

    # Title branch: embedding -> Conv1D over the token axis -> max pooling -> dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')

    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # Fuse image + title features, then append the auxiliary features.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    concatenate2 = Concatenate(name='concatenate2')([concatenate, aux_model.output])
    # Scalar class label consumed by the margin layer.
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate2, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)

    model = Model(inputs=[model_image.input, aux_model.input, model_title.input, label], outputs=output)
    return model

def effb1_512_v3(weights='noisy-student'):
    """Assemble the EfficientNet-B1 image + Conv1D title model with an ArcMargin head.

    Unlike ``effb1`` / ``effb1_512_v2`` this variant has no auxiliary branch
    and does not set the backbone layers to non-trainable.

    Args:
        weights: forwarded to ``efn.EfficientNetB1`` as its ``weights`` argument.

    Returns:
        A Keras ``Model`` with inputs
        ``[model_image.input, model_title.input, label]`` and a ``CLASSES``-way
        softmax output.

    Note:
        Reads the module-level ``IMAGE_SIZE`` and ``CLASSES``; ``CLASSES`` is
        not defined in the part of the notebook shown here.
    """
    efn1 = efn.EfficientNetB1(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))

    # Image branch: backbone -> global pooling -> BN -> dropout -> 2400-d dense.
    model_image = Sequential([
        efn1,
        GlobalAveragePooling2D(name='effb1-pooling'),
        BatchNormalization(name='effb1_bn1'),
        Dropout(0.2),
        Dense(2400, name='effb1_dense1'),
        Activation('swish', name='effb1_act1'),
    ], name='effb1-img')

    # Title branch: embedding -> Conv1D over the token axis -> max pooling -> dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')

    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # Fuse image and title features before the margin layer.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    # Scalar class label consumed by the margin layer.
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)

    model = Model(inputs=[model_image.input, model_title.input, label], outputs=output)
    return model

def effb2(weights='noisy-student'):
    """Assemble the EfficientNet-B2 image-only model with an auxiliary branch and ArcMargin head.

    The backbone layers are marked non-trainable. Only the image branch and the
    auxiliary branch (tapping ``block5b_activation``) feed the margin layer.

    Args:
        weights: forwarded to ``efn.EfficientNetB2`` as its ``weights`` argument.

    Returns:
        A Keras ``Model`` with inputs
        ``[model_image.input, aux_model.input, label]`` and an 8261-way softmax
        output.

    Note:
        Reads the module-level ``IMAGE_SIZE``. The class count is the literal
        8261 here rather than ``CLASSES`` as in the sibling builders.
    """
    efn2 = efn.EfficientNetB2(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))

    # Freeze the backbone.
    for layer in efn2.layers:
        layer.trainable = False

    # Image branch (layer names keep the 'effb1' prefix although the backbone is B2).
    model_image = Sequential([
        efn2,
        GlobalAveragePooling2D(name='effb1-pooling'),
        BatchNormalization(name='effb1_bn1'),
        Dropout(0.2),
        Dense(2400, name='effb1_dense1'),
        Activation('swish', name='effb1_act1'),
    ], name='effb1-img')

    # Auxiliary branch: intermediate backbone activation -> conv -> pooling -> 1024-d dense.
    eff_aux = Model(
        inputs = efn2.input, 
        outputs = efn2.get_layer('block5b_activation').output)
    aux_model = Sequential([
        eff_aux,
        Conv2D(128, kernel_size=(3, 3), activation='relu', name='aux-conv1'),
        BatchNormalization(name='aux-bn1'),
        GlobalAveragePooling2D(name='aux-pooling'),
        # Flatten(),
        Dropout(0.5),
        Dense(1024, name='aux-dense1'),
        Activation('swish'),
    ], name='aux-model')

    # NOTE(review): this title branch is built but never connected to the
    # returned model (it is absent from both the concatenation and the inputs).
    # Left in place because removing it would shift Keras' automatic layer
    # naming — confirm whether it can be dropped.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')

    # NOTE(review): 8261 is hard-coded here and in the output layer below,
    # whereas the other builders use CLASSES — confirm the two agree.
    margin = ArcMarginProduct(
        n_classes = 8261, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # Fuse image and auxiliary features before the margin layer.
    concatenate2 = Concatenate(name='concatenate2')([model_image.output, aux_model.output])
    # Scalar class label consumed by the margin layer.
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate2, label])
    output = Dense(8261, activation='softmax', name='output')(arc_face)

    model = Model(inputs=[model_image.input, aux_model.input, label], outputs=output)
    return model

def effb5(weights='noisy-student'):
    """Assemble the EfficientNet-B5 image + Conv1D title model with an ArcMargin head.

    The backbone is created with ``drop_connect_rate=0`` and its layers are not
    frozen. There is no auxiliary branch.

    Args:
        weights: forwarded to ``efn.EfficientNetB5`` as its ``weights`` argument.

    Returns:
        A Keras ``Model`` with inputs
        ``[model_image.input, model_title.input, label]`` and a ``CLASSES``-way
        softmax output.

    Note:
        Reads the module-level ``IMAGE_SIZE`` and ``CLASSES``; ``CLASSES`` is
        not defined in the part of the notebook shown here.
    """
    # The local name shadows this function's own name inside the body only.
    effb5 = efn.EfficientNetB5(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3),
                                drop_connect_rate=0  # the hack
                              )

    # Image branch (layer names keep the 'effb1' prefix although the backbone is B5).
    model_image = Sequential([
        effb5,
        GlobalAveragePooling2D(name='effb1-pooling'),
        BatchNormalization(name='effb1_bn1'),
        Dropout(0.2),
        Dense(2400, name='effb1_dense1'),
        Activation('swish', name='effb1_act1'),
    ], name='effb1-img')

    # Title branch: embedding -> Conv1D over the token axis -> max pooling -> dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')

    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # Fuse image and title features before the margin layer.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    # Scalar class label consumed by the margin layer.
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)

    model = Model(inputs=[model_image.input, model_title.input, label], outputs=output)
    return model

def effb7(weights='noisy-student'):
    """Assemble the image-only EfficientNet-B7 model with an attention-weighted pooling head.

    The backbone feature map is batch-normalised, a stack of 1x1 convolutions
    produces a single-channel sigmoid attention map, and the attention-weighted
    features are pooled, rescaled by the pooled attention, and projected to a
    2400-d embedding that feeds the ArcMargin head.

    Args:
        weights: forwarded to ``efn.EfficientNetB7`` as its ``weights`` argument.

    Returns:
        A Keras ``Model`` with inputs ``[img_inp, label]`` and a ``CLASSES``-way
        softmax output.

    Note:
        Reads the module-level ``IMAGE_SIZE`` and ``CLASSES``; ``CLASSES`` is
        not defined in the part of the notebook shown here.
    """
    img_inp = Input(shape=(*IMAGE_SIZE, 3))
    # NOTE(review): the variable is called `effb5` but holds an EfficientNetB7.
    effb5 = efn.EfficientNetB7(weights=weights, input_shape=(*IMAGE_SIZE, 3), include_top=False)

    # Channel count of the backbone's final feature map.
    pt_depth = effb5.layers[-1].get_output_shape_at(0)[-1]
    pt_features = effb5(img_inp)
    bn_features = BatchNormalization()(pt_features)

    # Attention head: 1x1 convolutions narrowing 64 -> 16 -> 8 -> 1 channel (sigmoid).
    attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
    attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(1, kernel_size = (1,1), padding = 'valid', activation = 'sigmoid')(attn_layer)

    # Fixed all-ones 1x1 convolution: copies the single attention channel to
    # pt_depth channels so it can be multiplied with the features. Not trainable.
    up_c2_w = np.ones((1, 1, 1, pt_depth))
    up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same', activation = 'linear', 
                   use_bias = False, weights = [up_c2_w])
    up_c2.trainable = False
    attn_layer = up_c2(attn_layer)

    mask_features = multiply([attn_layer, bn_features])
    gap_features = GlobalAveragePooling2D()(mask_features)
    gap_mask = GlobalAveragePooling2D()(attn_layer)
    # to account for missing values from the attention model
    # (pooled masked features divided by the pooled attention mask)
    gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
    gap_dr = Dropout(0.25)(gap)
    img_embed = Dense(2400, activation = 'swish', name='img-embed')(gap_dr)

    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # Scalar class label consumed by the margin layer.
    label = Input(shape=(), name='arc-input')
    arc_face = margin([img_embed, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)

    model = Model(inputs=[img_inp, label], outputs=output)
    return model

def effb7_v2(weights='noisy-student'):
    """Assemble the EfficientNet-B7 attention-pooling model combined with a Conv1D title branch.

    The image side matches ``effb7`` (attention-weighted pooling into a 2400-d
    embedding); it is concatenated with the title branch output before the
    ArcMargin head.

    Args:
        weights: forwarded to ``efn.EfficientNetB7`` as its ``weights`` argument.

    Returns:
        A Keras ``Model`` with inputs ``[img_inp, model_title.input, label]``
        and a ``CLASSES``-way softmax output.

    Note:
        Reads the module-level ``IMAGE_SIZE`` and ``CLASSES``; ``CLASSES`` is
        not defined in the part of the notebook shown here.
    """
    img_inp = Input(shape=(*IMAGE_SIZE, 3))
    effb7 = efn.EfficientNetB7(weights=weights, input_shape=(*IMAGE_SIZE, 3), include_top=False)

    # Channel count of the backbone's final feature map.
    pt_depth = effb7.layers[-1].get_output_shape_at(0)[-1]
    pt_features = effb7(img_inp)
    bn_features = BatchNormalization()(pt_features)

    # Attention head: 1x1 convolutions narrowing 64 -> 16 -> 8 -> 1 channel (sigmoid).
    attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
    attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(1, kernel_size = (1,1), padding = 'valid', activation = 'sigmoid')(attn_layer)

    # Fixed all-ones 1x1 convolution: copies the single attention channel to
    # pt_depth channels so it can be multiplied with the features. Not trainable.
    up_c2_w = np.ones((1, 1, 1, pt_depth))
    up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same', activation = 'linear', 
                   use_bias = False, weights = [up_c2_w])
    up_c2.trainable = False
    attn_layer = up_c2(attn_layer)

    mask_features = multiply([attn_layer, bn_features])
    gap_features = GlobalAveragePooling2D()(mask_features)
    gap_mask = GlobalAveragePooling2D()(attn_layer)
    # to account for missing values from the attention model
    # (pooled masked features divided by the pooled attention mask)
    gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
    gap_dr = Dropout(0.25)(gap)
    img_embed = Dense(2400, activation = 'swish')(gap_dr)

    # Title branch: embedding -> Conv1D over the token axis -> max pooling -> dense layers.
    # NOTE(review): the embedding has 22000 rows here, while the other builders
    # use 25000 and the text vectorizer is created with max_features = 25000 —
    # confirm token ids can never reach 22000 for this model.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(22000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')

    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # Fuse the image embedding and title features before the margin layer.
    concatenate = Concatenate(name='concatenate')([img_embed, model_title.output])
    # Scalar class label consumed by the margin layer.
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)

    model = Model(inputs=[img_inp, model_title.input, label], outputs=output)
    return model

def effb5_v2(weights='noisy-student'):
    """Assemble the EfficientNet-B5 attention-pooling model combined with a Conv1D title branch.

    The backbone feature map is batch-normalised, a stack of 1x1 convolutions
    produces a single-channel sigmoid attention map, and the attention-weighted
    features are pooled, rescaled and projected to a 2400-d embedding. That
    embedding is concatenated with the title branch output before the
    ArcMargin head.

    Args:
        weights: forwarded to ``efn.EfficientNetB5`` as its ``weights`` argument.

    Returns:
        A Keras ``Model`` with inputs ``[img_inp, model_title.input, label]``
        and a ``CLASSES``-way softmax output.

    Note:
        Reads the module-level ``IMAGE_SIZE`` and ``CLASSES``; ``CLASSES`` is
        not defined in the part of the notebook shown here.
    """
    img_inp = Input(shape=(*IMAGE_SIZE, 3))
    effb5 = efn.EfficientNetB5(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))

    # Channel count of the backbone's final feature map.
    pt_depth = effb5.layers[-1].get_output_shape_at(0)[-1]
    pt_features = effb5(img_inp)
    bn_features = BatchNormalization()(pt_features)

    # Attention head: 1x1 convolutions narrowing 64 -> 16 -> 8 -> 1 channel (sigmoid).
    attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
    attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(1, kernel_size = (1,1), padding = 'valid', activation = 'sigmoid')(attn_layer)

    # Fixed all-ones 1x1 convolution: copies the single attention channel to
    # pt_depth channels so it can be multiplied with the features. Not trainable.
    up_c2_w = np.ones((1, 1, 1, pt_depth))
    up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same', activation = 'linear', 
                   use_bias = False, weights = [up_c2_w])
    up_c2.trainable = False
    attn_layer = up_c2(attn_layer)

    mask_features = multiply([attn_layer, bn_features])
    gap_features = GlobalAveragePooling2D()(mask_features)
    gap_mask = GlobalAveragePooling2D()(attn_layer)
    # to account for missing values from the attention model
    # (pooled masked features divided by the pooled attention mask)
    gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
    gap_dr = Dropout(0.25)(gap)
    img_embed = Dense(2400, activation = 'swish', name='img-embed')(gap_dr)

    # Title branch: embedding -> Conv1D over the token axis -> max pooling -> dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')

    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # Fuse the image embedding and title features before the margin layer.
    concatenate = Concatenate(name='concatenate')([img_embed, model_title.output])
    # Scalar class label consumed by the margin layer.
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)

    model = Model(inputs=[img_inp, model_title.input, label], outputs=output)
    return model

def effb1_244_v4(weights='noisy-student'):
    """Build an EfficientNet-B1 image + title classifier with an ArcMarginProduct head.

    Reads the module-level globals ``IMAGE_SIZE`` (image height/width) and
    ``CLASSES`` (number of output classes); both must be set before calling.

    Args:
        weights: Forwarded unchanged to ``efn.EfficientNetB1(weights=...)``.

    Returns:
        A Keras ``Model`` with inputs ``[image, title vector (100,), label]``
        and a softmax output of width ``CLASSES``.
    """
    
    # NOTE: this local name shadows any module-level `effb1` inside this function.
    effb1 = efn.EfficientNetB1(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))
    
    # Image branch: backbone followed by global average pooling only.
    model_image = Sequential([
        effb1,
        GlobalAveragePooling2D(name='effb1-pooling'),
    ], name='effb1-img')
    
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(420, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.5, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # The layer named 'concatenate' is what get_model() later exposes as the embedding.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[model_image.input, model_title.input, label], outputs=output)
    return model

def effb3(weights='noisy-student'):
    """Build an EfficientNet-B3 image + title classifier with spatial attention pooling.

    Reads the module-level globals ``IMAGE_SIZE`` and ``CLASSES``.

    Args:
        weights: Forwarded unchanged to ``efn.EfficientNetB3(weights=...)``.

    Returns:
        A Keras ``Model`` with inputs ``[image, title vector (100,), label]``
        and a softmax output of width ``CLASSES``.
    """
    img_inp = Input(shape=(*IMAGE_SIZE, 3))
    # NOTE: this local name shadows the function's own name inside its body.
    effb3 = efn.EfficientNetB3(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))
    
    # Channel count of the backbone's last layer; used to size the broadcast conv below.
    pt_depth = effb3.layers[-1].get_output_shape_at(0)[-1]
    pt_features = effb3(img_inp)
    bn_features = BatchNormalization()(pt_features)
    
    # Attention head: 1x1 convolutions narrowing 64 -> 16 -> 8 -> 1 channel,
    # ending in a sigmoid so each spatial location gets a weight in (0, 1).
    attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
    attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(1, kernel_size = (1,1), padding = 'valid', activation = 'sigmoid')(attn_layer)
    
    # Frozen 1x1 conv with all-ones weights and no bias: copies the single
    # attention channel to pt_depth channels so it can be multiplied with bn_features.
    up_c2_w = np.ones((1, 1, 1, pt_depth))
    up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same', activation = 'linear', 
                   use_bias = False, weights = [up_c2_w])
    up_c2.trainable = False
    attn_layer = up_c2(attn_layer)
    
    # Attention-weighted average: pooled masked features divided by the pooled mask.
    mask_features = multiply([attn_layer, bn_features])
    gap_features = GlobalAveragePooling2D()(mask_features)
    gap_mask = GlobalAveragePooling2D()(attn_layer)
    # to account for missing values from the attention model
    gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
    gap_dr = Dropout(0.25)(gap)
    img_embed = Dense(2400, activation = 'swish', name='img-embed')(gap_dr)
    
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # The layer named 'concatenate' is what get_model() later exposes as the embedding.
    concatenate = Concatenate(name='concatenate')([img_embed, model_title.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[img_inp, model_title.input, label], outputs=output)
    return model

In [12]:
def incepV2(weights='imagenet'):
    """Build an InceptionResNetV2 image + title classifier with an auxiliary branch.

    Reads the module-level globals ``IMAGE_SIZE`` and ``CLASSES``. Every layer
    of the backbone is marked non-trainable here.

    Args:
        weights: Forwarded unchanged to ``InceptionResNetV2(weights=...)``.

    Returns:
        A Keras ``Model`` with inputs
        ``[model_image.input, aux_model.input, title vector (100,), label]``
        and a softmax output of width ``CLASSES``.
    """
    inceptionV2 = InceptionResNetV2(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))
    # Freeze the whole backbone.
    for layer in inceptionV2.layers:
        layer.trainable = False
        
    # Image branch: backbone -> pooling -> BN -> dropout -> 2400-unit dense.
    model_image = Sequential([
        inceptionV2,
        GlobalAveragePooling2D(name='incep-pooling'),
        BatchNormalization(name='incep_bn1'),
        Dropout(0.2),
        Dense(2400, name='incep_dense1'),
        Activation('swish', name='incep_act1'),
    ], name='incep-img')
    
    # Title branch: unlike the conv-based variants in this notebook, the
    # embedding is flattened and fed straight to dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 70, input_length=100, name='title-embed'),
#         GlobalAveragePooling1D(name='title-pooling'),
        Flatten(name='title-flatten'),
        BatchNormalization(name='title-bn1'),
        Dropout(0.2),
        Dense(650, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
#         Dropout(0.1),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # Auxiliary branch: taps the backbone at the intermediate layer
    # 'block17_7_mixed' and adds its own conv/pool/dense head.
    eff_aux = Model(
        inputs = inceptionV2.input, 
        outputs = inceptionV2.get_layer('block17_7_mixed').output)
    aux_model = Sequential([
        eff_aux,
        Conv2D(128, kernel_size=(3, 3), activation='relu', name='aux-conv1'),
        BatchNormalization(name='aux-bn1'),
        GlobalAveragePooling2D(name='aux-pooling'),
        Dropout(0.5),
        Dense(1024, name='aux-dense1'),
        Activation('swish'),
    ], name='aux-model')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.5, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # 'concatenate' holds image + title features; 'concatenate2' adds the aux
    # features and is what feeds the margin head.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    concatenate2 = Concatenate(name='concatenate2')([concatenate, aux_model.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate2, label])
#     embeddings = Dense(3050, activation='swish', name='embedding')(concatenate)
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[model_image.input, aux_model.input, model_title.input, label], outputs=output)
    return model

def incepV2_512_v3(weights='imagenet'):
    """Build an InceptionResNetV2 image + title classifier (no auxiliary branch).

    Reads the module-level globals ``IMAGE_SIZE`` and ``CLASSES``. Unlike
    ``incepV2`` this function does not change the backbone's ``trainable`` flags.

    Args:
        weights: Forwarded unchanged to ``InceptionResNetV2(weights=...)``.

    Returns:
        A Keras ``Model`` with inputs ``[image, title vector (100,), label]``
        and a softmax output of width ``CLASSES``.
    """
    inceptionV2 = InceptionResNetV2(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))
    
    # Image branch: backbone -> pooling -> BN -> dropout -> 2400-unit dense.
    model_image = Sequential([
        inceptionV2,
        GlobalAveragePooling2D(name='incep-pooling'),
        BatchNormalization(name='incep_bn1'),
        Dropout(0.2),
        Dense(2400, name='incep_dense1'),
        Activation('swish', name='incep_act1'),
    ], name='incep-img')
    
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # The layer named 'concatenate' is what get_model() later exposes as the embedding.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[model_image.input, model_title.input, label], outputs=output)
    return model

In [13]:
def xception(weights='imagenet'):
    """Build an Xception image + title classifier with an auxiliary branch.

    Reads the module-level globals ``IMAGE_SIZE`` and ``CLASSES``. Every layer
    of the backbone is marked non-trainable here.

    Args:
        weights: Forwarded unchanged to ``Xception(weights=...)``.

    Returns:
        A Keras ``Model`` with inputs
        ``[model_image.input, aux_model.input, title vector (100,), label]``
        and a softmax output of width ``CLASSES``.
    """
    xcep = Xception(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))
    
    # Freeze the whole backbone.
    for layer in xcep.layers:
        layer.trainable = False
        
    # Image branch. The 'incep*' layer names are the ones used by the
    # InceptionResNetV2 builders in this notebook; they are kept as-is because
    # layer names are part of the model definition.
    model_image = Sequential([
        xcep,
        GlobalAveragePooling2D(name='incep-pooling'),
        BatchNormalization(name='incep_bn1'),
        Dropout(0.3),
        Dense(2400, name='incep_dense1'),
        Activation('swish', name='incep_act1'),
    ], name='incep-img')
    
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 100, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # Auxiliary branch: taps the backbone at 'block8_sepconv3_act'. The name
    # aux_model is first bound to the truncated backbone, then rebound to the
    # Sequential that wraps it.
    aux_model = Model(inputs=xcep.input, outputs=xcep.get_layer('block8_sepconv3_act').output)
    aux_model = Sequential([
        aux_model,
        Conv2D(128, kernel_size=(3, 3), activation='relu', name='aux-conv1'),
        BatchNormalization(name='aux-bn1'),
        GlobalAveragePooling2D(name='aux-pooling'),
        Dropout(0.5),
        Dense(240, name='aux-dense1'),
        Activation('swish'),
    ])
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # 'concatenate' holds image + title features; 'concatenate2' adds the aux
    # features and is what feeds the margin head.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    concatenate2 = Concatenate(name='concatenate2')([concatenate, aux_model.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate2, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[model_image.input, aux_model.input, model_title.input, label], outputs=output)
    return model

def xception_512_v3(weights='imagenet'):
    """Build an Xception image + title classifier (no auxiliary branch).

    Reads the module-level globals ``IMAGE_SIZE`` and ``CLASSES``. Unlike
    ``xception`` this function does not change the backbone's ``trainable`` flags.

    Args:
        weights: Forwarded unchanged to ``Xception(weights=...)``.

    Returns:
        A Keras ``Model`` with inputs ``[image, title vector (100,), label]``
        and a softmax output of width ``CLASSES``.
    """
    xcep = Xception(weights=weights, include_top=False, input_shape=(*IMAGE_SIZE, 3))
    
    # Image branch: backbone -> pooling -> BN -> dropout -> 2400-unit dense.
    model_image = Sequential([
        xcep,
        GlobalAveragePooling2D(name='incep-pooling'),
        BatchNormalization(name='incep_bn1'),
        Dropout(0.2),
        Dense(2400, name='incep_dense1'),
        Activation('swish', name='incep_act1'),
    ], name='incep-img')
        
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # The layer named 'concatenate' is what get_model() later exposes as the embedding.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[model_image.input, model_title.input, label], outputs=output)
    return model

In [14]:
def get_nfnet_f0(weights='gs://kds-b135c9e4ed7b978eb1b05abe18f78523f1fe076527cd1f64555bd7b1/NFNET-F0/F0_NFNet'):
    """Build an NFNet-F0 image + title classifier with an ArcMarginProduct head.

    Reads the module-level globals ``IMAGE_SIZE`` and ``CLASSES``.

    Args:
        weights: Checkpoint path passed to ``nfnet_.load_weights``; pass ``None``
            to skip loading backbone weights.

    Returns:
        A Keras ``Model`` with inputs ``[image, title vector (100,), label]``
        and a softmax output of width ``CLASSES``.
    """
    nfnet_ = NFNet(
        num_classes=1000,
        variant='F0',
        drop_rate=0.2,
        label_smoothing=0.1,
        ema_decay=0.99999,
        clipping_factor=0.01,
        include_top=False,
    )
    if weights is not None:
        nfnet_.load_weights(weights)

    # Image branch. The 'effb1*' layer names are the ones used by the
    # EfficientNet builders; get_model() looks layers up by name, so they are
    # kept as-is.
    model_image = Sequential([
        nfnet_,
        GlobalAveragePooling2D(name='effb1-pooling'),
        BatchNormalization(name='effb1_bn1'),
        Dropout(0.2),
        Dense(2400, name='effb1_dense1'),
        Activation('swish', name='effb1_act1'),
    ], name='effb1-img')
    # The Sequential has no Input layer, so it is built explicitly to give it a
    # defined input/output before they are referenced below.
    model_image.build((None, *IMAGE_SIZE, 3))
    
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1, name='title-conv'),
        GlobalMaxPool1D(name='title-globalMax'),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # The layer named 'concatenate' is what get_model() later exposes as the embedding.
    concatenate = Concatenate(name='concatenate')([model_image.output, model_title.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[model_image.input, model_title.input, label], outputs=output)
    return model

def get_nfnet_f1(weights='gs://kds-1d6d6565dd2a34e7cc1a72d6d664184726bc52e3022d6010d9f9b173/NFNET-F1/F1_NFNet'):
    """Build an NFNet-F1 image + title classifier with spatial attention pooling.

    Reads the module-level globals ``IMAGE_SIZE`` and ``CLASSES``.

    Args:
        weights: Checkpoint path passed to ``nfnet_.load_weights``; pass ``None``
            to skip loading backbone weights.

    Returns:
        A Keras ``Model`` with inputs ``[image, title vector (100,), label]``
        and a softmax output of width ``CLASSES``.
    """
    
    img_inp = Input(shape=(*IMAGE_SIZE, 3))
    nfnet_ = NFNet(
        num_classes=1000,
        variant='F1',
        drop_rate=0.2,
        label_smoothing=0.1,
        ema_decay=0.99999,
        clipping_factor=0.01,
        include_top=False,
    )
    if weights is not None:
        nfnet_.load_weights(weights)
    
    # Hard-coded channel count used to size the broadcast conv below;
    # presumably the NFNet-F1 feature depth -- TODO confirm against nfnet_'s output shape.
    pt_depth = 3072
    pt_features = nfnet_(img_inp)
    bn_features = BatchNormalization()(pt_features)
    
    # Attention head: 1x1 convolutions narrowing 64 -> 16 -> 8 -> 1 channel,
    # ending in a sigmoid so each spatial location gets a weight in (0, 1).
    attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
    attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = Conv2D(1, kernel_size = (1,1), padding = 'valid', activation = 'sigmoid')(attn_layer)
    
    # Frozen 1x1 conv with all-ones weights and no bias: copies the single
    # attention channel to pt_depth channels so it can be multiplied with bn_features.
    up_c2_w = np.ones((1, 1, 1, pt_depth))
    up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same', activation = 'linear', 
                   use_bias = False, weights = [up_c2_w])
    up_c2.trainable = False
    attn_layer = up_c2(attn_layer)
    
    # Attention-weighted average: pooled masked features divided by the pooled mask.
    mask_features = multiply([attn_layer, bn_features])
    gap_features = GlobalAveragePooling2D()(mask_features)
    gap_mask = GlobalAveragePooling2D()(attn_layer)
    # to account for missing values from the attention model
    gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
    gap_dr = Dropout(0.25)(gap)
    img_embed = Dense(2400, activation = 'swish', name='img-embed')(gap_dr)
    
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1, name='title-conv'),
        GlobalMaxPool1D(name='title-globalMax'),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )

    # The layer named 'concatenate' is what get_model() later exposes as the embedding.
    concatenate = Concatenate(name='concatenate')([img_embed, model_title.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[img_inp, model_title.input, label], outputs=output)
    return model

### BERT models

In [15]:
# Local directory of the pretrained BERT checkpoint; get_model() passes it to
# get_bert_model().
bert_model = '../input/bert-base-uncased-220421/bert_base'

def get_bert_model(mname):
    """Build a BERT + title-CNN classifier with an ArcMarginProduct head.

    Reads the module-level global ``CLASSES``.

    Args:
        mname: Name or path passed to ``transformers.TFBertModel.from_pretrained``.

    Returns:
        A Keras ``Model`` with inputs
        ``[input_idx (105,), input_masks (105,), title vector (100,), label]``
        and a single softmax output of width ``CLASSES``.
    """
    
    # Token ids and attention mask, both int32 of length 105. `(105)` is a plain
    # int rather than a one-element tuple.
    idx = layers.Input((105), dtype="int32", name="input_idx")
    masks = layers.Input((105), dtype="int32", name="input_masks")
    
    nlp = transformers.TFBertModel.from_pretrained(mname)
    # First element of the transformer's output.
    bert_out= nlp([idx, masks])[0]
    
    ## fine-tuning
    # Average over the sequence axis, then project to a 750-d text embedding.
    x = layers.GlobalAveragePooling1D()(bert_out)
    x = layers.Dense(750, activation="swish", name='text-embed')(x)
    
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 100, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )
    
    # The layer named 'concatenate' is what get_model() later exposes as the embedding.
    concatenate = Concatenate(name='concatenate')([x, model_title.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)

    # Assemble the model (no compile() call is made here).
    model = tf.keras.Model(inputs=[idx, masks, model_title.input, label], outputs=[output])
    return model

# Local directory of the pretrained XLM-RoBERTa checkpoint; get_model() passes
# it to xlm_roberta().
xlm_model_base = '../input/tf-xlm-roberta-base'
def xlm_roberta(mname):
    """Build an XLM-RoBERTa + title-CNN classifier with an ArcMarginProduct head.

    Reads the module-level global ``CLASSES``.

    Args:
        mname: Name or path passed to
            ``transformers.TFXLMRobertaModel.from_pretrained``.

    Returns:
        A Keras ``Model`` with inputs
        ``[input_idx (105,), input_masks (105,), title vector (100,), label]``
        and a single softmax output of width ``CLASSES``.
    """
    
    # Token ids and attention mask, both int32 of length 105.
    idx = layers.Input((105), dtype="int32", name="input_idx")
    masks = layers.Input((105), dtype="int32", name="input_masks")
    
#     nlp = TFAutoModel.from_pretrained(mname)
    nlp = transformers.TFXLMRobertaModel.from_pretrained(mname)
    # First element of the transformer's output.
    bert_out= nlp([idx, masks])[0]
    
    ## fine-tuning
    # Average over the sequence axis, then project to a 750-d text embedding.
    x = layers.GlobalAveragePooling1D()(bert_out)
    x = layers.Dense(750, activation="swish", name='text-embed')(x)
    
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.5, 
        name='head/arc_margin', 
        dtype='float32'
    )
    
    # The layer named 'concatenate' is what get_model() later exposes as the embedding.
    concatenate = Concatenate(name='concatenate')([x, model_title.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[idx, masks, model_title.input, label], outputs=[output])
    
    return model

# Local directory of the pretrained Indonesian RoBERTa checkpoint; used as the
# default `mname` below.
roberta_base_id_model = '../input/tfroberta-base-indonesian/roberta-base-indonesian-522M'
def get_roberta_base_id(mname = roberta_base_id_model):
    """Build a RoBERTa + title-CNN classifier with an ArcMarginProduct head.

    Reads the module-level global ``CLASSES``.

    Args:
        mname: Name or path passed to ``TFAutoModel.from_pretrained``. The
            default is bound when the function is defined, so later rebinding
            ``roberta_base_id_model`` does not change it.

    Returns:
        A Keras ``Model`` with inputs
        ``[input_idx (105,), input_masks (105,), title vector (100,), label]``
        and a single softmax output of width ``CLASSES``.
    """
    
    # Token ids and attention mask, both int32 of length 105.
    idx = layers.Input((105), dtype="int32", name="input_idx")
    masks = layers.Input((105), dtype="int32", name="input_masks")
    
    nlp = TFAutoModel.from_pretrained(mname)
    # First element of the transformer's output.
    bert_out= nlp([idx, masks])[0]
    
    ## fine-tuning
    # Average over the sequence axis, then project to a 750-d text embedding.
    x = layers.GlobalAveragePooling1D()(bert_out)
    x = layers.Dense(750, activation="swish", name='text-embed')(x)
    
    # Title branch: embedding -> 1D conv -> global max pool -> two dense layers.
    model_title = Sequential([
        Input(shape=(100,), name='title-input'),
        Embedding(25000, 150, input_length=100, name='title-embed'),
        Dropout(0.2),
        Conv1D(300, 3, padding='valid', activation='relu', strides=1),
        GlobalMaxPool1D(),
        Dense(720, name='title-dense1'), #650 -> 0.81
        Activation('swish', name='title-act1'),
        Dropout(0.2),
        
        Dense(650, name='title-dense2'),
        BatchNormalization(name='title-bn2'),
        Activation('swish', name='title-act2'),
    ], name='title-vec')
    
    # ArcMarginProduct is defined outside this block; it is called below with
    # [features, label].
    margin = ArcMarginProduct(
        n_classes = CLASSES, 
        s = 30, 
        m = 0.7, 
        name='head/arc_margin', 
        dtype='float32'
    )
    
    # The layer named 'concatenate' is what get_model() later exposes as the embedding.
    concatenate = Concatenate(name='concatenate')([x, model_title.output])
    label = Input(shape=(), name='arc-input')
    arc_face = margin([concatenate, label])
    output = Dense(CLASSES, activation='softmax', name='output')(arc_face)
    
    model = Model(inputs=[idx, masks, model_title.input, label], outputs=[output])
    
    return model

### Helper functions to get embeddings and predictions

In [16]:
def _predict_in_chunks(model, make_dataset, chunk=5000):
    """Run ``model.predict`` over the global ``df`` in row chunks and stack the results.

    Args:
        model: Object exposing ``predict(dataset)``.
        make_dataset: Callable ``(img_paths, input_ids, att_mask, title_vec) -> dataset``
            that turns the four values returned by ``read_dataset`` into the
            input for ``model.predict``.
        chunk: Maximum number of ``df`` rows handled per ``predict`` call.

    Returns:
        The per-chunk predictions joined with ``np.concatenate``, in ``df`` row order.

    Raises:
        ValueError: If ``df`` has no rows, so there is nothing to predict.
    """
    preds = []
    # Chunking bounds how much data is materialised per predict() call.
    for start in range(0, len(df), chunk):
        img_paths, input_ids, att_mask, title_vec = read_dataset(df.iloc[start:start + chunk])
        preds.append(model.predict(make_dataset(img_paths, input_ids, att_mask, title_vec)))
    if not preds:
        raise ValueError('Cannot compute embeddings: the global `df` has no rows.')
    return np.concatenate(preds)


def get_image_embeddings(model):
    """Return embeddings for every row of the global ``df`` from an image + title model.

    Each chunk is fed to ``model`` as ``get_dataset(img_paths, title_vec)``.

    Args:
        model: Embedding model, e.g. one returned by ``get_model`` for an image backbone.

    Returns:
        Array of embeddings, one row per ``df`` row.
    """
    return _predict_in_chunks(
        model,
        lambda img_paths, input_ids, att_mask, title_vec: get_dataset(img_paths, title_vec),
    )


def get_text_embeddings(model):
    """Return embeddings for every row of the global ``df`` from a transformer + title model.

    Each chunk is fed to ``model`` as
    ``get_text_dataset(input_ids, att_mask, title_vec)``.

    Args:
        model: Embedding model, e.g. one returned by ``get_model`` for a text backbone.

    Returns:
        Array of embeddings, one row per ``df`` row.
    """
    return _predict_in_chunks(
        model,
        lambda img_paths, input_ids, att_mask, title_vec: get_text_dataset(input_ids, att_mask, title_vec),
    )

def get_neighbors(embeddings, KNN=50):
    """Find each embedding's nearest neighbours under the 'correlation' metric.

    Args:
        embeddings: 2-D array-like of embeddings, one row per item.
        KNN: Requested number of neighbours per row. It is capped at the number
            of rows, because a neighbour search cannot return more neighbours
            than there are samples (e.g. the 3-row public test set).

    Returns:
        ``(distances, indices)`` as returned by ``NearestNeighbors.kneighbors``
        when querying the fitted set with itself.
    """
    # Honour the caller's KNN (it used to be overwritten) and never ask for
    # more neighbours than there are rows.
    KNN = min(KNN, len(embeddings))

    model = NearestNeighbors(n_neighbors=KNN, metric = 'correlation')
    nearest_model = model.fit(embeddings)
    distances, indices = nearest_model.kneighbors(embeddings)

    return distances, indices

def get_predictions(number_of_embeds, distances, indices, th=40):
    """Turn neighbour distances into per-row arrays of matching ``posting_id`` values.

    For each row, every neighbour whose distance is below ``th`` is kept. Rows
    that end up with at most one match are then retried with thresholds
    ``th, th+0.02, ...`` (below ``th+0.3``) until they have more than one match
    or the thresholds run out.

    Args:
        number_of_embeds: Number of rows to produce predictions for.
        distances: 2-D array of neighbour distances (row = query item).
        indices: 2-D array of neighbour positions into the global ``df``.
        th: Base distance threshold.

    Returns:
        List of length ``number_of_embeds`` holding sorted, de-duplicated
        arrays of ``posting_id`` values.
    """
    def _matches_below(row, limit):
        # posting_ids of the neighbours of `row` that are closer than `limit`
        close = np.where(distances[row,] < limit)[0]
        neighbour_rows = indices[row, close]
        return np.unique(df['posting_id'].iloc[neighbour_rows].values)

    predictions = [_matches_below(row, th) for row in range(number_of_embeds)]

    # Relax the threshold step by step, but only for rows still lacking a match
    # other than (at most) a single item.
    for relaxed_th in np.arange(th, th+0.3, 0.02):
        for row in range(number_of_embeds):
            if len(predictions[row]) > 1:
                continue
            predictions[row] = np.union1d(predictions[row], _matches_below(row, relaxed_th))

    return predictions

### Get model choices

In [17]:
# model_name -> (builder, unfreeze, input_ids, embed_layer)
#   builder     : zero-argument callable returning the full classifier. Wrapped
#                 in a lambda so builder names are only resolved when that
#                 entry is actually requested.
#   unfreeze    : None, or (backbone layer name, number of trailing backbone
#                 layers whose non-BatchNormalization members are set trainable
#                 before the weights are loaded).
#   input_ids   : positions in `model.input` kept as inputs of the embedding model.
#   embed_layer : name of the layer whose output is returned as the embedding.
_EMBEDDING_MODEL_SPECS = {
    'effb1': (lambda: effb1(weights=None), ('efficientnet-b1', 170), (0, 2), 'concatenate'),
    'effb1_512_v2': (lambda: effb1_512_v2(weights=None), ('efficientnet-b1', 170), (0, 2), 'concatenate'),
    'effb1_512_v3': (lambda: effb1_512_v3(weights=None), None, (0, 1), 'concatenate'),
    'effb1_244_v4': (lambda: effb1_244_v4(weights=None), None, (0, 1), 'concatenate'),
    'effb2': (lambda: effb2(weights=None), ('efficientnet-b2', 190), (0,), 'effb1_act1'),
    'effb3': (lambda: effb3(weights=None), None, (0, 1), 'concatenate'),
    'effb5': (lambda: effb5(weights=None), None, (0, 1), 'concatenate'),
    'effb5_v2': (lambda: effb5_v2(weights=None), None, (0, 1), 'concatenate'),
    'effb7': (lambda: effb7(weights=None), None, (0,), 'img-embed'),
    'effb7_v2': (lambda: effb7_v2(weights=None), None, (0, 1), 'concatenate'),
    'incepv2': (lambda: incepV2(weights=None), ('inception_resnet_v2', 380), (0, 2), 'concatenate'),
    'incepV2_512_v3': (lambda: incepV2_512_v3(weights=None), None, (0, 1), 'concatenate'),
    # The original branch called `xception_512`, which is not defined in the
    # visible part of this notebook; `xception()` is the builder whose backbone
    # layer name ('xception') and input layout (title at index 2) match it.
    'xception': (lambda: xception(weights=None), ('xception', 80), (0, 2), 'concatenate'),
    'xception_512_v3': (lambda: xception_512_v3(weights=None), None, (0, 1), 'concatenate'),
    'nfnet_f0': (lambda: get_nfnet_f0(weights=None), None, (0, 1), 'concatenate'),
    'nfnet_f1': (lambda: get_nfnet_f1(weights=None), None, (0, 1), 'concatenate'),
    'bert': (lambda: get_bert_model(bert_model), None, (0, 1, 2), 'concatenate'),
    'xlm-roberta': (lambda: xlm_roberta(xlm_model_base), None, (0, 1, 2), 'concatenate'),
    'roberta_base_id': (lambda: get_roberta_base_id(), None, (0, 1, 2), 'concatenate'),
}


def get_model(weight_path, model_name):
    """Build a named architecture, load its weights and return its embedding sub-model.

    The full classifier is built without pretrained backbone weights, the
    checkpoint at ``weight_path`` is loaded into it, and a new ``Model`` is
    returned that keeps only the feature inputs (the label input of the margin
    head is dropped) and outputs the named embedding layer.

    Args:
        weight_path: Path passed to ``model.load_weights``.
        model_name: Key selecting the architecture; see ``_EMBEDDING_MODEL_SPECS``.

    Returns:
        A Keras ``Model`` mapping the selected inputs to the embedding layer's output.

    Raises:
        ValueError: If ``model_name`` is not a known architecture.
    """
    try:
        build, unfreeze, input_ids, embed_layer = _EMBEDDING_MODEL_SPECS[model_name]
    except KeyError:
        known = ', '.join(sorted(_EMBEDDING_MODEL_SPECS))
        raise ValueError(f'Unknown model_name {model_name!r}; expected one of: {known}') from None

    model = build()

    if unfreeze is not None:
        # Must happen before load_weights(). Presumably this mirrors the
        # trainable configuration used when the checkpoint was saved -- TODO confirm.
        backbone_name, n_trailing = unfreeze
        for layer in model.get_layer(backbone_name).layers[-n_trailing:]:
            if not isinstance(layer, layers.BatchNormalization):
                layer.trainable = True

    model.load_weights(weight_path)

    return Model(
        inputs=[model.input[i] for i in input_ids],
        outputs=model.get_layer(embed_layer).output,
    )

### Image predictions

In [18]:
# One entry per image model:
# (weights file, get_model() name, distance threshold, image size, only-image flag)
model_weights = [
    ('../input/shopee-effb3-512/model_weights.h5', 'effb3', 0.35, (512, 512), False),
    ('../input/shopee-effb5-512-v2/model_weights.h5', 'effb5_v2', 0.35, (512, 512), False),
    ('../input/shopee-nfnet-512/model_weights.h5', 'nfnet_f0', 0.35, (512, 512), False),
    ('../input/shopee-nfnet-f1-512/model_weights.h5', 'nfnet_f1', 0.35, (512, 512), False),
]

# False: every model yields its own neighbour-based predictions in this loop.
# True: scaled embeddings are only collected in img_embeds_avg and combined in
# the next cell.
use_weight = False
img_predictions = []
img_embeds_avg = []
for n, (weight_path, model_name, th, img_size, only_img) in enumerate(model_weights):
    print(f'Get predictions of model {n}')
    # Globals read by the model builders (CLASSES, IMAGE_SIZE). ONLY_IMAGE is
    # presumably read by the dataset helpers -- TODO confirm.
    # The CLASSES lookup list has five entries, so it supports at most five models.
    CLASSES = [11014, 11014, 11014, 11014, 11014][n] 
    IMAGE_SIZE = img_size
    ONLY_IMAGE = only_img
    model = get_model(weight_path, model_name)
    # get image embeds
    img_embed = get_image_embeddings(model)
    del model

    # scale embedding
    scaler = StandardScaler()
    img_embed = scaler.fit_transform(img_embed)

    if use_weight:
        # NOTE(review): `weights` is assigned but not used anywhere in this cell.
        weights = [1, 1, 1, 1, 1]
        img_embeds_avg.append(img_embed)
        del img_embed, scaler
    else:
        # get neighbors
        distances, indices = get_neighbors(img_embed)

        # get predictions
        preds = get_predictions(img_embed.shape[0], distances, indices, th=th)
        img_predictions.append(preds)

        # Drop references to the large intermediates before the next model is loaded.
        del distances, indices, img_embed, preds, scaler
gc.collect()

Get predictions of model 0
Get predictions of model 1
Get predictions of model 2
Get predictions of model 3


77920

In [19]:
# Only runs when the previous cell collected embeddings instead of predictions:
# the four models' scaled embeddings are joined feature-wise (axis=1) and a
# single neighbour search is run on the combined vectors.
# NOTE: the four indices are hard-coded, so this expects exactly four image models.
if use_weight:
    img_embeds_avg = np.concatenate([
        img_embeds_avg[0], 
        img_embeds_avg[1], 
        img_embeds_avg[2], 
        img_embeds_avg[3],
    ], axis=1)
    distances, indices = get_neighbors(img_embeds_avg)
    preds = get_predictions(img_embeds_avg.shape[0], distances, indices, th=0.35)
    img_predictions.append(preds)
    del img_embeds_avg
gc.collect()

20

### Text predictions

In [20]:
text_predictions = []
# NOTE(review): bert_predictions is initialised here but not used in this cell.
bert_predictions = []
txt_embeds_avg = []

# for roberta base indonesian
# extract vocab from train data
# NOTE: the global `df` is bound to the TRAIN set here and rebound to the TEST
# set further down; the embedding helpers read whatever `df` is current.
df = pd.read_csv('../input/shopee-product-matching/train.csv')
    
# Title clean-up pipeline (helpers are defined outside this cell); each title
# ends up as a list of unique tokens.
df['title'] = df['title'].apply(lambda x: remove_concatenate_2_words(x))
df['title'] = df['title'].str.lower()
df['title'] = df['title'].apply(lambda x: remove_punctuation(x))
df['title'] = df['title'].apply(lambda x: str(x).split())
df['title'] = df['title'].apply(lambda x: remove_stopwords(x))
df['title'] = df['title'].apply(lambda x: remove_zero_val(x))
df['title'] = df['title'].apply(lambda x: remove_strange_words(x))
df['title'] = df['title'].apply(lambda x: list(np.unique(x)))

# title vocab
words = list(df['title'])
train_vocab = list(np.unique(np.concatenate(words)))

# Same clean-up on the test titles, written to a scratch column 'tmp' so the
# raw 'title' column stays available; string_escape() is applied first here.
df = pd.read_csv('../input/shopee-product-matching/test.csv')
df['tmp'] = df['title'].apply(lambda x: string_escape(x))
df['tmp'] = df['tmp'].apply(lambda x: remove_concatenate_2_words(x))
df['tmp'] = df['tmp'].str.lower()
df['tmp'] = df['tmp'].apply(lambda x: remove_punctuation(x))
df['tmp'] = df['tmp'].apply(lambda x: str(x).split())
df['tmp'] = df['tmp'].apply(lambda x: remove_stopwords(x))
df['tmp'] = df['tmp'].apply(lambda x: remove_zero_val(x))
df['tmp'] = df['tmp'].apply(lambda x: remove_strange_words(x))
df['tmp'] = df['tmp'].apply(lambda x: list(np.unique(x)))

# for mlp input
# title vocab
# Train vocabulary first, followed by the test vocabulary (duplicates across
# the two lists are not removed here).
words = list(df['tmp'])
words = list(np.unique(np.concatenate(words)))
words = train_vocab + words

# Text vectorizer
# Produces the 'title_vec' column; max_len=100 matches the (100,) 'title-input'
# of the model builders.
model = text_vectorizer(max_features = 25000, max_len = 100, vocab = words)
list_text = [' '.join(x) for x in df['tmp']]
title_vec = model.predict(list_text)
df['title_vec'] = list(title_vec)
del model, list_text, title_vec, words, train_vocab

# MAX_LEN matches the (105,) transformer inputs of the text model builders.
MAX_LEN = 105
# Same path as roberta_base_id_model.
MODEL = '../input/tfroberta-base-indonesian/roberta-base-indonesian-522M'
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL)

# 'tmp' is rebuilt from the raw title, this time with the transformer-oriented
# preprocessing.
df['tmp'] = df['title'].apply(lambda x: string_escape(x))
df["tmp"] = df["tmp"].apply(lambda x: utils_preprocess_text(
    x, flg_stemm=False, flg_lemm=False, lst_stopwords=None))

# for BERT
ids, att_mask = regular_encode(list(df["tmp"].values), tokenizer, maxlen=MAX_LEN)
df['input_ids'] = list(ids)
df['att_mask'] = list(att_mask)
del ids, att_mask

# bad, decrease the LB
# NOTE(review): the comment above is the original author's; what it refers to
# is not clear from this cell -- TODO clarify.
# One entry per text model: (weights file, get_model() name, distance threshold).
model_weights = [
    ('../input/shopee-roberta-base-id/model_weights.h5', 'roberta_base_id', 0.50)
]

for n, (weight_path, model_name, th) in enumerate(model_weights):
    print(f'Get predictions of model {n}')
    # Global read by the model builders; the lookup list supports at most three models.
    CLASSES = [11014, 11014, 11014][n] 
    model = get_model(weight_path, model_name)
    # get text embeds
    txt_embed = get_text_embeddings(model)
    del model

    # scale embedding
    scaler = StandardScaler()
    txt_embed = scaler.fit_transform(txt_embed)
    txt_embeds_avg.append(txt_embed)
    
    del txt_embed, scaler
gc.collect()

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Get predictions of model 0


All model checkpoint layers were used when initializing TFRobertaModel.

All the layers of TFRobertaModel were initialized from the model checkpoint at ../input/tfroberta-base-indonesian/roberta-base-indonesian-522M.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


41127

In [21]:
# Join the collected text embeddings feature-wise (only one model is listed, so
# this is effectively a copy), then run the neighbour search and thresholding.
# NOTE: th=0.50 is hard-coded here and matches the threshold in model_weights.
txt_embeds_avg = np.concatenate([
    txt_embeds_avg[0], 
], axis=1)
distances, indices = get_neighbors(txt_embeds_avg)
preds = get_predictions(txt_embeds_avg.shape[0], distances, indices, th=0.50)
text_predictions.append(preds)
del txt_embeds_avg
gc.collect()

20

### TFIDF predictions

In [22]:
def get_text_predictions(df, title_cu, max_features = 25_000, threshold = 0.75, chunk_size = 1024*4):
    """Match titles by TF-IDF similarity, once per stop-word setting.

    For each stop-word setting ('english', then STOPWORDS_ID) a binary
    TfidfVectorizer is fitted on ``title_cu`` and every row is compared with
    all rows, ``chunk_size`` query rows at a time, via a matrix product of the
    dense TF-IDF matrix with itself.

    Args:
        df: frame with a ``posting_id`` column; rows are addressed
            positionally, so it must be row-aligned with ``title_cu``.
        title_cu: series of titles passed to ``TfidfVectorizer.fit_transform``.
        max_features: vocabulary cap forwarded to ``TfidfVectorizer``.
        threshold: a row pair is a match when its similarity score is
            strictly greater than this value.
        chunk_size: number of query rows handled per matrix product.

    Returns:
        A list with one entry per stop-word setting. Each entry is a list of
        ``len(df)`` arrays holding the ``posting_id`` values matched to the
        corresponding row.
    """
    # Looked up once instead of re-slicing the frame for every single row.
    posting_ids = df.posting_id.values
    n_rows = len(df)

    text_preds = []
    for stopw in ['english', STOPWORDS_ID]:
        model = TfidfVectorizer(stop_words = stopw, binary = True, max_features = max_features)
        # Dense matrix: memory grows with n_rows * max_features.
        text_embeddings = model.fit_transform(title_cu).toarray()

        preds = []

        print('Finding similar titles...')
        for a in range(0, n_rows, chunk_size):
            b = min(a + chunk_size, n_rows)
            print('chunk',a,'to',b)

            # COSINE SIMILARITY DISTANCE
            # NOTE(review): the dot product equals cosine similarity only if
            # the vectorizer L2-normalises rows (presumably its default norm)
            # -- confirm against the cuML documentation.
            cts = cupy.matmul( text_embeddings, text_embeddings[a:b].T).T

            for k in range(b-a):
                IDX = cupy.where(cts[k,] > threshold)[0]
                preds.append(posting_ids[cupy.asnumpy(IDX)])

        text_preds.append(preds)
        del model,text_embeddings, preds
        gc.collect()
    return text_preds

def string_escape(s, encoding='utf-8'):
    """Interpret literal backslash escapes in ``s`` and decode the result.

    The text is converted to bytes, its backslash escape sequences are
    resolved with the 'unicode-escape' codec, and the resulting byte values
    are finally decoded with ``encoding``.

    Args:
        s: text that may contain literal backslash escape sequences.
        encoding: codec used for the final bytes-to-text decode.

    Returns:
        The unescaped, decoded string.
    """
    # 'unicode-escape' operates on bytes, so move to bytes first.
    escaped_bytes = s.encode('latin1')
    # Resolve the escape sequences into the characters they name.
    unescaped_text = escaped_bytes.decode('unicode-escape')
    # Latin-1 maps each character back to the byte with the same value.
    raw_bytes = unescaped_text.encode('latin1')
    # Read those bytes in the encoding the text was originally written in.
    return raw_bytes.decode(encoding)

# Reload the raw test set (this rebinds `df`) and clean the titles: resolve the
# literal escape sequences first, then strip punctuation.
df = pd.read_csv('../input/shopee-product-matching/test.csv')
df['title'] = df['title'].apply(string_escape).apply(remove_punctuation)

# Hand the titles to the TF-IDF matcher as a cudf series and add its result
# lists after the predictions collected so far (a new list is built rather
# than extending the existing one in place).
title_cu = cudf.Series(df['title'])
text_predictions = [
    *text_predictions,
    *get_text_predictions(df, title_cu, max_features = 25000),
]
del title_cu
gc.collect()

Finding similar titles...
chunk 0 to 3
Finding similar titles...
chunk 0 to 3


0

### Combine predictions (Image + BertText + TFIDF)

In [23]:
def _row_predictions(row, prefix, count):
    """Concatenate the row's ``{prefix}_prediction_{i}`` arrays for i < count."""
    return np.concatenate([row[f'{prefix}_prediction_{i}'] for i in range(count)])


def _with_min_votes(items, min_votes):
    """Return the distinct items that occur at least ``min_votes`` times."""
    votes = Counter(items)
    return np.array([item for item, n_votes in votes.items() if n_votes >= min_votes])


def combine_predictions(row, img_min_votes=3, text_min_votes=2):
    """Merge one row's image, text and BERT predictions into a single match set.

    Reads the module-level lists ``img_predictions``, ``text_predictions`` and
    ``bert_predictions`` only for their lengths (how many prediction columns
    of each kind exist) and the module-level flag ``GET_CV`` for the output
    format.

    Args:
        row: mapping/Series holding ``img_prediction_{i}``,
            ``text_prediction_{i}`` and ``bert_prediction_{i}`` entries, each
            an array of ids.
        img_min_votes: an id from the image columns is kept only if it occurs
            at least this many times across those columns.
        text_min_votes: same rule for the text columns.

    Returns:
        The sorted unique ids: as a NumPy array when ``GET_CV`` is truthy,
        otherwise as one space-separated string.
    """
    parts = []

    # Image models: keep ids that collected at least img_min_votes votes.
    if len(img_predictions) > 0:
        img_ids = _row_predictions(row, 'img', len(img_predictions))
        parts.append(_with_min_votes(img_ids, img_min_votes))

    # Text models: keep ids that collected at least text_min_votes votes.
    if len(text_predictions) > 0:
        text_ids = _row_predictions(row, 'text', len(text_predictions))
        parts.append(_with_min_votes(text_ids, text_min_votes))

    # BERT predictions are taken as they are, without voting.
    if len(bert_predictions) > 0:
        parts.append(_row_predictions(row, 'bert', len(bert_predictions)))

    # Empty groups are skipped so that an empty (float-typed) array is never
    # concatenated with arrays of string ids.
    parts = [part for part in parts if len(part) > 0]
    if parts:
        res = np.unique(np.concatenate(parts))
    else:
        res = np.array([], dtype=str)

    if GET_CV:
        return res
    else:
        return ' '.join(res)

### Make submission

In [24]:
# Attach every model's per-row predictions to the frame as
# <kind>_prediction_<n> columns, in the order image, BERT, text.
for column_prefix, prediction_group in (
    ('img', img_predictions),
    ('bert', bert_predictions),
    ('text', text_predictions),
):
    for n, prediction in enumerate(prediction_group):
        df[f'{column_prefix}_prediction_{n}'] = prediction

# Merge the columns row by row, keep only the two submission columns and
# write them out without the index.
df['matches'] = df.apply(combine_predictions, axis=1)
df = df[['posting_id', 'matches']]
df.to_csv('./submission.csv', index=False)

In [25]:
# Read the submission back from disk and preview its first rows as a quick
# check of what was written; note that this rebinds `df` to the reloaded frame.
df = pd.read_csv('./submission.csv')
df.head()

Unnamed: 0,posting_id,matches
0,test_2255846744,test_2255846744
1,test_3588702337,test_3588702337
2,test_4015706929,test_4015706929
