# Inference:

In [1]:
# Install both wheels from a local dataset directory under ../input rather than from a package index.
# Keras-Applications is installed first — presumably a dependency of efficientnet; confirm if reordering.
!pip install '../input/shopee-external-models/Keras_Applications-1.0.8-py3-none-any.whl'
!pip install '../input/shopee-external-models/efficientnet-1.1.0-py3-none-any.whl'

Processing /kaggle/input/shopee-external-models/Keras_Applications-1.0.8-py3-none-any.whl
Installing collected packages: Keras-Applications
Successfully installed Keras-Applications-1.0.8
Processing /kaggle/input/shopee-external-models/efficientnet-1.1.0-py3-none-any.whl
Installing collected packages: efficientnet
Successfully installed efficientnet-1.1.0


In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import random
import sys
import sklearn as sk
from IPython.display import Image, display
from pathlib import Path
from tqdm import tqdm
import pickle
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import gc
import math
import re

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import applications
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import optimizers
from tensorflow.keras import metrics
from tensorflow.keras import Model
from tensorflow.keras.models import model_from_json

import efficientnet.tfkeras as efn

In [3]:
# tf.data autotune sentinel; passed to `map(num_parallel_calls=...)` and `prefetch(...)` in get_dataset.
AUTO = tf.data.experimental.AUTOTUNE

# Number of images per batch in the tf.data pipeline built by get_dataset.
BATCH_SIZE = 8
# Size passed to tf.image.resize in decode_image; also the spatial shape of the model input layer.
IMAGE_SIZE = [512, 512]
# SEED and VERBOSE are not referenced anywhere else in the visible notebook.
SEED = 42
VERBOSE = 1
# Number of classes of the ArcMarginProduct head constructed in get_image_embeddings.
N_CLASSES = 11014

In [4]:
# GET_CV: when True, read_dataset loads train_folds.csv / train_images and get_neighbors uses the
# cross-validation thresholds; when False, the test set and the submission thresholds are used.
GET_CV = False
# CHECK_SUB: only consulted when GET_CV is True; read_dataset then concatenates the train frame
# with itself (doubling the number of rows).
CHECK_SUB = False

In [5]:
# Probe the size of test.csv. When it has more than 3 rows (i.e. the notebook is being
# committed against the full test set) cross-validation is switched off, whatever
# GET_CV was set to above; with 3 rows or fewer GET_CV keeps its current value.
df = pd.read_csv('../input/shopee-product-matching/test.csv')
GET_CV = GET_CV and len(df) <= 3
del df

def f1_score(y_true, y_pred):
    """Row-wise F1 between two Series of whitespace-separated id strings.

    Each string is split on whitespace and turned into a set; for every row the
    score is ``2 * |true & pred| / (|true| + |pred|)``.

    Args:
        y_true: pandas Series of strings holding the expected ids.
        y_pred: pandas Series of strings holding the predicted ids.

    Returns:
        numpy array with one F1 value per row.
    """
    def to_id_set(ids):
        return set(ids.split())

    true_sets = y_true.apply(to_id_set)
    pred_sets = y_pred.apply(to_id_set)

    n_shared = np.array([len(t & p) for t, p in zip(true_sets, pred_sets)])
    n_pred = pred_sets.apply(lambda s: len(s)).values
    n_true = true_sets.apply(lambda s: len(s)).values

    return 2 * n_shared / (n_pred + n_true)

def read_dataset():
    """Load the metadata frame and build the matching image paths.

    Reads the module-level flags GET_CV and CHECK_SUB:
      * GET_CV False -> test.csv with paths under test_images/.
      * GET_CV True  -> train_folds.csv (the preprocessed train set that already
        carries the target) with paths under train_images/; if CHECK_SUB is also
        True the frame is concatenated with itself and re-indexed first.

    Returns:
        (df, image_paths): the DataFrame and a Series of path strings built by
        prefixing the 'image' column with the image directory.
    """
    if not GET_CV:
        test_df = pd.read_csv('../input/shopee-product-matching/test.csv')
        test_paths = '../input/shopee-product-matching/test_images/' + test_df['image']
        return test_df, test_paths

    train_df = pd.read_csv('../input/shopee-tf-records-512-stratified/train_folds.csv')
    if CHECK_SUB:
        train_df = pd.concat([train_df, train_df], axis = 0).reset_index(drop = True)
    train_paths = '../input/shopee-product-matching/train_images/' + train_df['image']
    return train_df, train_paths

def decode_image(image_data):
    """Decode JPEG bytes into a 3-channel float32 image scaled by 1/255.

    The decoded image is resized to IMAGE_SIZE before scaling.
    """
    pixels = tf.image.decode_jpeg(image_data, channels = 3)
    resized = tf.image.resize(pixels, IMAGE_SIZE)
    return tf.cast(resized, tf.float32) / 255.0

def read_image(image):
    """Read the file at path `image` and return it decoded by decode_image."""
    raw_bytes = tf.io.read_file(image)
    return decode_image(raw_bytes)

def get_dataset(image):
    """Build a tf.data pipeline over image paths.

    Each path in `image` is mapped through read_image (in parallel, AUTO),
    then the stream is grouped into batches of BATCH_SIZE and prefetched.
    """
    return (
        tf.data.Dataset.from_tensor_slices(image)
        .map(read_image, num_parallel_calls = AUTO)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )

In [6]:
class ArcMarginProduct(tf.keras.layers.Layer):
    '''
    Implements large margin arc distance.

    The layer takes a pair ``[X, y]`` (features and integer class labels) and
    returns scaled cosine logits in which the entry of the labelled class has
    the angular margin ``m`` applied.

    Reference:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
            blob/master/src/modeling/metric_learning.py
    '''
    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False,
                 ls_eps=0.0, **kwargs):
        '''
        Args:
            n_classes: number of columns of the class weight matrix.
            s: factor the final logits are multiplied by.
            m: angular margin (radians) applied to the labelled class.
            easy_margin: if True, the margin is only applied where cosine > 0.
            ls_eps: label-smoothing epsilon; 0 disables smoothing.
            **kwargs: forwarded to ``tf.keras.layers.Layer``.
        '''
        super(ArcMarginProduct, self).__init__(**kwargs)

        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        # Constants derived from the margin, precomputed once for call().
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        '''Return the base layer config extended with this layer's constructor arguments.'''
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        '''Create the weight matrix W of shape (feature_dim, n_classes).

        ``input_shape`` is indexed as a pair; ``input_shape[0]`` is the shape of
        the feature input and its last dimension is used as feature_dim.
        '''
        super(ArcMarginProduct, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        '''Compute the margin-adjusted, scaled cosine logits.

        Args:
            inputs: pair ``(X, y)``; X is L2-normalised along axis 1 and
                multiplied with the column-normalised W, y is cast to int32
                and one-hot encoded with depth ``n_classes``.

        Returns:
            ``s * (one_hot * phi + (1 - one_hot) * cosine)``.
        '''
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        # Floating-point rounding can push cosine**2 marginally above 1, which would
        # make the sqrt argument negative and yield NaN; clip it into [0, 1] first.
        sine = tf.math.sqrt(
            tf.clip_by_value(1.0 - tf.math.pow(cosine, 2), 0.0, 1.0)
        )
        # cos(theta + m) via the angle-addition identity.
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes

        # Margin-adjusted value for the labelled class, plain cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

In [7]:
def get_image_embeddings(image_paths, chunk = 5000):
    """Compute image embeddings for every path in `image_paths`.

    Rebuilds the training-time graph (EfficientNetB3 backbone -> global average
    pooling -> ArcMarginProduct -> softmax) so that the saved weights can be
    loaded, then keeps only the image branch up to `model.layers[-4]` and runs
    prediction over the paths in chunks.

    Args:
        image_paths: sliceable sequence of image file paths (the notebook passes
            the Series returned by read_dataset).
        chunk: number of paths predicted per `model.predict` call.

    Returns:
        numpy array with the per-chunk predictions concatenated along axis 0.
    """
    embeds = []

    margin = ArcMarginProduct(
                n_classes = N_CLASSES,
                s = 30,
                m = 0.5,
                name='head/arc_margin',
                dtype='float32'
                )

    inp = tf.keras.layers.Input(shape = (*IMAGE_SIZE, 3), name = 'inp1')
    label = tf.keras.layers.Input(shape = (), name = 'inp2')
    x = efn.EfficientNetB3(weights = None, include_top = False)(inp)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = margin([x, label])

    output = tf.keras.layers.Softmax(dtype='float32')(x)

    model = tf.keras.models.Model(inputs = [inp, label], outputs = [output])
    # NOTE(review): the weight file name mentions 224 while IMAGE_SIZE is 512 — confirm
    # this is the intended checkpoint.
    model.load_weights('../input/shopee/EfficientNetB3_224_42.h5')

    # Drop the label input and the classification head; only the image input is needed.
    # NOTE(review): layers[-4] is a positional lookup — presumably the pooling layer
    # (the notebook prints 1536-wide embeddings); confirm if the graph above changes.
    model = tf.keras.models.Model(inputs = model.input[0], outputs = model.layers[-4].output)

    # Chunk over the argument itself rather than a global frame, so the function
    # processes exactly the paths it was given.
    n_images = len(image_paths)
    for start in range(0, n_images, chunk):
        image_dataset = get_dataset(image_paths[start:start + chunk])
        embeds.append(model.predict(image_dataset))
    del model
    image_embeddings = np.concatenate(embeds)

    print(f'Our image embeddings shape is {image_embeddings.shape}')
    del embeds
    gc.collect()

    return image_embeddings

In [8]:
def get_text_embeddings(df, max_features = 15500):
    """Fit a binary TF-IDF vectorizer on df['title'] and return the transformed titles.

    Args:
        df: DataFrame with a 'title' column.
        max_features: vocabulary size cap passed to TfidfVectorizer.

    Returns:
        The matrix produced by `fit_transform` (one row per title); its shape is printed.
    """
    vectorizer = TfidfVectorizer(stop_words = 'english', binary = True, max_features = max_features)
    title_matrix = vectorizer.fit_transform(df['title'])
    print(f'Our title text embedding shape is {title_matrix.shape}')
    return title_matrix

In [9]:
def get_neighbors(df, image_embeddings, text_embeddings, KNN = 50,
                  image_threshold = None, text_threshold = None):
    """Predict matching posting ids from image and text nearest neighbours.

    For every row, the neighbours whose cosine distance is below the image
    threshold (image embeddings) or below the text threshold (text embeddings)
    are collected, and the union of both sets is turned into a space-separated
    string of posting ids.

    Args:
        df: DataFrame with a 'posting_id' column, row-aligned with both embeddings.
        image_embeddings: 2-D array-like of image embeddings.
        text_embeddings: 2-D array-like (dense or sparse) of text embeddings.
        KNN: number of neighbours to query; clamped to the number of fitted rows.
        image_threshold: cosine-distance cutoff for image neighbours. Defaults to
            0.46 when GET_CV is True (the thresholds that gave 0.8035 cv) and to
            0.37 otherwise (reduced because more observations are predicted).
        text_threshold: cosine-distance cutoff for text neighbours. Defaults to
            0.30 when GET_CV is True and to 0.21 otherwise.

    Returns:
        (df, predictions): the input frame and a list with one id string per row.
    """
    if image_threshold is None:
        image_threshold = 0.46 if GET_CV else 0.37
    if text_threshold is None:
        text_threshold = 0.30 if GET_CV else 0.21

    n_rows = df.shape[0]
    if n_rows == 0:
        # Nothing to match; NearestNeighbors cannot be fitted on zero samples.
        return df, []

    # kneighbors raises when n_neighbors exceeds the number of fitted samples,
    # so never ask for more neighbours than there are rows.
    KNN = max(1, min(KNN, image_embeddings.shape[0], text_embeddings.shape[0]))

    # Get distances and indices from image and text embeddings
    neighbors_model = NearestNeighbors(n_neighbors = KNN, metric = 'cosine').fit(image_embeddings)
    image_distances, image_indices = neighbors_model.kneighbors(image_embeddings)
    neighbors_model = NearestNeighbors(n_neighbors = KNN, metric = 'cosine').fit(text_embeddings)
    text_distances, text_indices = neighbors_model.kneighbors(text_embeddings)

    posting_id_column = df['posting_id']
    predictions = []
    for k in range(n_rows):
        ids_image = image_indices[k, image_distances[k] < image_threshold]
        ids_text = text_indices[k, text_distances[k] < text_threshold]
        # Get the union of both id sets
        ids = list(set(list(ids_image) + list(ids_text)))
        predictions.append(' '.join(posting_id_column.iloc[ids].values))

    del neighbors_model, image_distances, image_indices, text_distances, text_indices
    gc.collect()
    return df, predictions

In [10]:
# Load the metadata frame and image paths selected by the GET_CV / CHECK_SUB flags.
df, image_paths = read_dataset()

# Embed every image; the trailing gc.collect() is the cell's last expression, so its
# return value is what the cell displays.
image_embeddings = get_image_embeddings(image_paths)
gc.collect()

Our image embeddings shape is (3, 1536)


35600

In [11]:
# Vocabulary cap for TF-IDF: 15500 when cross-validating, 21500 otherwise.
text_embeddings = get_text_embeddings(df, max_features = 15500 if GET_CV else 21500)

Our title text embedding shape is (3, 26)


In [12]:
# Query 50 neighbours normally, but only 2 when the frame has 3 rows or fewer.
neighbours = 2 if len(df) <= 3 else 50

In [13]:
# Build one space-separated string of matching posting ids per row of df.
df, predictions = get_neighbors(df, image_embeddings, text_embeddings, KNN = neighbours)

In [14]:
# Attach the predictions as a new 'matches' column (mutates df) and write the two
# submission columns to submission.csv without the index.
df['matches'] = predictions
df[['posting_id', 'matches']].to_csv('submission.csv', index = False)