In [None]:
import numpy as np
import pandas as pd
import gc,math,pickle,os
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB5
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# --- Configuration -----------------------------------------------------------
# Weights file loaded by get_image_model().
image_model_filename = "../input/efficientnet-b5-weights/best_model_efnb5.h5"

# Target image geometry; process_data() resizes to HEIGHT x WIDTH and decodes
# CHANNELS colour channels.
HEIGHT = 256
WIDTH = 256
CHANNELS = 3

# NOTE(review): BATCH_SIZE and NUM_CLASSES are not referenced by the cells
# visible in this notebook — presumably kept from the training setup.
BATCH_SIZE = 32
NUM_CLASSES = 11014

# Product metadata; the `image` and `title` columns are read further down.
train = pd.read_csv("../input/shopee-product-matching/train.csv")

In [None]:
def normalize_embeddings(embeddings):
    """Scale every row of a 2-D embedding matrix to unit L2 length.

    Parameters
    ----------
    embeddings : array-like of shape (n_samples, n_features)
        One embedding per row. A floating-point ``ndarray`` is normalised
        in place (and also returned), matching the previous behaviour.

    Returns
    -------
    numpy.ndarray
        The normalised embeddings. Rows whose norm is exactly zero are left
        as all-zero rows instead of turning into NaN (0/0), so they simply
        score 0 against everything in a cosine comparison.
    """
    # One vectorised norm computation instead of a Python-level loop per row.
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    # Dividing by 1 keeps zero rows at zero and avoids NaN / RuntimeWarning.
    norms[norms == 0] = 1
    embeddings /= norms
    return embeddings
def create_model(pretrained_model):
    """Wrap a backbone in a three-layer Sequential embedding model.

    The layers are, in order: the given ``pretrained_model``, a
    ``GlobalAveragePooling2D`` layer, and a ``Lambda`` layer that applies
    ``tf.math.l2_normalize`` along axis 1.

    Parameters
    ----------
    pretrained_model
        The model/layer used as the first stage of the stack.

    Returns
    -------
    tf.keras.Sequential
        The assembled model.
    """
    def _unit_length(features):
        # L2-normalise along axis 1.
        return tf.math.l2_normalize(features, axis=1)

    stack = [
        pretrained_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Lambda(_unit_length),
    ]
    return tf.keras.Sequential(stack)

In [None]:
def predictions_cosine_numpy(embeddings, curr_embedding, threshold=0.0):
    """Rank the rows of the global ``train`` frame by similarity to one query.

    The score of each row is the dot product between its embedding and the
    query embedding; when both are L2-normalised this is the cosine
    similarity.

    Parameters
    ----------
    embeddings : array-like of shape (n_rows, n_features)
        One embedding per row of ``train``, in the same order.
    curr_embedding : array-like of shape (1, n_features) or (n_features,)
        Embedding of the single query item.
    threshold : float, default 0.0
        Only rows with a score strictly greater than this value are returned.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``train`` plus a ``values`` column holding the
        score, sorted by ``values`` in descending order.

    Raises
    ------
    ValueError
        If ``curr_embedding`` does not describe exactly one query vector.

    Notes
    -----
    The global ``train`` frame is no longer modified: the scores are attached
    to a copy, so repeated calls do not leave a stale ``values`` column behind.
    """
    scores = np.matmul(embeddings, np.asarray(curr_embedding).T).reshape(-1)
    if scores.shape[0] != len(embeddings):
        raise ValueError(
            "curr_embedding must contain exactly one query vector; got "
            f"{scores.shape[0]} scores for {len(embeddings)} embeddings"
        )

    # assign() returns a new frame, leaving the shared `train` untouched.
    scored = train.assign(values=scores)
    return scored[scores > threshold].sort_values(by='values', ascending=False)

In [None]:
def get_image_model():
    """Build the image embedding model and load its weights from disk.

    An ``EfficientNetB5`` backbone (no classification head, no preset
    weights) is wrapped by ``create_model``, the weights stored at
    ``image_model_filename`` are loaded into it, and a functional model that
    maps the network input to the output of the layer at index 2 is returned.

    Returns
    -------
    tf.keras.models.Model
        Model from the network input to the output of layer index 2.
    """
    backbone = EfficientNetB5(weights=None, include_top=False, input_shape=None)
    embedding_net = create_model(backbone)
    embedding_net.load_weights(image_model_filename)

    # Index 2 is the third (last) layer of the Sequential built by create_model.
    final_layer = embedding_net.get_layer(index=2)
    return tf.keras.models.Model(
        inputs=embedding_net.input,
        outputs=final_layer.output,
    )

In [None]:
def process_data(image_path, label):
    """Load one JPEG from disk and resize it to the configured input size.

    Parameters
    ----------
    image_path
        Path of the JPEG file to read.
    label
        Passed through unchanged.

    Returns
    -------
    tuple
        ``(image, label)`` where ``image`` has been decoded with ``CHANNELS``
        channels and resized to ``[HEIGHT, WIDTH]``.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=CHANNELS)
    resized = tf.image.resize(decoded, [HEIGHT, WIDTH])
    # A division by 255 used to be applied here and is disabled: pixel values
    # are returned unscaled.
    return resized, label

In [None]:
def get_dataset(image, batch_size=1):
    """Build a ``tf.data`` pipeline that loads and resizes query images.

    Parameters
    ----------
    image : str or sequence of str
        A single image path, or several paths. Previously only a single path
        worked, because the placeholder label array always had length 1 and
        no longer matched the number of paths.
    batch_size : int, default 1
        Number of images per batch.

    Returns
    -------
    tf.data.Dataset
        Batches of ``(image, label)`` pairs produced by ``process_data``;
        every label is the placeholder value ``-1``.
    """
    filepaths = pd.Series(image).values
    # One placeholder label per path so both tensors share their first dimension.
    placeholder_labels = np.full(len(filepaths), -1, dtype=np.int64)

    ds = tf.data.Dataset.from_tensor_slices((filepaths, placeholder_labels))
    ds = ds.map(process_data)
    return ds.batch(batch_size)

In [None]:
# Build the embedding model, then load the stored embedding matrix and
# normalise its rows (it is later scored row-by-row against `train`).
image_model = get_image_model()
efnb5_embeddings = normalize_embeddings(
    np.load("../input/final/efficient_net_b5_finetune_embedding.npy")
)

In [None]:
# Embed one query image and score it against every stored embedding.
image_path = "../input/shopee-product-matching/train_images/00117e4fc239b1b641ff08340b429633.jpg"
ds = get_dataset(image_path)
image_embedding = normalize_embeddings(image_model.predict(ds))
merged_preds = predictions_cosine_numpy(efnb5_embeddings, image_embedding)

# Keep the 15 best-scoring rows, then only those scoring above 0.4.
data = merged_preds.head(15).reset_index()
similar_data = data.loc[data["values"] > 0.4]

no_of_related_products = len(similar_data)
related_image_paths = (
    "../input/shopee-product-matching/train_images/" + similar_data["image"].values
)
related_text = similar_data["title"].values

print("\nSimilar Data")
print(len(related_image_paths))
print(len(related_text))

In [None]:
import matplotlib.pyplot as plt
import glob, os

# Show every retrieved product image in its own figure.
# (The former `ultrasounds` glob over "../input/train/*.tif" was unused and
# pointed at a path that has nothing to do with this dataset, so it was dropped.)
for image_file in related_image_paths:
    im = plt.imread(image_file)
    fig, ax = plt.subplots(figsize=(5, 10))
    ax.imshow(im)
    plt.show()
    # Release the figure explicitly so a long result list cannot pile up
    # open figures in memory.
    plt.close(fig)