In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.layers import GlobalMaxPooling2D


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2
import matplotlib.image as mpimg
from sklearn.metrics.pairwise import pairwise_distances


from scipy import spatial
import os
import PIL
import PIL.Image
import pathlib
import ast

In [None]:
# Root directory of the input datasets; it is joined with dataset sub-paths
# when the product image folder is listed further down.
DATASET_PATH = '/kaggle/input'

In [None]:
# Both CSV locations are derived from DATASET_PATH so the input root is
# configured in a single place (the image directory lookup already does this).
image_embeddings = pd.read_csv(
    os.path.join(DATASET_PATH, "embeddings-advanced", "emb_matrix_new.csv")
)

# Only the first 5000 rows of the styles table are loaded.
styles_csv = pd.read_csv(
    os.path.join(DATASET_PATH, "styles-image-path", "styles_image_paths.csv"),
    nrows=5000,
)

In [None]:
# Preview the first rows of the embedding table.
image_embeddings.head()

In [None]:
# (number of rows, number of columns) of the embedding table.
image_embeddings.shape

In [None]:
# File names actually present in the dataset's image directory.
images_list = os.listdir(os.path.join(DATASET_PATH, 'fashion-product-images-dataset', 'fashion-dataset', 'images'))

# File names referenced by the styles table (last component of each 'image' path).
styles_list = [path.split('/')[-1] for path in styles_csv['image'].values]

# Referenced files that are not on disk. Sorted so the printed output is the
# same on every run (plain set iteration order is not stable for strings).
missing_images = sorted(set(styles_list) - set(images_list))
print(missing_images)

# The part of the file name before the first '.' is matched against the 'id' column.
styles_csv_ids = [int(name.split('.')[0]) for name in missing_images]
print(styles_csv_ids)

# Drop the style rows whose image file is missing.
styles_csv = styles_csv[~styles_csv['id'].isin(styles_csv_ids)]

In [None]:
# Keep only the embedding rows whose first component (column '0') is not NaN.
has_first_component = image_embeddings['0'].notna()
image_embeddings = image_embeddings[has_first_component]

In [None]:
# Show the styles table as it stands after the rows with missing image files were dropped.
styles_csv

In [None]:
def load_image(img_path, resized_fac = 0.1):
    """Read an image file and return a rescaled copy of it.

    Parameters
    ----------
    img_path : path of the image file, passed to ``plt.imread``.
    resized_fac : scale factor applied to both width and height (default 0.1).

    Returns
    -------
    The resized image array as returned by ``cv2.resize``.
    """
    img_object = plt.imread(img_path)

    # Only the first two axes (rows, columns) are needed. Taking shape[:2]
    # instead of unpacking three values keeps 2-D grayscale images working.
    height, width = img_object.shape[:2]

    # cv2.resize expects the target size as (width, height). Each side is kept
    # at >= 1 pixel so very small inputs do not produce an empty target size.
    target_size = (
        max(1, int(width * resized_fac)),
        max(1, int(height * resized_fac)),
    )
    return cv2.resize(img_object, target_size)

In [None]:
def predict_similiar_products(product_id):
    """Rank every row of ``image_embeddings`` by cosine similarity to one row.

    Parameters
    ----------
    product_id : integer row *position* in ``image_embeddings`` of the query.

    Returns
    -------
    dict mapping each row's index *label* to its cosine similarity with the
    query row, ordered from most to least similar. Rows whose similarity is
    undefined (NaN, e.g. an all-zero embedding) are placed last.

    NOTE(review): the input is a position while the returned keys are index
    labels; the two only coincide when the frame has a default 0..n-1 index.
    Confirm this is what callers expect.
    """
    matrix = image_embeddings.to_numpy(dtype=float)
    target = matrix[product_id]

    # Cosine similarity of every row against the target in one vectorised step.
    norms = np.linalg.norm(matrix, axis=1)
    with np.errstate(divide="ignore", invalid="ignore"):
        scores = (matrix @ target) / (norms * norms[product_id])
    # Guard against rounding pushing values marginally outside [-1, 1].
    scores = np.clip(scores, -1.0, 1.0)

    # Stable descending sort: ties keep row order and NaN scores sort to the end.
    order = np.argsort(-scores, kind="stable")
    labels = image_embeddings.index[order].tolist()
    return dict(zip(labels, scores[order].tolist()))
        

In [None]:
def plot_grid(image_list, group):
    """Draw a single row of images under a common heading.

    Parameters
    ----------
    image_list : sequence of entries where ``entry[0]`` is an image path
        (loaded through ``load_image``) and ``entry[1]`` is the caption.
    group : text used as the figure's overall title.

    The row has four columns for up to four images and grows to
    ``len(image_list)`` columns for longer lists, so any number of images can
    be shown.
    """
    plt.figure(figsize=(40, 10), facecolor="#e1ddbf")
    plt.suptitle(group, fontsize=40)

    # Keep the four-column layout for short lists; widen it when needed.
    n_cols = max(4, len(image_list))

    for position, entry in enumerate(image_list, start=1):
        plt.subplot(1, n_cols, position)
        plt.imshow(load_image(entry[0]))
        plt.title(entry[1], fontsize=20)
        plt.axis("off")
    plt.show()

In [None]:
def plot_image(image_id):
    """Show the image of the ``styles_csv`` row at position ``image_id``.

    The row's 'image' path is loaded through ``load_image`` and its
    'productDisplayName' is used as the plot title.
    """
    product = styles_csv.iloc[image_id]
    plt.imshow(load_image(product['image']))
    plt.title(product['productDisplayName'])

In [None]:
def plot_top_5(image_id, similarities, top_n=9):
    """Show a query product and, in a second figure, its nearest neighbours.

    Parameters
    ----------
    image_id : row position in ``styles_csv`` of the query, shown via ``plot_image``.
    similarities : mapping of product key -> similarity score, already ordered
        from most to least similar. Each key is used as a row position in
        ``styles_csv``.
    top_n : how many neighbours to draw. Defaults to 9, the number this
        function has always drawn despite its name.

    The first entry of ``similarities`` is skipped; presumably it is the query
    itself, which ranks first when compared with its own embedding.
    """
    plot_image(image_id)
    plt.figure(figsize=(40, 10))

    # One spare column is kept so the default layout stays a 1x10 grid.
    n_cols = top_n + 1
    neighbours = list(similarities.items())[1:n_cols]

    for pos, (k, v) in enumerate(neighbours, start=1):
        plt.subplot(1, n_cols, pos)
        plt.imshow(load_image(styles_csv.iloc[k]['image']))
        plt.title(f"{v}, {k}", fontsize=12)
    plt.show()

In [None]:
# Draw a random row *position*. predict_similiar_products and plot_top_5 look
# rows up with .iloc, so an index label is not a safe id: once rows have been
# filtered out, labels can exceed the row count or point at a different row.
# The upper bound covers both tables because the id is used on each of them.
test_id = np.random.choice(min(len(styles_csv), len(image_embeddings)))
print(test_id)
similarities = predict_similiar_products(test_id)
plot_top_5(test_id, similarities)

In [None]:
# Fixed example query. The id is used as a row position (.iloc) both for the
# embedding lookup and for the image lookup.
sample_id = 868
similarities = predict_similiar_products(sample_id)
plot_top_5(sample_id, similarities)

In [None]:
# Fixed example query. The id is used as a row position (.iloc) both for the
# embedding lookup and for the image lookup.
sample_id = 3524
similarities = predict_similiar_products(sample_id)
plot_top_5(sample_id, similarities)

In [None]:
# Fixed example query. The id is used as a row position (.iloc) both for the
# embedding lookup and for the image lookup.
sample_id = 2993
similarities = predict_similiar_products(sample_id)
plot_top_5(sample_id, similarities)

In [None]:
# Draw a random row *position*. predict_similiar_products and plot_top_5 look
# rows up with .iloc, so an index label is not a safe id: once rows have been
# filtered out, labels can exceed the row count or point at a different row.
# The upper bound covers both tables because the id is used on each of them.
test_id = np.random.choice(min(len(styles_csv), len(image_embeddings)))
print(test_id)
similarities = predict_similiar_products(test_id)
plot_top_5(test_id, similarities)

In [None]:
# Draw a random row *position*. predict_similiar_products and plot_top_5 look
# rows up with .iloc, so an index label is not a safe id: once rows have been
# filtered out, labels can exceed the row count or point at a different row.
# The upper bound covers both tables because the id is used on each of them.
test_id = np.random.choice(min(len(styles_csv), len(image_embeddings)))
print(test_id)
similarities = predict_similiar_products(test_id)
plot_top_5(test_id, similarities)