# Product Similarity: Results

In this notebook we are going to use the embeddings extracted from [[H&M] Product Similarity #1 Embeddings&KNN](https://www.kaggle.com/joelqv/h-m-product-similarity-1-embeddings-knn) to retrieve similar products.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import joblib

# Article metadata table from the competition data.
# NOTE(review): df_articles is not referenced again in the visible cells — confirm it is still needed.
df_articles = pd.read_csv('../input/h-and-m-personalized-fashion-recommendations/articles.csv')
# Neighbour index queried later through knn.kneighbors(); presumably fitted in the
# "#1 Embeddings&KNN" notebook — confirm.
# joblib.load is pickle-based and can execute arbitrary code while loading: only load trusted files.
knn = joblib.load('../input/h-m-product-similarity-1-embeddings-knn/knn.joblib')
# NOTE(review): image_embeddings is loaded but not used in the visible cells (queries go through knn) — confirm.
image_embeddings = np.load('../input/h-m-product-similarity-1-embeddings-knn/hm_embeddings_effb0.npy')

In [None]:
import os

def get_article_images_df(path='../input/h-and-m-personalized-fashion-recommendations/images'):
    """
    Build a table with one row per file found under ``path``.

    The directory tree is walked recursively with ``os.walk``. Rows are kept in
    the order the walk yields them; no sorting is applied, because other cells
    address rows of this table by position.

    Parameters
    ----------
    path : str
        Root directory to scan.

    Returns
    -------
    pandas.DataFrame
        Columns ``article_id`` (the file name with a trailing ``.jpg`` removed,
        kept as a string) and ``image`` (the full path of the file). When no
        file is found the frame is empty but still has both columns.
    """
    suffix = '.jpg'
    article_ids = []
    image_paths = []
    for dirname, _, filenames in os.walk(path):
        for filename in filenames:
            # os.walk already hands us the bare file name, so there is no need
            # to re-split the joined path on '/' (which is platform dependent).
            # Only a trailing extension is stripped: str.replace would also
            # delete '.jpg' occurring in the middle of a name.
            if filename.endswith(suffix):
                article_id = filename[:-len(suffix)]
            else:
                article_id = filename
            article_ids.append(article_id)
            image_paths.append(os.path.join(dirname, filename))
    return pd.DataFrame({'article_id': article_ids, 'image': image_paths})

In [None]:
# Scan the images directory once; later cells pick images by positional row (df.iloc) of this frame.
# NOTE(review): neighbour indices returned by knn are also used as df.iloc positions, which assumes
# the rows come out in the same order as when the embeddings were built — confirm.
df = get_article_images_df()

In [None]:
import cv2
import matplotlib.pyplot as plt

def compute_distances(df, idx, model, knn):
    """
    Return the distances and indices of the products most similar to one image.

    The image at positional row ``idx`` of ``df`` is read from disk, resized to
    256x256, embedded with ``model`` and looked up in ``knn``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``image`` column holding image file paths.
    idx : int
        Positional row (``df.iloc``) of the query image.
    model : object with a ``predict`` method, or None
        Feature extractor applied to a ``(1, 256, 256, 3)`` float32 batch.
        The model passed in is used as-is, so it is built only once by the
        caller. If ``None``, an ImageNet-pretrained ``EfficientNetB0`` without
        classification head is created for this call.
    knn : object with a ``kneighbors`` method
        Neighbour index queried with the computed embedding.

    Returns
    -------
    tuple
        ``(distances, indices)`` exactly as returned by ``knn.kneighbors``.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the image file.
    """
    image_path = df.iloc[idx].image
    img = cv2.imread(image_path)    # TODO: cv2.readfrombinary
    if img is None:
        # cv2.imread reports a missing/undecodable file by returning None
        # instead of raising, which would otherwise fail later inside resize.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    # NOTE(review): pixels stay in OpenCV's BGR channel order; presumably this
    # mirrors how the stored embeddings were produced — confirm before changing.
    img = cv2.resize(img, (256, 256))

    X = np.zeros((1, 256, 256, 3), dtype='float32')
    X[0,] = img

    if model is None:
        # Fallback keeps calls that pass no model working; normal callers pass
        # a pre-built model so the network is not re-created on every query.
        model = EfficientNetB0(weights='imagenet', include_top=False, pooling='avg', input_shape=None)
    inf_embeddings = model.predict(X, verbose=1)
    distances, indices = knn.kneighbors(inf_embeddings)
    return distances, indices

def plot_results(df, indices, distances, col_size=3, row_size=4):
    """
    Draw the query image and its nearest neighbours in a grid of panels.

    Panels are filled in the order of ``indices[0]``: the first one is titled
    'Query image', the following ones '<rank> most similar'. Neighbours beyond
    the number of panels are not drawn; panels without a neighbour stay blank.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``image`` column holding image file paths.
    indices : 2-D sequence
        Neighbour indices; only the first row is used, and each entry is a
        positional row (``df.iloc``) of ``df``.
    distances : any
        Accepted so the call mirrors ``compute_distances``' return value;
        currently not used for drawing.
    col_size : int
        Passed to ``plt.subplots`` as the number of grid rows.
    row_size : int
        Passed to ``plt.subplots`` as the number of grid columns.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read one of the image files.
    """
    # squeeze=False always yields a 2-D array of axes, so flatten() also works
    # for a 1x1 grid.
    fig, axs = plt.subplots(col_size, row_size, figsize=(20, 15), squeeze=False)
    axs = axs.flatten()
    for ax in axs:
        # Hide axes on every panel up front so unused panels do not show ticks.
        ax.axis('off')
    for rank, (ax, idx) in enumerate(zip(axs, indices[0])):
        ax.set_title('Query image' if rank == 0 else f'{rank} most similar')
        image_path = df.iloc[idx].image
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for a missing/undecodable file.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # OpenCV loads BGR while matplotlib expects RGB.
        im_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ax.imshow(im_rgb, aspect='auto')
    fig.suptitle('Similar products', fontsize=36)

In [None]:
from tensorflow.keras.applications import EfficientNetB0

# ImageNet-pretrained EfficientNetB0 used as a feature extractor: include_top=False drops the
# classification head and pooling='avg' applies global average pooling to the last feature map.
# Built once here and passed to compute_distances() by the query cells below.
# NOTE(review): presumably the same configuration that produced the stored embeddings — confirm.
model = EfficientNetB0(weights='imagenet', include_top=False, pooling='avg', input_shape=None)

In [None]:
# Positional rows of `df` used as example queries; one figure is drawn per query.
# Replaces a series of copy-pasted cells that differed only in the row number.
QUERY_INDICES = [0, 100, 1000, 5000, 10000, 20000, 30000, 40000,
                 50000, 60000, 70000, 80000, 90000, 100000]

for query_idx in QUERY_INDICES:
    distances, indices = compute_distances(df, query_idx, model, knn)
    plot_results(df, indices, distances)
    plt.show()  # render each figure as soon as it is built so plots appear in query order