In [None]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, Dropout, Flatten, Dense, Input, Layer
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array

# Raise matplotlib's default font size for every figure drawn in this notebook.
plt.rcParams.update({'font.size': 18})

In [None]:
# Dataset root; the trailing slash matters because paths are built by plain concatenation.
MAIN_DIR = '/kaggle/input/fashion-product-images-dataset/fashion-dataset/'

images_df = pd.read_csv(f'{MAIN_DIR}images.csv')
# on_bad_lines='skip' drops rows pandas cannot parse instead of aborting the whole read.
styles_df = pd.read_csv(f'{MAIN_DIR}styles.csv', on_bad_lines='skip')

In [None]:
# Preview the first five rows of images.csv.
images_df.head(n=5)

In [None]:
# Preview the first five rows of styles.csv.
styles_df.head(n=5)

In [None]:
# Build each row's image file name as "<id>.jpg".
styles_df['filename'] = styles_df['id'].astype(str).add('.jpg')
styles_df.head(n=5)

In [None]:
# Names of every entry in the dataset's images directory.
image_files = os.listdir(f'{MAIN_DIR}images')

In [None]:
# Flag the rows whose image file is actually listed in the images directory.
# Series.isin builds a hash table of the candidates once, so the whole check is roughly
# linear; a per-row `x in image_files` test against a list would rescan the full
# directory listing for every row (quadratic over tens of thousands of files).
styles_df['present'] = styles_df['filename'].isin(image_files)
styles_df.head()

In [None]:
# Count how many rows do / do not have a matching image file.
styles_df.loc[:, 'present'].value_counts()

In [None]:
# Keep only the rows flagged as present, then renumber the index from 0.
styles_df = styles_df.loc[styles_df['present']].reset_index(drop=True)

In [None]:
# Preview the filtered table.
styles_df.head(n=5)

In [None]:
# Draw a random working subset of the catalogue (presumably to keep the embedding step tractable).
# - random_state pins the draw, so Restart & Run All reproduces the same subset and
#   therefore the same embeddings, plot and neighbours.
# - min(...) avoids the ValueError that DataFrame.sample raises when asked for more rows
#   than the frame holds (sampling is without replacement by default).
N_SAMPLES = 10000
SAMPLE_SEED = 42

styles = (
    styles_df
    .sample(n=min(N_SAMPLES, len(styles_df)), random_state=SAMPLE_SEED)
    .reset_index(drop=True)
)
styles.head()

In [None]:
IMG_SIZE = (224,224)

# The pretrained VGG16 weights expect inputs prepared by vgg16.preprocess_input
# (RGB -> BGR and per-channel ImageNet mean subtraction, with no scaling). A plain 1/255
# rescale feeds the network values outside the range it was trained on, which degrades
# the extracted features.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# class_mode=None -> batches contain images only (no labels).
# shuffle=False   -> images are yielded in the row order of `styles`, so model outputs
#                    can be matched back to its rows by position.
generator = datagen.flow_from_dataframe(
    dataframe=styles,
    directory=MAIN_DIR+'images',
    x_col='filename',
    target_size=IMG_SIZE,
    class_mode=None,
    batch_size=32,
    shuffle=False,
)

In [None]:
# Image shape fed to the network: IMG_SIZE plus three colour channels.
INPUT_SHAPE = (*IMG_SIZE, 3)

# VGG16 without its classifier head, frozen so it is used purely as a feature extractor.
base_model = VGG16(include_top=False, input_shape=INPUT_SHAPE)
base_model.trainable = False

# Global average pooling reduces the convolutional feature maps to one vector per image.
input_layer = Input(shape=INPUT_SHAPE)
x = base_model(input_layer)
output = GlobalAveragePooling2D()(x)

embeddings = Model(inputs=input_layer, outputs=output)
embeddings.summary()

In [None]:
# Run every image from the generator through the extractor; one embedding row per image.
X = embeddings.predict(x=generator, verbose=1)

In [None]:
from sklearn.decomposition import PCA

# Reduce the embeddings to their first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

In [None]:
# Store the two PCA coordinates next to each product's metadata, one column per component.
styles['pc1'] = X_pca[:, 0]
styles['pc2'] = X_pca[:, 1]

In [None]:
# Scatter the two PCA coordinates, coloured by the masterCategory column.
fig, ax = plt.subplots(figsize=(20, 12))
sns.scatterplot(data=styles, x='pc1', y='pc2', hue='masterCategory', ax=ax)
plt.show()

In [None]:
def read_img(image_path):
    """Load one image from the dataset's images directory for display.

    Parameters
    ----------
    image_path : str
        File name relative to ``MAIN_DIR + 'images/'``.

    Returns
    -------
    numpy.ndarray
        The output of ``img_to_array`` divided by 255, for the image resized to
        224x224 by ``load_img``.
    """
    path = MAIN_DIR + 'images/'
    # load_img documents target_size as a (height, width) pair; a third "channels"
    # entry is not part of that contract, so only the two spatial sizes are passed.
    image = load_img(os.path.join(path, image_path), target_size=(224, 224))
    image = img_to_array(image)
    image = image/255.
    return image

In [None]:
import random
from sklearn.neighbors import KNeighborsClassifier
# Product ids, in the same row order as `styles`.
y = styles.loc[:, 'id']

In [None]:
# Only neighbour lookup (nn.kneighbors) is needed, so build an unsupervised
# NearestNeighbors index rather than fitting a classifier whose "classes" would be one
# unique product id per row. n_neighbors=7 remains the default for nn.kneighbors(...).
nn = NearestNeighbors(n_neighbors=7)
nn.fit(X)

In [None]:
# For a handful of random query products, show the query image followed by its most
# similar products according to the embedding neighbour index.
N_QUERIES = 10
N_SIMILAR = 5

for _ in range(N_QUERIES):
    # randrange excludes its upper bound. random.randint(0, len(styles)) is inclusive
    # and can return len(styles), one past the last valid row of `styles` and `X`.
    query_idx = random.randrange(len(styles))
    query_img = read_img(styles.loc[query_idx, 'filename'])

    # Ask for one extra neighbour: the query itself is normally among the hits.
    index = nn.kneighbors(
        X=X[query_idx, :].reshape(1, -1),
        n_neighbors=N_SIMILAR + 1,
        return_distance=False,
    )
    # Drop the query by identity instead of assuming it sits at position 0 (an exact
    # duplicate embedding could be ranked ahead of it), then keep the closest N_SIMILAR.
    similar_idx = [j for j in index[0] if j != query_idx][:N_SIMILAR]

    plt.figure(figsize = (4 , 4))
    plt.imshow(query_img)
    plt.title("Input Image")
    plt.axis('off')

    plt.figure(figsize = (20 , 20))
    # A distinct loop variable keeps the query index from being overwritten.
    for rank, neighbor_idx in enumerate(similar_idx, start=1):
        plt.subplot(1 , N_SIMILAR, rank)
        image = read_img(styles.loc[neighbor_idx, 'filename'])
        plt.imshow(image)
        plt.title(f'Similar Product #{rank}')
        plt.axis('off')
    # Spacing is applied once, after all panels exist, rather than on every iteration.
    plt.subplots_adjust(hspace = 0.5 , wspace = 0.3)

    # Render this query's figures now so they are not all left open until the cell ends.
    plt.show()

<div class="alert alert-info">
    <p>
    That's it for this one, folks! Embeddings have several applications that can be explored further. If you like this notebook, please leave an upvote and a comment.
    </p>
</div>