In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.layers import GlobalMaxPooling2D


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2
import matplotlib.image as mpimg

import os
import PIL
import PIL.Image
import pathlib

from tqdm import tqdm
# `tqdm._tqdm_notebook` is a deprecated private shim; `tqdm.notebook` is the
# public module that exports the same `tqdm_notebook` class.
from tqdm.notebook import tqdm_notebook
# Register tqdm's pandas integration so `progress_apply` is available on pandas objects.
tqdm_notebook.pandas()

In [None]:
# File-wide constants: height and width (in pixels) used for image loading
# and for the network's input shape.
IMAGE_HEIGHT, IMAGE_WIDTH = 224, 224

## Loading data

In [None]:
# Root directory that holds the input datasets.
DATASET_PATH = '/kaggle/input'

# List the entries found directly under the dataset root.
input_entries = os.listdir(DATASET_PATH)
print(input_entries)

In [None]:
# Read the styles table and preview its first rows.
styles_csv_path = os.path.join(DATASET_PATH, 'styles-image-path', 'styles_image_paths.csv')
styles_csv = pd.read_csv(styles_csv_path)
styles_csv.head()

## Using pretrained ResNet-50 model for recommendations

In [None]:
# Pretrained ResNet-50 with ImageNet weights, built without its top layers
# (include_top=False) for inputs of IMAGE_HEIGHT x IMAGE_WIDTH with 3 channels.
resnet_input_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, 3)
feature_extractor = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=resnet_input_shape,
)

# Freeze the pretrained weights.
feature_extractor.trainable = False

In [None]:
# Stack the frozen feature extractor with a global max-pooling layer.
recommendation_model = keras.Sequential()
recommendation_model.add(feature_extractor)
recommendation_model.add(GlobalMaxPooling2D())

In [None]:
# Print the summary of the assembled recommendation model.
recommendation_model.summary()

In [None]:
# utility function to get embeddings for a single image

def get_single_image_embeddings(model, image_path):
    """Return the flattened output of ``model`` for a single image file.

    Parameters
    ----------
    model
        Object exposing a Keras-style ``predict(batch, verbose=...)`` method.
    image_path
        Path of the image file to load.

    Returns
    -------
    The prediction for the one-image batch, flattened with ``reshape(-1)``.
    """
    # Load the image resized to the dimensions the network was built for.
    image_object = image.load_img(image_path, target_size=(IMAGE_HEIGHT, IMAGE_WIDTH))
    image_array = image.img_to_array(image_object)
    # predict() works on batches, so add a leading batch axis of size 1.
    image_array = np.expand_dims(image_array, axis=0)
    image_array = preprocess_input(image_array)

    # verbose=0: this function is applied once per image, and a per-call
    # progress bar from predict() would flood the notebook output.
    return model.predict(image_array, verbose=0).reshape(-1)

In [None]:
# File names actually present in the images directory.
images_dir = os.path.join(DATASET_PATH, 'fashion-product-images-dataset', 'fashion-dataset', 'images')
images_list = os.listdir(images_dir)

# File names referenced by the styles table (last '/'-separated path component).
styles_list = [image_path.split('/')[-1] for image_path in styles_csv['image'].values]

# Names listed in the table that have no file in the images directory.
missing_images = list(set(styles_list).difference(set(images_list)))
print(missing_images)

# The part of each file name before the first '.' is parsed as an integer id.
styles_csv_ids = [int(file_name.split('.')[0]) for file_name in missing_images]
print(styles_csv_ids)

In [None]:
# Drop the rows whose id belongs to an image that was found to be missing.
has_missing_image = styles_csv['id'].isin(styles_csv_ids)
styles_csv = styles_csv[~has_missing_image]

In [None]:
# Embed the image of the first row and show the shape of the resulting vector.

first_image_path = styles_csv.iloc[0].image
embedding = get_single_image_embeddings(recommendation_model, first_image_path)
embedding.shape

In [None]:
def get_all_image_embeddings(model, dataframe):
    """Compute an embedding for every path in ``dataframe['image']``.

    Parameters
    ----------
    model
        Model forwarded to ``get_single_image_embeddings`` for each image.
    dataframe
        DataFrame with an ``'image'`` column of image paths.

    Returns
    -------
    The result of applying ``get_single_image_embeddings`` to each path of
    the ``'image'`` column (same index as ``dataframe``).

    Raises
    ------
    Any exception raised while embedding an image propagates unchanged.
    Previously the error was printed and swallowed, after which the function
    failed with an unrelated ``UnboundLocalError`` on ``return``.
    """
    image_paths = dataframe['image']
    # Prefer tqdm's progress-reporting apply when it has been registered on
    # pandas; otherwise fall back to the plain apply (same result, no bar).
    apply_to_each = getattr(image_paths, 'progress_apply', image_paths.apply)
    return apply_to_each(lambda image_path: get_single_image_embeddings(model, image_path))

In [None]:
# Embed every image listed in the styles table with the recommendation model.
# (The previous call passed the undefined name `recom` and omitted the dataframe.)
embeddings = get_all_image_embeddings(recommendation_model, styles_csv)

In [None]:
# Stack the per-image vectors into one 2-D numeric matrix in a single step.
# The frame keeps `embeddings.index` as its row labels, so every row lines up
# with the entry it was computed from. Pre-allocating with range(numRows) and
# filling by label left all-NaN rows and appended extra ones whenever the
# index had gaps.
indexes = embeddings.index.values
emb_matrix = pd.DataFrame(np.vstack(embeddings.to_numpy()), index=embeddings.index)

# Dimensions are read from the data instead of hard-coding the vector width.
numRows, numCols = emb_matrix.shape
emb_matrix.head()

In [None]:
# emb_matrix.columns = emb_matrix.columns.astype(str)
# emb_matrix.to_feather("emb_matrix.feather")

In [None]:
# emb = pd.read_feather("/kaggle/working/emb_matrix.feather")