# Image similarity for product recommendation.

First, import all the libraries the notebook needs.

In [93]:
from PIL import Image
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
from tqdm import tqdm
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Input, Flatten, Bidirectional, GRU, SpatialDropout1D, Embedding, GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate
        
        

from tensorflow.keras.preprocessing.image import load_img,img_to_array
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer



from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

In [94]:
# Product metadata, one row per catalogue item. Malformed rows in styles.csv
# are skipped silently instead of aborting the load.
STYLES_CSV = "/kaggle/input/fashion-product-images-dataset/fashion-dataset/fashion-dataset/styles.csv"

try:
    # pandas >= 1.3 spelling. `error_bad_lines` / `warn_bad_lines` were
    # deprecated in 1.3 and removed in 2.0, so they break on a current kernel.
    df = pd.read_csv(STYLES_CSV, on_bad_lines="skip")
except TypeError:
    # Older pandas does not know `on_bad_lines`; fall back to the legacy flags.
    df = pd.read_csv(STYLES_CSV,
                     error_bad_lines=False,
                     warn_bad_lines=False)
df.head()

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England Men Party Blue Jeans
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011.0,Casual,Manchester United Men Solid Black Track Pants
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma Men Grey T-shirt


In [95]:
df.isna().sum()

id                      0
gender                  0
masterCategory          0
subCategory             0
articleType             0
baseColour             15
season                 21
year                    1
usage                 317
productDisplayName      7
dtype: int64

In [96]:
df.shape

(44424, 10)

In [97]:
# The display name is the text input of the model, so rows without one are
# unusable. Keep the rest and re-check what is still missing.
has_title = df['productDisplayName'].notna()
df = df[has_title]
df.isna().sum()

id                      0
gender                  0
masterCategory          0
subCategory             0
articleType             0
baseColour             10
season                 21
year                    1
usage                 312
productDisplayName      0
dtype: int64

In [98]:
IMAGES_PATH = "/kaggle/input/fashion-product-images-dataset/fashion-dataset/fashion-dataset/images/"

# Turn each numeric product id into the path of its JPEG and move the column
# out of `df` in a single step. Building the Series from the popped column
# avoids writing back into the filtered frame first (which raises pandas'
# chained-assignment warning) while leaving the same end state: `df` without
# an 'id' column and `image_ids` holding one path per remaining row.
image_ids = IMAGES_PATH + df.pop('id').astype(str) + '.jpg'

In [103]:
n_rows = 10000

In [106]:
IMAGE_DIMS = (64, 64, 3)

def load_image(path):
    """Read one product image and prepare it for the ResNet50 backbone.

    Args:
        path: Location of the image file on disk.

    Returns:
        The image resized to ``IMAGE_DIMS[0]`` x ``IMAGE_DIMS[1]``, converted
        from OpenCV's BGR channel order to RGB and passed through
        ``preprocess_input``.

    Raises:
        OSError: If OpenCV cannot read or decode the file.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/corrupt file by returning None rather
        # than raising; fail here with a message that names the file instead
        # of letting cv2.resize die on an opaque assertion.
        raise OSError("Could not read image: {}".format(path))
    # cv2.resize expects the target size as (width, height).
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return preprocess_input(image)

image_data = []
# Positions (within the first `n_rows` paths) whose image could not be loaded;
# used afterwards to drop the matching titles.
failed = []

# Wrap the paths rather than the enumerate object so tqdm knows the total and
# can show a real progress bar with an ETA.
for i, img_path in enumerate(tqdm(image_ids[:n_rows])):
    try:
        image_data.append(load_image(img_path))
    except Exception:
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit and
        # make this multi-minute loop impossible to stop cleanly.
        failed.append(i)

# float32 is what the network consumes (and, under Keras' default floatx, what
# preprocess_input already produces), so a float64 buffer would only double
# the memory footprint.
image_data = np.array(image_data, dtype="float32")

10000it [07:20, 22.69it/s]


In [107]:
# Display names of the same first `n_rows` products whose images were requested.
titles = df["productDisplayName"].iloc[:n_rows].tolist()

In [108]:
failed

[6694]

In [109]:
# Drop the titles whose image failed to load so that `titles` stays aligned
# with `image_data`. Delete from the highest index down: every deletion shifts
# the following elements one slot left, so deleting in ascending order would
# remove the wrong titles as soon as there is more than one failure.
for fail in sorted(failed, reverse=True):
    del titles[fail]

assert len(titles) == len(image_data), "titles and images are out of sync"

In [110]:
# Learn the vocabulary from the product titles and encode every title as a
# sequence of word indices.
tokenizer = Tokenizer(num_words=25000)
tokenizer.fit_on_texts(titles)
titles_train = tokenizer.texts_to_sequences(titles)

# Index 0 is reserved, hence one extra slot on top of the known words.
vocab_size = 1 + len(tokenizer.word_index)
print(titles_train[2])

# Pad every sequence at the end up to a fixed length of `maxlen` tokens.
maxlen = 25
titles_train_padded = pad_sequences(titles_train, maxlen=maxlen, padding='post')
print(titles_train_padded[1])

[187, 2, 37, 13]
[116 117   1 831   5  64   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0]


In [111]:
import random


#### Negative samples: shuffle the existing titles and images independently so
#### that they no longer line up. Pretty naive approach which can be improved
#### (a random pairing can, rarely, reproduce a genuine match).
#
# random.shuffle must not be used on multi-dimensional numpy arrays: it swaps
# elements via `x[i], x[j] = x[j], x[i]`, and because rows of an ndarray are
# views the swap overwrites data, leaving duplicated rows and losing others.
# Indexing with a permutation is safe, returns a fresh copy, and a fixed seed
# makes the negatives reproducible across runs.
neg_rng = np.random.RandomState(42)

titles_neg = titles_train_padded[neg_rng.permutation(len(titles_train_padded))]
image_data_neg = image_data[neg_rng.permutation(len(image_data))]



In [112]:
image_data_neg.shape

(9999, 64, 64, 3)

In [113]:

titles_neg.shape

(9999, 25)

In [114]:
# Image inputs: the genuine pairs first, followed by the shuffled negatives.
im_train = [*image_data, *image_data_neg]

In [115]:
# Title inputs in the same order as the images: genuine pairs, then negatives.
# Note that this rebinds `titles_train`, which earlier held the unpadded sequences.
titles_train = [*titles_train_padded, *titles_neg]

In [116]:
# Image branch: ResNet50 with ImageNet weights and without its classification
# head (include_top=False), used as a feature extractor for IMAGE_DIMS inputs.
res50 = ResNet50(weights='imagenet', 
                 include_top=False, 
                 input_shape=IMAGE_DIMS)

# Freeze the backbone; only the layers added on top of it are trained.
res50.trainable=False

image_input = Input(shape=IMAGE_DIMS)
# training=False: the backbone is always called in inference mode.
x = res50(image_input, training=False)
# Flatten the backbone's feature map and reduce it to a 128-d image vector.
x = Flatten()(x)
x = Dense(128, activation='relu')(x)


# Text branch: padded title token ids -> embedding -> bidirectional GRU ->
# pooled 128-d title vector.
sequence_input = Input(shape=(maxlen, ))
# No pretrained vectors are loaded into this layer, so it has to be trainable:
# a frozen, randomly initialised embedding would feed the GRU fixed random
# vectors for the whole training run.
z = Embedding(vocab_size, 
              50,
              trainable = True)(sequence_input)

# Drops whole embedding channels rather than individual values.
z = SpatialDropout1D(0.5)(z)
z = Bidirectional(GRU(32, 
                        return_sequences=True,
                        dropout=0.1,
                        recurrent_dropout=0.1))(z)

# Summarise the per-token GRU outputs with both their mean and their maximum.
avg_pool = GlobalAveragePooling1D()(z)
max_pool = GlobalMaxPooling1D()(z)

z = concatenate([avg_pool, max_pool])
z = Dense(128, activation='relu')(z)


### Projecting both modalities into a common space: the same Dense layer
### (shared weights) is applied to the image vector and to the title vector.
common = Dense(64, activation='relu')
x_s = common(x)
z_s = common(z)

### We could also try the dot product of x_s and z_s as the similarity score.

# Match / no-match classifier on top of the two concatenated projections.
conc = concatenate([x_s, z_s]) 
output = Dense(1, activation="sigmoid")(conc)

In [117]:
model = Model(inputs = [image_input, sequence_input], outputs=output)

In [118]:
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_12 (InputLayer)           [(None, 25)]         0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 25, 50)       200000      input_12[0][0]                   
__________________________________________________________________________________________________
spatial_dropout1d_3 (SpatialDro (None, 25, 50)       0           embedding_3[0][0]                
__________________________________________________________________________________________________
input_11 (InputLayer)           [(None, 64, 64, 3)]  0                                            
____________________________________________________________________________________________

In [119]:
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [124]:
# Labels follow the layout of the inputs: 1 for the genuine pairs in the first
# half, 0 for the shuffled negatives in the second half.
n_pairs = len(titles_train_padded)
y = [1] * n_pairs + [0] * n_pairs

In [125]:
len(im_train), len(titles_train), len(y)

(19998, 19998, 19998)

In [None]:
# Keras carves `validation_split` off the *end* of the arrays, before any
# shuffling. The inputs are laid out as all positives followed by all
# negatives, so without this permutation the validation set would consist
# solely of label-0 pairs and val_accuracy would be meaningless.
# Shuffle images, titles and labels together once, with a fixed seed.
fit_order = np.random.RandomState(42).permutation(len(y))

model.fit([np.array(im_train)[fit_order], np.array(titles_train)[fit_order]], 
          np.array(y)[fit_order], 
          validation_split=0.1, 
          shuffle=True, epochs=10)

Train on 17998 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10