In [46]:
import joblib
import numpy as np
import pandas as pd
import tensorflow
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.preprocessing import image
from tensorflow import keras
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from numpy.linalg import norm
from sklearn.neighbors import NearestNeighbors
from PIL import Image
import requests
from tqdm import tqdm
import os
from shutil import copy

In [6]:
# Load the full product dataset (approx. 44000 records) and discard the
# 'Unnamed: 0' column.
df = pd.read_csv('../data/dataset/newdata.csv').drop(columns=['Unnamed: 0'])

In [11]:
# Work with only the first 1000 rows of the dataset for now.
samples_df = df.iloc[:1000]

In [38]:
# Preview the sample; the rendered table is truncated for long frames.
samples_df

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011,Casual,Turtle Check Men Navy Blue Shirt,http://assets.myntassets.com/v1/images/style/p...,15970.jpg
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Peter England Men Party Blue Jeans,http://assets.myntassets.com/v1/images/style/p...,39386.jpg
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016,Casual,Titan Women Silver Watch,http://assets.myntassets.com/v1/images/style/p...,59263.jpg
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011,Casual,Manchester United Men Solid Black Track Pants,http://assets.myntassets.com/v1/images/style/p...,21379.jpg
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012,Casual,Puma Men Grey T-shirt,http://assets.myntassets.com/v1/images/style/p...,53759.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...
995,51232,Women,Apparel,Topwear,Kurtas,Green,Fall,2012,Ethnic,Satya Paul Women Green Printed Kaftan,http://assets.myntassets.com/v1/images/style/p...,51232.jpg
996,17427,Men,Accessories,Watches,Watches,White,Winter,2016,Casual,CASIO G-Shock Men White Analogue & Digital Wat...,http://assets.myntassets.com/assets/images/174...,17427.jpg
997,9804,Men,Apparel,Topwear,Shirts,Black,Fall,2011,Formal,John Miller Men Black Checks Shirts,http://assets.myntassets.com/v1/images/style/p...,9804.jpg
998,23612,Women,Personal Care,Fragrance,Perfume and Body Mist,Pink,Spring,2017,Casual,Nike Fragrances Women Sensual Touch Perfume,http://assets.myntassets.com/v1/images/style/p...,23612.jpg


In [52]:
# Save the sample to disk; with pandas' defaults the row index is written too.
sample_csv_path = '1000samples.csv'
samples_df.to_csv(sample_csv_path)

In [41]:
# Copy the image of every sampled product into a local working directory.
dir_src = r"../data/dataset/images"
dir_dst = r"images"

# shutil.copy treats a non-existent destination as a *file* path, so without
# this every image would be written over a single file named "images".
os.makedirs(dir_dst, exist_ok=True)

# Hash-based membership test instead of scanning the whole column per file.
wanted_filenames = set(samples_df['filename'])

for root, _, files in os.walk(dir_src):
    for img in files:
        if img in wanted_filenames:
            copy(os.path.join(root, img), dir_dst)


In [43]:
# Frozen ImageNet-pretrained ResNet50 without its classification head, built
# for input shape (60, 80, 3), followed by global max pooling so that each
# image yields a single feature vector.
backbone = ResNet50(weights="imagenet", include_top=False, input_shape=(60,80,3))
backbone.trainable = False

model = keras.Sequential([backbone, GlobalMaxPooling2D()])

In [44]:
def feature_extraction(img_path, model):
    """Return the L2-normalised embedding of a single image.

    The image is loaded with ``target_size=(60, 80)``, converted to an array,
    wrapped in a batch of one, run through ``preprocess_input`` and passed to
    ``model.predict``.

    Args:
        img_path: Path of the image file to embed.
        model: Object exposing ``predict(batch, verbose=...)``; it receives
            the preprocessed single-image batch.

    Returns:
        1-D numpy array: the flattened model output divided by its Euclidean
        norm. If the output is all zeros it is returned unchanged, because
        dividing by a zero norm would produce NaNs.
    """
    img = image.load_img(img_path, target_size=(60, 80))
    batch = np.expand_dims(image.img_to_array(img), axis=0)

    # verbose=0 keeps per-call Keras progress output from flooding the cell
    # when this function is invoked once per image inside a progress loop.
    embedding = model.predict(preprocess_input(batch), verbose=0).flatten()

    magnitude = norm(embedding)
    if magnitude == 0:
        # Nothing to scale; avoid 0/0 -> NaN embeddings.
        return embedding
    return embedding / magnitude

In [47]:
# Collect the image paths. os.listdir returns entries in arbitrary order, so
# sort them to keep the saved path/embedding lists reproducible, and skip
# entries that are not image files (e.g. .DS_Store) because they cannot be
# loaded as images.
image_extensions = ('.jpg', '.jpeg', '.png')
filenames = [
    os.path.join('images', file)
    for file in sorted(os.listdir('images'))
    if file.lower().endswith(image_extensions)
]

# One embedding per path, in the same order as `filenames`.
feature_list = [feature_extraction(file, model) for file in tqdm(filenames)]

100%|██████████| 1000/1000 [01:37<00:00, 10.28it/s]


In [48]:
# Persist the list of image paths so it can be reloaded later.
joblib.dump(value=filenames, filename="filenames.pkl")

['filenames.pkl']

In [49]:
# Persist the list of image embeddings so it can be reloaded later.
joblib.dump(value=feature_list, filename="image-embed.pkl")

['image-embed.pkl']

In [50]:
# Reload the saved embeddings as a single array. The path is passed straight
# to joblib.load so joblib opens and closes the file itself; the previous
# open(...) call left the file handle unclosed.
ds = np.array(joblib.load('image-embed.pkl'))