In [1]:
import joblib
import numpy as np
import pandas as pd
import tensorflow
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.preprocessing import image
from tensorflow import keras
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from numpy.linalg import norm
from sklearn.neighbors import NearestNeighbors
from PIL import Image
import requests
from tqdm import tqdm
import os
from shutil import copy

In [2]:
# Reading the Dataset having aaprox. 44000 records
df = pd.read_csv('../data/dataset/newdata.csv')
df.drop(['Unnamed: 0'], axis=1, inplace=True)

In [3]:
# Taking 1000 samples for now.
samples_df = df.head(10000)

In [4]:
samples_df

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011,Casual,Turtle Check Men Navy Blue Shirt,http://assets.myntassets.com/v1/images/style/p...,15970.jpg
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Peter England Men Party Blue Jeans,http://assets.myntassets.com/v1/images/style/p...,39386.jpg
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016,Casual,Titan Women Silver Watch,http://assets.myntassets.com/v1/images/style/p...,59263.jpg
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011,Casual,Manchester United Men Solid Black Track Pants,http://assets.myntassets.com/v1/images/style/p...,21379.jpg
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012,Casual,Puma Men Grey T-shirt,http://assets.myntassets.com/v1/images/style/p...,53759.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,43685,Boys,Apparel,Topwear,Tshirts,White,Summer,2012,Casual,Doodle Boys Printed White T-shirt,http://assets.myntassets.com/v1/images/style/p...,43685.jpg
9996,16487,Men,Apparel,Topwear,Tshirts,Blue,Fall,2011,Casual,Locomotive Men Solid Blue TShirt,http://assets.myntassets.com/v1/images/style/p...,16487.jpg
9997,51340,Men,Accessories,Socks,Socks,Beige,Summer,2016,Casual,Raymond Men Beige Socks,http://assets.myntassets.com/v1/images/style/p...,51340.jpg
9998,6102,Men,Apparel,Topwear,Tshirts,Black,Summer,2011,Casual,UCB Men's Benetton Details Black T-shirt,http://assets.myntassets.com/v1/images/style/p...,6102.jpg


In [5]:
import random
amount = []
for x in range(10000):
    price = random.randint(5, 45) * 5
    amount.append(float(price))

samples_df['price'] = amount

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [16]:
from sklearn.utils import shuffle
samples_df = shuffle(samples_df)

In [17]:
samples_df

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename,price
1743,55400,Women,Personal Care,Makeup,Kajal and Eyeliner,Charcoal,Spring,2017,Casual,Revlon Grow Luscious Graphite Eye Liner,http://assets.myntassets.com/v1/images/style/p...,55400.jpg,30.0
3194,26198,Men,Personal Care,Fragrance,Perfume and Body Mist,Blue,Spring,2017,Casual,Formula 1 Start Men Perfume,http://assets.myntassets.com/v1/images/style/p...,26198.jpg,25.0
3641,35116,Women,Accessories,Jewellery,Necklace and Chains,Black,Summer,2012,Casual,Allen Solly Woman Black Necklace,http://assets.myntassets.com/v1/images/style/p...,35116.jpg,65.0
4018,53131,Women,Footwear,Shoes,Heels,Gold,Winter,2015,Casual,Catwalk Women Gold Toned & Brown Flats,http://assets.myntassets.com/v1/images/style/p...,53131.jpg,95.0
6591,13992,Women,Footwear,Shoes,Sports Shoes,Grey,Fall,2011,Sports,Puma Women Body Train Grey Sports Shoes,http://assets.myntassets.com/v1/images/style/p...,13992.jpg,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7660,7217,Men,Footwear,Shoes,Formal Shoes,Black,Fall,2011,Formal,Rockport Men Carnforth Soft Black Shoe,http://assets.myntassets.com/assets/images/721...,7217.jpg,125.0
4817,4316,Unisex,Apparel,Topwear,Tshirts,Black,Summer,2011,Casual,Tantra Kid's Unisex Play To Win Black Kidswear,http://assets.myntassets.com/v1/images/style/p...,4316.jpg,90.0
1788,16303,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011,Casual,Peter England Men Check Navy Blue Shirt,http://assets.myntassets.com/v1/images/style/p...,16303.jpg,170.0
6797,34111,Girls,Apparel,Topwear,Tops,Navy Blue,Summer,2012,Casual,Doodle Girls Striped Navy Blue Top,http://assets.myntassets.com/v1/images/style/p...,34111.jpg,65.0


In [18]:
samples_df.to_csv('10000samples.csv')

In [20]:
# Taking the respective images of the products of 1000 samples
dir_src = r"D:\\data\\images"
dir_dst = r"images"
for root, _, files in os.walk(dir_src):
    for img in files:
        if img in samples_df['filename'].values:
             copy(os.path.join(root, img), dir_dst)


In [21]:
model = ResNet50(weights="imagenet", include_top=False, input_shape=(60,80,3))
model.trainable = False

model = tensorflow.keras.Sequential([
    model,
    GlobalMaxPooling2D()
])

In [22]:
def feature_extraction(img_path, model):
    img = image.load_img(img_path, target_size=(60,80))
    img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    result = model.predict(preprocessed_img).flatten()
    normalized_result = result /  norm(result)

    return normalized_result

In [23]:
filenames = []

for file in os.listdir('images'):
    filenames.append(os.path.join('images', file))

feature_list = []

for file in tqdm(filenames):
    feature_list.append(feature_extraction(file, model))

100%|██████████| 9999/9999 [17:43<00:00,  9.41it/s]  


In [24]:
joblib.dump(filenames, "filenames.pkl")

['filenames.pkl']

In [25]:
joblib.dump(feature_list, "image-embed.pkl")

['image-embed.pkl']

In [50]:
ds = np.array(joblib.load(open('image-embed.pkl', 'rb')))