In [1]:
import joblib
import numpy as np
import pandas as pd
import tensorflow
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.preprocessing import image
from tensorflow import keras
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from numpy.linalg import norm
from sklearn.neighbors import NearestNeighbors
from PIL import Image
import requests
from tqdm import tqdm
import os
from shutil import copy

In [3]:
# Load the product catalogue (approx. 44,000 records) and discard the
# 'Unnamed: 0' column that comes with the CSV.
df = pd.read_csv('../data/dataset/newdata.csv')
df = df.drop(columns=['Unnamed: 0'])

In [4]:
# Collapse the plural label "Watches" to "Watch" in both category columns.
for column in ("subCategory", "articleType"):
    df.loc[df[column] == "Watches", column] = 'Watch'
        

In [5]:
# Every product whose article type is 'Ring'.
ring = df.loc[df['articleType'].eq('Ring')]

In [6]:
# Display the ring selection.
ring

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename
121,49005,Women,Accessories,Jewellery,Ring,Silver,Summer,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,49005.jpg
576,48927,Women,Accessories,Jewellery,Ring,Silver,Summer,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,48927.jpg
736,49034,Women,Accessories,Jewellery,Ring,Silver,Summer,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,49034.jpg
1060,48929,Women,Accessories,Jewellery,Ring,Silver,Fall,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,48929.jpg
1094,49033,Women,Accessories,Jewellery,Ring,Silver,Summer,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,49033.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...
43622,48963,Men,Accessories,Jewellery,Ring,Steel,Summer,2013,Casual,Revv Men Steel Ring,http://assets.myntassets.com/v1/images/style/p...,48963.jpg
43766,49041,Women,Accessories,Jewellery,Ring,Silver,Fall,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,49041.jpg
43846,59471,Women,Accessories,Jewellery,Ring,Silver,Summer,2013,Casual,Lucera Silver Ring,http://assets.myntassets.com/v1/images/style/p...,59471.jpg
43985,48990,Women,Accessories,Jewellery,Ring,Silver,Summer,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,48990.jpg


In [7]:
# Ties, restricted to the base colours listed in ties_option.
ties_option = ['Black', 'White', 'Brown', 'Blue', 'Red']
ties = df.loc[
    df['articleType'].eq('Ties')
    & df['baseColour'].isin(ties_option)
]

In [8]:
# Every product whose article type is 'Nail Polish'.
nailpolish = df.loc[df['articleType'].eq('Nail Polish')]

In [9]:
# Display the nail-polish selection.
nailpolish

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename
933,56473,Women,Personal Care,Nails,Nail Polish,Purple,Spring,2017,Casual,Streetwear Grape Shimmer Nail Polish 41,http://assets.myntassets.com/v1/images/style/p...,56473.jpg
1311,56480,Women,Personal Care,Nails,Nail Polish,Brown,Spring,2017,Casual,Streetwear Chocolat Nail Polish 20,http://assets.myntassets.com/v1/images/style/p...,56480.jpg
1603,55894,Women,Personal Care,Nails,Nail Polish,Blue,Spring,2017,Casual,Colorbar Blue Lagoon Nail Lacquer 05,http://assets.myntassets.com/assets/images/558...,55894.jpg
1702,55860,Women,Personal Care,Nails,Nail Polish,Lavender,Spring,2017,Casual,Colorbar Pro Dazzling Mauve Nail Lacquer 008,http://assets.myntassets.com/assets/images/558...,55860.jpg
1785,55858,Women,Personal Care,Nails,Nail Polish,Brown,Spring,2017,Casual,Colorbar Pro Flirt Nail Lacquer 013,http://assets.myntassets.com/v1/images/style/p...,55858.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...
43191,55877,Women,Personal Care,Nails,Nail Polish,Purple,Spring,2017,Casual,Colorbar Exclusive Nail Lacquer 71,http://assets.myntassets.com/assets/images/558...,55877.jpg
43371,55848,Women,Personal Care,Nails,Nail Polish,Peach,Spring,2017,Casual,Colorbar Pro Peach Rose Nail Lacquer 047,http://assets.myntassets.com/assets/images/558...,55848.jpg
43516,55841,Women,Personal Care,Nails,Nail Polish,Pink,Spring,2017,Casual,Colorbar Pro Pink Late Nail Lacquer 056,http://assets.myntassets.com/assets/images/558...,55841.jpg
43837,55846,Women,Personal Care,Nails,Nail Polish,Pink,Spring,2017,Casual,Colorbar Pro Pleasing Pink Nail Lacquer 049,http://assets.myntassets.com/assets/images/558...,55846.jpg


In [13]:
# Every product whose article type is 'Deodorant'; the bare name on the last
# line displays the selection.
deodrant = df.loc[df['articleType'].eq('Deodorant')]
deodrant

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename
24,18461,Men,Personal Care,Fragrance,Deodorant,White,Spring,2017,Casual,David Beckham Signature Men Deos,http://assets.myntassets.com/v1/images/style/p...,18461.jpg
181,26735,Men,Personal Care,Fragrance,Deodorant,Black,Spring,2017,Casual,Reebok Men Pack of 2 Pirates of the Caribbean ...,http://assets.myntassets.com/v1/images/style/p...,26735.jpg
186,58980,Women,Personal Care,Fragrance,Deodorant,Pink,Spring,2017,Casual,Colour me Women Flowers Deo,http://assets.myntassets.com/v1/images/style/p...,58980.jpg
234,58974,Men,Personal Care,Fragrance,Deodorant,Gold,Spring,2017,Casual,Colour me Men Gold Deo,http://assets.myntassets.com/v1/images/style/p...,58974.jpg
324,46624,Men,Personal Care,Fragrance,Deodorant,Grey,Spring,2017,Casual,Rasasi Men Emotion Deo and Perfume Set,http://assets.myntassets.com/v1/images/style/p...,46624.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...
43737,46667,Men,Personal Care,Fragrance,Deodorant,Brown,Spring,2017,Casual,Rasasi Men Numero Due Deo,http://assets.myntassets.com/v1/images/style/p...,46667.jpg
43789,28244,Men,Personal Care,Fragrance,Deodorant,Blue,Spring,2017,Casual,Park Avenue Men Cool Blue Deo,http://assets.myntassets.com/v1/images/style/p...,28244.jpg
43938,26785,Men,Personal Care,Fragrance,Deodorant,Red,Spring,2017,Casual,Reebok Men Pack of 3 Deos,http://assets.myntassets.com/v1/images/style/p...,26785.jpg
43946,46660,Men,Personal Care,Fragrance,Deodorant,Grey,Spring,2017,Casual,Rasasi Men Deo,http://assets.myntassets.com/v1/images/style/p...,46660.jpg


In [14]:
# Men's jeans in the base colours listed in options.
options = ['Black', 'White', 'Blue']
jeans = df.loc[
    df['articleType'].eq('Jeans')
    & df['baseColour'].isin(options)
    & df['gender'].eq('Men')
]
jeans

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Peter England Men Party Blue Jeans,http://assets.myntassets.com/v1/images/style/p...,39386.jpg
335,16508,Men,Apparel,Bottomwear,Jeans,Blue,Fall,2011,Casual,Locomotive Men Washed Blue Jeans,http://assets.myntassets.com/v1/images/style/p...,16508.jpg
398,11349,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2017,Casual,Lee Men Blue Chicago Fit Jeans,http://assets.myntassets.com/v1/images/style/p...,11349.jpg
455,39381,Men,Apparel,Bottomwear,Jeans,Black,Summer,2012,Casual,Peter England Men Party Black Jeans,http://assets.myntassets.com/v1/images/style/p...,39381.jpg
888,11340,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2017,Casual,Lee Men Blue Chicago Fit Jeans,http://assets.myntassets.com/v1/images/style/p...,11340.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...
43706,48399,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,French Connection Men Blue Jeans,http://assets.myntassets.com/v1/images/style/p...,48399.jpg
43719,11332,Men,Apparel,Bottomwear,Jeans,Blue,Fall,2011,Casual,United Colors of Benetton Men Washed Blue Jeans,http://assets.myntassets.com/v1/images/style/p...,11332.jpg
43928,11335,Men,Apparel,Bottomwear,Jeans,Blue,Fall,2011,Casual,United Colors of Benetton Men Washed Blue Jeans,http://assets.myntassets.com/v1/images/style/p...,11335.jpg
44040,23455,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Deni Yo Men Blue Washed Slim Fit Jeans,http://assets.myntassets.com/v1/images/style/p...,23455.jpg


In [16]:
# Trousers (any gender) in the base colours listed in options_trousers.
options_trousers = ['Black', 'White', 'Grey']
trousers = df.loc[
    df['articleType'].eq('Trousers')
    & df['baseColour'].isin(options_trousers)
]
trousers

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename
164,10257,Men,Apparel,Bottomwear,Trousers,Black,Fall,2011,Formal,John Miller Men Solid Type Black Trousers,http://assets.myntassets.com/v1/images/style/p...,10257.jpg
219,26163,Men,Apparel,Bottomwear,Trousers,Grey,Summer,2012,Formal,John Miller Men Dark Grey Trousers,http://assets.myntassets.com/v1/images/style/p...,26163.jpg
586,26164,Men,Apparel,Bottomwear,Trousers,Grey,Summer,2012,Formal,John Miller Men Striped Grey Trousers,http://assets.myntassets.com/v1/images/style/p...,26164.jpg
799,10259,Men,Apparel,Bottomwear,Trousers,Black,Fall,2011,Formal,John Miller Men Reglur Black Trousers,http://assets.myntassets.com/v1/images/style/p...,10259.jpg
884,27622,Men,Apparel,Bottomwear,Trousers,Grey,Summer,2012,Casual,Scullers Men Grey Trousers,http://assets.myntassets.com/v1/images/style/p...,27622.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...
43105,39797,Men,Apparel,Bottomwear,Trousers,Black,Summer,2012,Formal,Peter England Men Black Formal Trousers,http://assets.myntassets.com/v1/images/style/p...,39797.jpg
43194,43912,Women,Apparel,Bottomwear,Trousers,Black,Summer,2012,Casual,Femella Women Black Trousers,http://assets.myntassets.com/v1/images/style/p...,43912.jpg
43200,39763,Women,Apparel,Bottomwear,Trousers,Grey,Fall,2012,Formal,Arrow Woman Grey Trousers,http://assets.myntassets.com/v1/images/style/p...,39763.jpg
43527,21598,Men,Apparel,Bottomwear,Trousers,Black,Fall,2011,Formal,Arrow Men Solid Black Trousers,http://assets.myntassets.com/v1/images/style/p...,21598.jpg


In [24]:
from sklearn.utils import shuffle

# Randomly pick 300 socks in the base colours listed in options_socks.
# random_state pins the shuffle so that re-running the notebook selects the
# same products instead of a different subset every time.
options_socks = ['Black', 'White', 'Grey']
socks = df[(df['articleType'] == 'Socks') & (df['baseColour'].isin(options_socks))]
socks = shuffle(socks, random_state=42).head(300)

In [25]:
# Number of sampled socks per base colour (nlargest() keeps at most the top 5).
socks.groupby('baseColour')['baseColour'].count().nlargest() 

baseColour
Black    131
White    114
Grey      55
Name: baseColour, dtype: int64

In [18]:
# Display the sampled socks.
socks

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename
12724,29214,Women,Accessories,Socks,Socks,White,Fall,2014,Sports,Fila Women White Socks,http://assets.myntassets.com/v1/images/style/p...,29214.jpg
8762,32026,Men,Accessories,Socks,Socks,Grey,Summer,2016,Casual,Playboy Men Multi Coloured Socks,http://assets.myntassets.com/v1/images/style/p...,32026.jpg
28450,51148,Men,Accessories,Socks,Socks,Black,Summer,2016,Casual,Parx Men Black Socks,http://assets.myntassets.com/v1/images/style/p...,51148.jpg
39268,18217,Men,Accessories,Socks,Socks,Grey,Fall,2011,Casual,Manchester United Men Solid Grey Socks,http://assets.myntassets.com/v1/images/style/p...,18217.jpg
25252,54343,Men,Accessories,Socks,Socks,White,Summer,2016,Casual,Levis Men Pack of 3 Socks,http://assets.myntassets.com/v1/images/style/p...,54343.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...
21325,54339,Men,Accessories,Socks,Socks,Black,Summer,2016,Casual,Levis Men Pack of 3 Socks,http://assets.myntassets.com/v1/images/style/p...,54339.jpg
14848,9355,Men,Accessories,Socks,Socks,White,Fall,2011,Casual,Puma Men Sport Pack of 3 White Socks,http://assets.myntassets.com/v1/images/style/p...,9355.jpg
21246,46862,Men,Accessories,Socks,Socks,Black,Summer,2012,Casual,Lino Perros Men Pack of 2 Socks,http://assets.myntassets.com/v1/images/style/p...,46862.jpg
3871,29115,Men,Accessories,Socks,Socks,White,Summer,2012,Casual,Puma Men Foundation Quarters White Socks,http://assets.myntassets.com/v1/images/style/p...,29115.jpg


In [38]:
# Randomly pick 200 sunglasses in the base colours listed in
# options_sunglasses.  random_state pins the shuffle so the same products are
# selected on every run.
options_sunglasses = ['Black', 'Brown']
sunglasses = df[(df['articleType'] == 'Sunglasses') & (df['baseColour'].isin(options_sunglasses))]
sunglasses = shuffle(sunglasses, random_state=42).head(200)

In [39]:
# Number of sampled sunglasses per base colour (top 5 at most).
sunglasses.groupby('baseColour')['baseColour'].count().nlargest() 

baseColour
Black    108
Brown     92
Name: baseColour, dtype: int64

In [46]:
# Randomly pick 500 watches in the base colours listed in options_watch.
# random_state pins the shuffle so the same products are selected on every run.
options_watch = ['Black', 'White', 'Silver']
watch = df[(df['articleType'] == 'Watch') & (df['baseColour'].isin(options_watch))]
watch = shuffle(watch, random_state=42).head(500)

In [47]:
# Number of sampled watches per base colour (top 5 at most).
watch.groupby('baseColour')['baseColour'].count().nlargest() 

baseColour
Black     271
White     171
Silver     58
Name: baseColour, dtype: int64

In [51]:
# Randomly pick 300 casual T-shirts in the base colours listed in
# options_tshirt.  The three conditions are combined with a flat AND (the
# original nested the parentheses, which evaluates to the same mask), and
# random_state pins the shuffle so the same products are selected on every run.
options_tshirt = ['Black', 'White']
tshirts = df[
    (df['articleType'] == 'Tshirts')
    & (df['baseColour'].isin(options_tshirt))
    & (df['usage'] == 'Casual')
]
tshirts = shuffle(tshirts, random_state=42).head(300)

In [52]:
# Number of sampled T-shirts per base colour (top 5 at most).
tshirts.groupby('baseColour')['baseColour'].count().nlargest() 

baseColour
White    162
Black    138
Name: baseColour, dtype: int64

In [77]:
# Randomly pick 500 formal/casual shoes, filtered by colour, gender and usage.
# The four conditions are combined with a flat AND (the original nested the
# parentheses, which evaluates to the same mask), and random_state pins the
# shuffle so the same products are selected on every run.
options_shoes = ['Black', 'White']
option_s = ['Formal Shoes', 'Casual Shoes']
option_gen = ['Men', 'Women']
options_usage = ['Casual', 'Formal']
shoes = df[
    (df['articleType'].isin(option_s))
    & (df['baseColour'].isin(options_shoes))
    & (df['gender'].isin(option_gen))
    & (df['usage'].isin(options_usage))
]
shoes = shuffle(shoes, random_state=42).head(500)


In [78]:
# Number of sampled shoes per usage value (top 5 at most).
shoes.groupby('usage')['usage'].count().nlargest() 

usage
Casual    361
Formal    139
Name: usage, dtype: int64

In [79]:
# Stack every per-category selection row-wise into one frame; the original
# row labels of df are kept.
frames = [
    ring,
    ties,
    nailpolish,
    deodrant,
    jeans,
    trousers,
    socks,
    sunglasses,
    watch,
    tshirts,
    shoes,
]
result = pd.concat(frames, axis=0)

In [80]:
# Display the combined selection.
result

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename
121,49005,Women,Accessories,Jewellery,Ring,Silver,Summer,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,49005.jpg
576,48927,Women,Accessories,Jewellery,Ring,Silver,Summer,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,48927.jpg
736,49034,Women,Accessories,Jewellery,Ring,Silver,Summer,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,49034.jpg
1060,48929,Women,Accessories,Jewellery,Ring,Silver,Fall,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,48929.jpg
1094,49033,Women,Accessories,Jewellery,Ring,Silver,Summer,2012,Casual,Lucera Women Silver Ring,http://assets.myntassets.com/v1/images/style/p...,49033.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...
966,26536,Men,Footwear,Shoes,Casual Shoes,White,Summer,2012,Casual,ID Men White Shoes,http://assets.myntassets.com/v1/images/style/p...,26536.jpg
40538,44767,Men,Footwear,Shoes,Casual Shoes,White,Summer,2012,Casual,Globalite Men White Eclipse Shoes,http://assets.myntassets.com/v1/images/style/p...,44767.jpg
602,10634,Men,Footwear,Shoes,Formal Shoes,Black,Winter,2012,Formal,Carlton London Men Black Oxford Wingtip Brogues,http://assets.myntassets.com/v1/images/style/p...,10634.jpg
8568,36481,Men,Footwear,Shoes,Casual Shoes,Black,Summer,2012,Casual,Spinn Men Black Elevate Shoes,http://assets.myntassets.com/v1/images/style/p...,36481.jpg


In [123]:
from sklearn.utils import shuffle

# Mix the categories together.  One shuffle is already a uniform random
# permutation, so the second pass the original performed added nothing;
# random_state makes the resulting row order reproducible.
new_df = shuffle(result, random_state=42)

In [126]:
# Renumber the shuffled rows 0..n-1; the previous labels are moved into an
# 'index' column.
rest_new_df = new_df.reset_index(drop=False)

In [127]:
# Discard the 'index' column holding the old row labels.
rest_new_df = rest_new_df.drop(columns=['index'])

In [131]:
import random

# Give every product a synthetic price: a random multiple of 5 between 25.0
# and 200.0.  The number of prices is taken from the frame itself rather than
# a hard-coded row count, so the assignment cannot fail with a length mismatch
# when the selection above changes size.
amount = [float(random.randint(5, 40) * 5) for _ in range(len(rest_new_df))]

rest_new_df['price'] = amount

In [132]:
# Display the frame with the new 'price' column.
rest_new_df

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename,price
0,10060,Men,Apparel,Topwear,Tshirts,White,Fall,2011,Casual,Puma Men motor sport tee White Tshirts,http://assets.myntassets.com/v1/images/style/p...,10060.jpg,40.0
1,23766,Men,Accessories,Eyewear,Sunglasses,Black,Winter,2016,Casual,Oakley Men Black Fast Jacket Sunglasses,http://assets.myntassets.com/v1/images/style/p...,23766.jpg,135.0
2,37332,Women,Accessories,Watch,Watch,White,Winter,2016,Casual,DKNY Women White Dial Watch NY8482,http://assets.myntassets.com/v1/images/style/p...,37332.jpg,120.0
3,56464,Women,Personal Care,Nails,Nail Polish,Pink,Spring,2017,Casual,Streetwear Pink Petals Nail Polish 34,http://assets.myntassets.com/v1/images/style/p...,56464.jpg,145.0
4,13506,Men,Apparel,Topwear,Tshirts,White,Fall,2011,Casual,Chimp Men Color me Happy White Tshirts,http://assets.myntassets.com/v1/images/style/p...,13506.jpg,130.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3211,37329,Women,Accessories,Watch,Watch,White,Winter,2016,Casual,Casio Sheen Women White Dial Watch SX048,http://assets.myntassets.com/v1/images/style/p...,37329.jpg,130.0
3212,18581,Men,Accessories,Socks,Socks,Black,Summer,2011,Casual,Puma Men Formstripe Short Black Socks,http://assets.myntassets.com/v1/images/style/p...,18581.jpg,100.0
3213,55880,Women,Personal Care,Nails,Nail Polish,Silver,Spring,2017,Casual,Colorbar Exclusive Nail Lacquer 56,http://assets.myntassets.com/assets/images/558...,55880.jpg,165.0
3214,29433,Men,Accessories,Watch,Watch,White,Winter,2016,Casual,Giordano Men White Dial Watch,http://assets.myntassets.com/v1/images/style/p...,29433.jpg,180.0


In [133]:
# Write the priced selection to products_data.csv without the row index.
rest_new_df.to_csv('products_data.csv', index=False)

In [27]:
# The 20 most frequent subCategory values in the full dataset, with row counts.
df.groupby('subCategory')['subCategory'].count().nlargest(20) 

subCategory
Topwear                     15390
Shoes                        7324
Bags                         3053
Bottomwear                   2686
Watch                        2542
Innerwear                    1806
Jewellery                    1080
Eyewear                      1073
Fragrance                    1002
Sandal                        961
Wallets                       925
Flip Flops                    915
Belts                         811
Socks                         698
Dress                         478
Loungewear and Nightwear      464
Saree                         427
Lips                          425
Headwear                      293
Nails                         278
Name: subCategory, dtype: int64

In [38]:
# The 50 most frequent articleType values in the full dataset, with row counts.
df.groupby('articleType')['articleType'].count().nlargest(50) 

articleType
Tshirts                  7069
Shirts                   3214
Casual Shoes             2846
Watch                    2542
Sports Shoes             2016
Kurtas                   1844
Tops                     1762
Handbags                 1759
Heels                    1323
Sunglasses               1073
Wallets                   928
Flip Flops                916
Sandals                   895
Briefs                    847
Belts                     813
Backpacks                 724
Socks                     686
Formal Shoes              637
Perfume and Body Mist     604
Jeans                     603
Shorts                    547
Trousers                  530
Flats                     500
Bra                       477
Dresses                   464
Sarees                    427
Earrings                  417
Deodorant                 347
Track Pants               304
Clutches                  288
Sweatshirts               285
Caps                      283
Nail Polish               27

In [134]:
# No sub-sampling happens here: samples_df is just a second name for
# rest_new_df (the same object, not a copy), so in-place changes made through
# either name are visible through both.
samples_df = rest_new_df

In [135]:
# Display the sample frame.
samples_df

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename,price
0,10060,Men,Apparel,Topwear,Tshirts,White,Fall,2011,Casual,Puma Men motor sport tee White Tshirts,http://assets.myntassets.com/v1/images/style/p...,10060.jpg,40.0
1,23766,Men,Accessories,Eyewear,Sunglasses,Black,Winter,2016,Casual,Oakley Men Black Fast Jacket Sunglasses,http://assets.myntassets.com/v1/images/style/p...,23766.jpg,135.0
2,37332,Women,Accessories,Watch,Watch,White,Winter,2016,Casual,DKNY Women White Dial Watch NY8482,http://assets.myntassets.com/v1/images/style/p...,37332.jpg,120.0
3,56464,Women,Personal Care,Nails,Nail Polish,Pink,Spring,2017,Casual,Streetwear Pink Petals Nail Polish 34,http://assets.myntassets.com/v1/images/style/p...,56464.jpg,145.0
4,13506,Men,Apparel,Topwear,Tshirts,White,Fall,2011,Casual,Chimp Men Color me Happy White Tshirts,http://assets.myntassets.com/v1/images/style/p...,13506.jpg,130.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3211,37329,Women,Accessories,Watch,Watch,White,Winter,2016,Casual,Casio Sheen Women White Dial Watch SX048,http://assets.myntassets.com/v1/images/style/p...,37329.jpg,130.0
3212,18581,Men,Accessories,Socks,Socks,Black,Summer,2011,Casual,Puma Men Formstripe Short Black Socks,http://assets.myntassets.com/v1/images/style/p...,18581.jpg,100.0
3213,55880,Women,Personal Care,Nails,Nail Polish,Silver,Spring,2017,Casual,Colorbar Exclusive Nail Lacquer 56,http://assets.myntassets.com/assets/images/558...,55880.jpg,165.0
3214,29433,Men,Accessories,Watch,Watch,White,Winter,2016,Casual,Giordano Men White Dial Watch,http://assets.myntassets.com/v1/images/style/p...,29433.jpg,180.0


In [5]:
import random

# (Re)assign a synthetic price to every row: a random multiple of 5 between
# 25.0 and 200.0.  The original generated a fixed 3183 values, which does not
# match the frame's row count and makes the column assignment raise
# "Length of values does not match length of index"; sizing the list from the
# frame itself always fits.  Any existing 'price' column is overwritten.
amount = [float(random.randint(5, 40) * 5) for _ in range(len(samples_df))]

samples_df['price'] = amount

In [136]:
# Display the sample frame again.
samples_df

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename,price
0,10060,Men,Apparel,Topwear,Tshirts,White,Fall,2011,Casual,Puma Men motor sport tee White Tshirts,http://assets.myntassets.com/v1/images/style/p...,10060.jpg,40.0
1,23766,Men,Accessories,Eyewear,Sunglasses,Black,Winter,2016,Casual,Oakley Men Black Fast Jacket Sunglasses,http://assets.myntassets.com/v1/images/style/p...,23766.jpg,135.0
2,37332,Women,Accessories,Watch,Watch,White,Winter,2016,Casual,DKNY Women White Dial Watch NY8482,http://assets.myntassets.com/v1/images/style/p...,37332.jpg,120.0
3,56464,Women,Personal Care,Nails,Nail Polish,Pink,Spring,2017,Casual,Streetwear Pink Petals Nail Polish 34,http://assets.myntassets.com/v1/images/style/p...,56464.jpg,145.0
4,13506,Men,Apparel,Topwear,Tshirts,White,Fall,2011,Casual,Chimp Men Color me Happy White Tshirts,http://assets.myntassets.com/v1/images/style/p...,13506.jpg,130.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3211,37329,Women,Accessories,Watch,Watch,White,Winter,2016,Casual,Casio Sheen Women White Dial Watch SX048,http://assets.myntassets.com/v1/images/style/p...,37329.jpg,130.0
3212,18581,Men,Accessories,Socks,Socks,Black,Summer,2011,Casual,Puma Men Formstripe Short Black Socks,http://assets.myntassets.com/v1/images/style/p...,18581.jpg,100.0
3213,55880,Women,Personal Care,Nails,Nail Polish,Silver,Spring,2017,Casual,Colorbar Exclusive Nail Lacquer 56,http://assets.myntassets.com/assets/images/558...,55880.jpg,165.0
3214,29433,Men,Accessories,Watch,Watch,White,Winter,2016,Casual,Giordano Men White Dial Watch,http://assets.myntassets.com/v1/images/style/p...,29433.jpg,180.0


In [9]:
from sklearn.utils import shuffle

# Reorder the sample rows; random_state keeps the resulting order (and
# therefore the exported files) identical from run to run.
samples_df = shuffle(samples_df, random_state=42)

In [7]:
# Write the samples to products_data.csv without the row index
# (the same file name is also written from rest_new_df earlier).
samples_df.to_csv('products_data.csv', index=False)

In [11]:
# Rows of the sample whose article type is 'Mufflers'.
samples_df.loc[samples_df['articleType'].eq('Mufflers')]

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image,filename,price


In [10]:
# Article type of every sampled row, as an array.
samples_df['articleType'].values

array(['Jeans', 'Jeans', 'Sunglasses', ..., 'Tshirts', 'Ties', 'Watch'],
      dtype=object)

In [40]:
# Row count per subCategory in the sample, largest first (up to 142 entries).
samples_df.groupby('subCategory')['subCategory'].count().nlargest(142) 

subCategory
Topwear                     1620
Shoes                        855
Bags                         387
Watch                        324
Bottomwear                   310
Innerwear                    227
Jewellery                    147
Fragrance                    129
Sandal                       114
Wallets                      109
Flip Flops                   108
Belts                        100
Eyewear                       98
Socks                         68
Lips                          65
Dress                         53
Loungewear and Nightwear      52
Headwear                      41
Saree                         34
Ties                          26
Makeup                        20
Nails                         18
Apparel Set                   17
Accessories                   15
Free Gifts                    14
Scarves                       13
Mufflers                       9
Skin                           9
Skin Care                      6
Shoe Accessories               

In [12]:
# Unlike the products_data.csv exports, this call does not pass index=False,
# so the DataFrame index is written as an extra, unnamed first column.
samples_df.to_csv('5000samples.csv')

In [137]:
# Copy the image of every product in samples_df from the full image dump
# into the local working folder.
dir_src = r"D:\\data\\images"
dir_dst = r"images"

# shutil.copy only copies *into* dst when dst is an existing directory;
# otherwise it treats dst as a file name and every copy would overwrite a
# single file called "images".  Make sure the folder exists first.
os.makedirs(dir_dst, exist_ok=True)

# Build the lookup once: set membership is constant time, whereas testing
# against the column's array rescans every filename for each file on disk.
wanted_files = set(samples_df['filename'])

for root, _, files in os.walk(dir_src):
    for img in tqdm(files):
        if img in wanted_files:
            copy(os.path.join(root, img), dir_dst)


100%|██████████| 44441/44441 [01:43<00:00, 429.96it/s] 


In [2]:
# Frozen ImageNet ResNet50 without its classification head, taking 60x80 RGB
# inputs; a global max-pool on top turns its feature maps into one vector
# per image.
backbone = ResNet50(weights="imagenet", include_top=False, input_shape=(60,80,3))
backbone.trainable = False

model = tensorflow.keras.Sequential([backbone, GlobalMaxPooling2D()])

In [3]:
# Persist the feature-extraction model to product_model.h5.
model.save("product_model.h5")



  layer_config = serialize_layer_fn(layer)


In [139]:
def feature_extraction(img_path, model, target_size=(60, 80)):
    """Return the L2-normalised embedding of a single image.

    Parameters
    ----------
    img_path : str
        Path of the image file to embed.
    model : object with a ``predict`` method
        Model whose prediction for the image is used as the embedding.
    target_size : tuple of int, optional
        (height, width) the image is resized to when loaded.  Defaults to
        (60, 80), the size that was previously hard-coded here.

    Returns
    -------
    numpy.ndarray
        Flattened 1-D feature vector divided by its Euclidean norm.  If the
        norm is zero the vector is returned unscaled, avoiding a division by
        zero that would fill it with NaNs.
    """
    img = image.load_img(img_path, target_size=target_size)
    # The model is fed a batch, so add a leading batch axis of size 1.
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    features = model.predict(preprocess_input(batch)).flatten()

    length = norm(features)
    if length == 0:
        return features
    return features / length

In [140]:
# Embed every file in the local image folder.
# Paths are sorted so the order is the same on every run (os.listdir gives no
# ordering guarantee).  Each path is recorded only after its embedding has
# been computed, so `filenames` and `feature_list` stay aligned even if the
# loop is interrupted part-way through.
image_paths = sorted(os.path.join('images', name) for name in os.listdir('images'))

filenames = []
feature_list = []

for file in tqdm(image_paths):
    feature_list.append(feature_extraction(file, model))
    filenames.append(file)

 68%|██████▊   | 2195/3215 [03:53<01:48,  9.38it/s]


KeyboardInterrupt: 

In [14]:
# Persist the list of image paths to file_name_1.pkl.
joblib.dump(filenames, "file_name_1.pkl")

['file_name_1.pkl']

In [15]:
# Persist the list of embeddings to image_embed_1.pkl.
# NOTE(review): entries are presumably meant to line up one-to-one with the
# saved filenames list -- confirm both lists have the same length before use.
joblib.dump(feature_list, "image_embed_1.pkl")

['image_embed_1.pkl']

In [50]:
# Load the stored embeddings as an array.  joblib.load takes the path
# directly and manages the file itself, so no file handle is left open (the
# original passed an open() result that was never closed).
# NOTE(review): the embeddings above are dumped to "image_embed_1.pkl";
# confirm that "image-embed.pkl" is the intended file.
# joblib files are pickle-based and can run code on load: only load trusted files.
ds = np.array(joblib.load('image-embed.pkl'))