## Creating dataframe for each class label of paths to 500 newly scraped images

In [1]:
import pandas as pd
import os
from glob import glob
import random

### Women's Kurtas

In [906]:
# Getting all the saved images of women's kurtas
# They are in folders that indicate from where they were scraped
# These are from Flipkart
files_fk = glob('data/external/newly_scraped_images/women_kurta/flipkart/*.jpeg')

In [907]:
len(files_fk)

528

In [908]:
# Getting all the saved images of women's kurtas from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/women_kurta/utsavfashion/*.jpg')

In [909]:
len(files_uf)

83

Want 500 files

In [910]:
# Making a list with all of the paths of images of women's kurtas
lst = files_uf + files_fk

In [911]:
# Putting the file paths into a dataframe
df_women_kurta = pd.DataFrame()

In [912]:
df_women_kurta['files'] = lst

In [913]:
df_women_kurta.head()

Unnamed: 0,files
0,women_kurta/utsavfashion/chikankari-cotton-sil...
1,women_kurta/utsavfashion/embroidered-art-silk-...
2,women_kurta/utsavfashion/embroidered-cotton-ku...
3,women_kurta/utsavfashion/solid-color-cotton-ku...
4,women_kurta/utsavfashion/block-printed-cotton-...


In [914]:
# Total number of images of women's kurtas
len(df_women_kurta)

611

In [915]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 611))

In [916]:
len(r)

611

In [917]:
# Adding the random numbers to the df
df_women_kurta['random_number'] = r

In [918]:
df_women_kurta.head()

Unnamed: 0,files,random_number
0,women_kurta/utsavfashion/chikankari-cotton-sil...,2208
1,women_kurta/utsavfashion/embroidered-art-silk-...,4290
2,women_kurta/utsavfashion/embroidered-cotton-ku...,2345
3,women_kurta/utsavfashion/solid-color-cotton-ku...,3084
4,women_kurta/utsavfashion/block-printed-cotton-...,4283


In [919]:
# Sorting the df based on the random numbers to randomize images
df_women_kurta = df_women_kurta.sort_values('random_number')

In [920]:
# Taking only the first 500 images
df_women_kurta = df_women_kurta.iloc[:500, :]

In [921]:
len(df_women_kurta)

500

In [922]:
df_women_kurta.reset_index(drop=True,inplace=True)
df_women_kurta.head()

Unnamed: 0,files,random_number
0,women_kurta/flipkart/6xl-479-sanwariya-creatio...,2
1,women_kurta/flipkart/xxl-ck-11-15-ketaki-fashi...,4
2,women_kurta/flipkart/xxl-47vs1058kr-vasant-app...,26
3,women_kurta/flipkart/s-sa19kr628r-surhi-origin...,33
4,women_kurta/utsavfashion/embroidered-cotton-sh...,34


In [923]:
df_women_kurta.to_csv('data/external/newly_scraped_images/df_women_kurta.csv')

### Sherwanis

In [924]:
# Getting all saved images of sherwanis from Flipkart
files_fk = glob('data/external/newly_scraped_images/sherwanis/flipkart/*.jpeg')

In [925]:
len(files_fk)

443

In [926]:
# Getting all saved images of sherwanis from Karmaplace
files_kp = glob('data/external/newly_scraped_images/sherwanis/karmaplace/*.jpg')

In [927]:
len(files_kp)

589

In [928]:
# Getting all saved images of sherwanis from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/sherwanis/utsavfashion/*.jpg')

In [929]:
len(files_uf)

114

Want 500 files

In [930]:
# Making combined list
lst = files_uf + files_kp + files_fk

In [931]:
# Putting list into df
df_sherwanis = pd.DataFrame()
df_sherwanis['files'] = lst
df_sherwanis.head()

Unnamed: 0,files
0,sherwanis/utsavfashion/embroidered-dupion-silk...
1,sherwanis/utsavfashion/embroidered-velvet-sher...
2,sherwanis/utsavfashion/brocade-sherwani-in-fuc...
3,sherwanis/utsavfashion/mgv144_2.jpg
4,sherwanis/utsavfashion/brocade-sherwani-in-lig...


In [932]:
# Total number of images
len(df_sherwanis)

1146

In [933]:
# Making list of random numbers
r = list(random.sample(range(0, 5000), 1146))

In [934]:
len(r)

1146

In [935]:
# Adding random numbers to df
df_sherwanis['random_number'] = r

In [936]:
df_sherwanis.head()

Unnamed: 0,files,random_number
0,sherwanis/utsavfashion/embroidered-dupion-silk...,2359
1,sherwanis/utsavfashion/embroidered-velvet-sher...,3100
2,sherwanis/utsavfashion/brocade-sherwani-in-fuc...,3797
3,sherwanis/utsavfashion/mgv144_2.jpg,3133
4,sherwanis/utsavfashion/brocade-sherwani-in-lig...,2438


In [937]:
# Sorting dataframe based on random numbers to randomize images
df_sherwanis = df_sherwanis.sort_values('random_number')

In [938]:
# Taking only first 500 images
df_sherwanis = df_sherwanis.iloc[:500, :]

In [939]:
len(df_sherwanis)

500

In [940]:
df_sherwanis.reset_index(drop=True, inplace=True)
df_sherwanis.head()

Unnamed: 0,files,random_number
0,sherwanis/karmaplace/VSHRMIW026CRnCDCRnMD002CR...,3
1,sherwanis/flipkart/m-instar-141-maroon-pldp-ta...,4
2,sherwanis/flipkart/xxl-aroras-the-complete-fas...,7
3,sherwanis/karmaplace/VASMIW001NBnKRG001nPRG-2_...,11
4,sherwanis/flipkart/m-crkurta1-peb-avaeta-origi...,16


In [941]:
# Saving df with file paths
df_sherwanis.to_csv('data/external/newly_scraped_images/df_sherwanis.csv')

### Sarees

In [942]:
# Getting all saved images of sarees from Flipkart
files_fk = glob('data/external/newly_scraped_images/saree/flipkart/*.jpeg')

In [943]:
len(files_fk)

574

In [944]:
# Getting all saved images of sarees from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/saree/utsavfashion/*.jpg')

In [945]:
len(files_uf)

294

Want 500 files

In [946]:
# Making combined list
lst = files_uf + files_fk

In [947]:
df_saree = pd.DataFrame()

In [948]:
df_saree['files'] = lst

In [949]:
df_saree.head()

Unnamed: 0,files
0,saree/utsavfashion/embroidered-satin-butterfly...
1,saree/utsavfashion/shibori-printed-organza-sar...
2,saree/utsavfashion/banarasi-saree-in-off-white...
3,saree/utsavfashion/kanchipuram-pure-silk-handl...
4,saree/utsavfashion/embroidered-net-scalloped-s...


In [950]:
len(df_saree)

868

In [951]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 868))

In [952]:
len(r)

868

In [953]:
# Adding the random numbers to the df
df_saree['random_number'] = r

In [954]:
df_saree.head()

Unnamed: 0,files,random_number
0,saree/utsavfashion/embroidered-satin-butterfly...,4670
1,saree/utsavfashion/shibori-printed-organza-sar...,1750
2,saree/utsavfashion/banarasi-saree-in-off-white...,2739
3,saree/utsavfashion/kanchipuram-pure-silk-handl...,2786
4,saree/utsavfashion/embroidered-net-scalloped-s...,2398


In [955]:
# Sorting the df based on the random numbers to randomize images
df_saree = df_saree.sort_values('random_number')

In [956]:
# Taking only first 500 images
df_saree = df_saree.iloc[:500, :]

In [957]:
len(df_saree)

500

In [958]:
df_saree.reset_index(drop=True, inplace=True)
df_saree.head()

Unnamed: 0,files,random_number
0,saree/utsavfashion/digital-printed-crepe-saree...,1
1,saree/flipkart/free-r-kaju-black-rosyqueen-uns...,6
2,saree/flipkart/free-new-launch-pencil-art-soft...,10
3,saree/flipkart/free-pink-jay-ramdev-fashion-un...,13
4,saree/flipkart/free-7129-pinkeve-unstitched-or...,17


In [959]:
df_saree.to_csv('df_saree.csv')

### Petticoats

In [960]:
# Getting all saved images of petticoats from Flipkart
files_fk = glob('data/external/newly_scraped_images/petticoats/flipkart/*.jpeg')

In [961]:
len(files_fk)

472

In [962]:
# Getting all saved images of petticoats from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/petticoats/utsavfashion/*.jpg')

In [963]:
len(files_uf)

84

Want 500 files

In [964]:
# Making combined list
lst = files_uf + files_fk

In [965]:
df_petticoats = pd.DataFrame()

In [966]:
df_petticoats['files'] = lst

In [967]:
df_petticoats.head()

Unnamed: 0,files
0,petticoats/utsavfashion/plain-combo-of-cotton-...
1,petticoats/utsavfashion/uub87.jpg
2,petticoats/utsavfashion/uub78.jpg
3,petticoats/utsavfashion/maternity-pre-stitched...
4,petticoats/utsavfashion/uub51.jpg


In [968]:
len(df_petticoats)

556

In [969]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 556))

In [970]:
len(r)

556

In [971]:
# Adding the random numbers to the df
df_petticoats['random_number'] = r

In [972]:
df_petticoats.head()

Unnamed: 0,files,random_number
0,petticoats/utsavfashion/plain-combo-of-cotton-...,4433
1,petticoats/utsavfashion/uub87.jpg,1980
2,petticoats/utsavfashion/uub78.jpg,3464
3,petticoats/utsavfashion/maternity-pre-stitched...,4094
4,petticoats/utsavfashion/uub51.jpg,318


In [973]:
# Sorting the df based on the random numbers to randomize images
df_petticoats = df_petticoats.sort_values('random_number')

In [974]:
# Taking only first 500 images
df_petticoats = df_petticoats.iloc[:500, :]

In [975]:
len(df_petticoats)

500

In [976]:
df_petticoats.reset_index(drop=True, inplace=True)
df_petticoats.head()

Unnamed: 0,files,random_number
0,petticoats/flipkart/medium-firozi-jamuni-navy-...,38
1,petticoats/flipkart/free-1-104-namra-tex-origi...,42
2,petticoats/utsavfashion/plain-combo-of-cotton-...,43
3,petticoats/flipkart/free-1-gd-1102-silver-gudd...,46
4,petticoats/utsavfashion/solid-color-lycra-cott...,50


In [977]:
df_petticoats.to_csv('data/external/newly_scraped_images/df_petticoats.csv')

### Palazzos

In [978]:
# Getting all saved images of palazzos from Flipkart
files_fk = glob('data/external/newly_scraped_images/palazzos/flipkart/*.jpeg')

In [979]:
len(files_fk)

462

In [980]:
# Getting all saved images of palazzos from Amazon.in
files_ai = glob('data/external/newly_scraped_images/palazzos/amazon.in/*.jpg')

In [981]:
len(files_ai)

248

In [982]:
# Getting all saved images of palazzos from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/palazzos/utsavfashion/*.jpg')

In [983]:
len(files_uf)

120

Want 500 files

In [984]:
# Making combined list
lst = files_uf + files_ai + files_fk

In [985]:
df_palazzos = pd.DataFrame()

In [986]:
df_palazzos['files'] = lst

In [987]:
df_palazzos.head()

Unnamed: 0,files
0,palazzos/utsavfashion/printed-rayon-kurta-with...
1,palazzos/utsavfashion/block-printed-cotton-pal...
2,palazzos/utsavfashion/foil-printed-art-silk-st...
3,palazzos/utsavfashion/thub12.jpg
4,palazzos/utsavfashion/embroidered-art-silk-kur...


In [988]:
len(df_palazzos)

830

In [989]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 830))

In [990]:
len(r)

830

In [991]:
# Adding the random numbers to the df
df_palazzos['random_number'] = r

In [992]:
df_palazzos.head()

Unnamed: 0,files,random_number
0,palazzos/utsavfashion/printed-rayon-kurta-with...,2350
1,palazzos/utsavfashion/block-printed-cotton-pal...,2339
2,palazzos/utsavfashion/foil-printed-art-silk-st...,501
3,palazzos/utsavfashion/thub12.jpg,1806
4,palazzos/utsavfashion/embroidered-art-silk-kur...,1188


In [993]:
# Sorting the df based on the random numbers to randomize images
df_palazzos = df_palazzos.sort_values('random_number')

In [994]:
# Taking only first 500 images
df_palazzos = df_palazzos.iloc[:500, :]

In [995]:
len(df_palazzos)

500

In [996]:
df_palazzos.reset_index(drop=True, inplace=True)
df_palazzos.head()

Unnamed: 0,files,random_number
0,palazzos/amazon.in/51A3+RBuc9L._AC_UL320_.jpg,7
1,"palazzos/amazon.in/61V9e2YxiWL._AC_SR180,120_Q...",16
2,palazzos/amazon.in/61BQmTBPlSS._AC_UL320_.jpg,18
3,palazzos/flipkart/32-el-2120-bottom-epilog-ori...,20
4,palazzos/amazon.in/71H2HQNGOXS._AC_UL320_.jpg,27


In [997]:
df_palazzos.to_csv('data/external/newly_scraped_images/df_palazzos.csv')

### Nehru Jackets

In [998]:
# Getting all saved images of Nehru jackets from Flipkart
files_fk = glob('data/external/newly_scraped_images/nehru_jackets/flipkart/*.jpeg')

In [999]:
len(files_fk)

540

In [1000]:
# Getting all saved images of Nehru jackets from Karmaplace
files_kp = glob('data/external/newly_scraped_images/nehru_jackets/karmaplace/*.jpg')

In [1001]:
len(files_kp)

20

In [1002]:
# Getting all saved images of Nehru jackets from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/nehru_jackets/utsavfashion/*.jpg')

In [1003]:
len(files_uf)

120

Want 500 files

In [1004]:
# Making combined list
lst = files_uf + files_kp + files_fk

In [1005]:
df_nehru_jackets = pd.DataFrame()

In [1006]:
df_nehru_jackets['files'] = lst

In [1007]:
df_nehru_jackets.head()

Unnamed: 0,files
0,nehru_jackets/utsavfashion/embroidered-georget...
1,nehru_jackets/utsavfashion/woven-art-silk-jacq...
2,nehru_jackets/utsavfashion/brocade-brocade-neh...
3,nehru_jackets/utsavfashion/digital-printed-cot...
4,nehru_jackets/utsavfashion/embroidered-art-sil...


In [1008]:
len(df_nehru_jackets)

680

In [1009]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 680))

In [1010]:
len(r)

680

In [None]:
# Adding the random numbers to the df
df_nehru_jackets['random_number'] = r

In [1012]:
df_nehru_jackets.head()

Unnamed: 0,files,random_number
0,nehru_jackets/utsavfashion/embroidered-georget...,1730
1,nehru_jackets/utsavfashion/woven-art-silk-jacq...,3235
2,nehru_jackets/utsavfashion/brocade-brocade-neh...,1384
3,nehru_jackets/utsavfashion/digital-printed-cot...,4609
4,nehru_jackets/utsavfashion/embroidered-art-sil...,3255


In [1013]:
# Sorting the df based on the random numbers to randomize images
df_nehru_jackets = df_nehru_jackets.sort_values('random_number')

In [1014]:
# Taking only first 500 images
df_nehru_jackets = df_nehru_jackets.iloc[:500, :]

In [1015]:
len(df_nehru_jackets)

500

In [1016]:
df_nehru_jackets.reset_index(drop=True, inplace=True)
df_nehru_jackets.head()

Unnamed: 0,files,random_number
0,nehru_jackets/flipkart/48-nehru-jacket-badoliy...,17
1,nehru_jackets/flipkart/s-no-jkt-201-cream-male...,18
2,nehru_jackets/flipkart/xxl-1-no-cmob00969-o6-c...,34
3,nehru_jackets/flipkart/s-cream-beige-modi-999-...,41
4,nehru_jackets/flipkart/xl-no-wc-jsl-148-silver...,61


In [1017]:
df_nehru_jackets.to_csv('data/external/newly_scraped_images/df_nehru_jackets.csv')

### Women's Mojaris

In [1018]:
# Getting all saved images of women's mojaris from Flipkart
files_fk = glob('data/external/newly_scraped_images/mojaris_women/flipkart/*.jpeg')

In [1019]:
len(files_fk)

475

In [1020]:
# Getting all saved images of women's mojaris from Amazon.in
files_ai = glob('mojaris_women/amazon.in/*.jpg')

In [1021]:
len(files_ai)

136

In [1022]:
# Getting all saved images of women's mojaris from Mojari
files_mi = glob('data/external/newly_scraped_images/mojaris_women/mojari/*.jpg')

Want 500 files

In [1023]:
# Making combined list
lst = files_fk + files_ai + files_mi

In [1024]:
df_mojaris_women = pd.DataFrame()

In [1025]:
df_mojaris_women['files'] = lst

In [1026]:
df_mojaris_women.head()

Unnamed: 0,files
0,mojaris_women/flipkart/dd015-35-the-desi-dulha...
1,mojaris_women/flipkart/4-ljt54-4-sdshopping-cr...
2,mojaris_women/flipkart/6-makkhi-k-white-aissay...
3,mojaris_women/flipkart/8-dd680-41-the-desi-dul...
4,mojaris_women/flipkart/dd111-37-the-desi-dulha...


In [1027]:
len(df_mojaris_women)

946

In [1028]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 946))

In [1029]:
len(r)

946

In [1030]:
# Adding the random numbers to the df
df_mojaris_women['random_number'] = r

In [1031]:
df_mojaris_women.head()

Unnamed: 0,files,random_number
0,mojaris_women/flipkart/dd015-35-the-desi-dulha...,3161
1,mojaris_women/flipkart/4-ljt54-4-sdshopping-cr...,1604
2,mojaris_women/flipkart/6-makkhi-k-white-aissay...,4934
3,mojaris_women/flipkart/8-dd680-41-the-desi-dul...,2620
4,mojaris_women/flipkart/dd111-37-the-desi-dulha...,74


In [1032]:
# Sorting the df based on the random numbers to randomize images
df_mojaris_women = df_mojaris_women.sort_values('random_number')

In [1033]:
# Taking only first 500 images
df_mojaris_women = df_mojaris_women.iloc[:500, :]

In [1034]:
len(df_mojaris_women)

500

In [1035]:
df_mojaris_women.reset_index(drop=True, inplace=True)
df_mojaris_women.head()

Unnamed: 0,files,random_number
0,mojaris_women/mojari/SS-1320Maroon_jpg_533x.jpg,10
1,mojaris_women/amazon.in/612Ar2Ct9-L._AC_UL320_...,13
2,mojaris_women/mojari/SS-2035Maroon_jpg_533x.jpg,14
3,mojaris_women/flipkart/8-indian-handicraft-syn...,26
4,mojaris_women/flipkart/5-dc2007-38-desicolour-...,29


In [1036]:
df_mojaris_women.to_csv('data/external/newly_scraped_images/df_mojaris_women.csv')

### Men's Mojaris

In [1037]:
# Getting all saved images of men's mojaris from Flipkart
files_fk = glob('data/external/newly_scraped_images/mojaris_men/flipkart/*.jpeg')

In [1038]:
len(files_fk)

491

In [1039]:
# Getting all saved images of men's mojaris from Amazon.in
files_ai = glob('data/external/newly_scraped_images/mojaris_men/amazon.in/*.jpg')

In [1040]:
len(files_ai)

203

In [1041]:
# Getting all saved images of men's mojaris from Rajwadi
files_ri = glob('data/external/newly_scraped_images/mojaris_men/rajwadi/*.jpg')

Want 500 files

In [1042]:
# Making combined list
lst = files_ai + files_fk + files_ri

In [1043]:
df_mojaris_men = pd.DataFrame()

In [1044]:
df_mojaris_men['files'] = lst

In [1045]:
df_mojaris_men.head()

Unnamed: 0,files
0,"mojaris_men/amazon.in/61GVo6+OdoL._AC_UF264,32..."
1,"mojaris_men/amazon.in/61z0M0UDG0L._AC_UF264,32..."
2,"mojaris_men/amazon.in/711V72jOSZL._AC_UF264,32..."
3,mojaris_men/amazon.in/41I5gvBh7-L._AC_UL320_.jpg
4,"mojaris_men/amazon.in/61KOjyXUqdL._AC_SR160,13..."


In [1046]:
len(df_mojaris_men)

725

In [1047]:
# Making a list of random numbers with the same length as the dataframe
r = list(random.sample(range(0, 5000), 725))

In [1048]:
len(r)

725

In [1049]:
# Adding the random numbers to the df
df_mojaris_men['random_number'] = r

In [1050]:
df_mojaris_men.head()

Unnamed: 0,files,random_number
0,"mojaris_men/amazon.in/61GVo6+OdoL._AC_UF264,32...",2483
1,"mojaris_men/amazon.in/61z0M0UDG0L._AC_UF264,32...",1388
2,"mojaris_men/amazon.in/711V72jOSZL._AC_UF264,32...",4015
3,mojaris_men/amazon.in/41I5gvBh7-L._AC_UL320_.jpg,2758
4,"mojaris_men/amazon.in/61KOjyXUqdL._AC_SR160,13...",3745


In [1051]:
# Sorting the df based on the random numbers to randomize images
df_mojaris_men = df_mojaris_men.sort_values('random_number')

In [1052]:
# Taking only first 500 images
df_mojaris_men = df_mojaris_men.iloc[:500, :]

In [1053]:
df_mojaris_men.reset_index(drop=True, inplace=True)
df_mojaris_men.head()

Unnamed: 0,files,random_number
0,mojaris_men/flipkart/11-plain-j7-maroon-royal-...,3
1,mojaris_men/amazon.in/51cJzrSMKaL._AC_UL320_.jpg,7
2,mojaris_men/flipkart/10-anmpvl-007-brown-10-an...,10
3,mojaris_men/rajwadi/brocade-silk-groom-wear-mo...,16
4,mojaris_men/flipkart/550yuv-8-yuvrato-baxi-red...,33


In [1054]:
df_mojaris_men.to_csv('data/external/newly_scraped_images/df_mojaris_men.csv')

### Men's Kurtas

In [1055]:
# Getting all saved images of men's kurtas from Flipkart
files_fk = glob('data/external/newly_scraped_images/kurta_men/flipkart/*.jpeg')

In [1056]:
len(files_fk)

542

In [1057]:
# Getting all saved images of men's kurtas from Karmaplace
files_kp = glob('data/external/newly_scraped_images/kurta_men/karmaplace/*.jpg')

In [1058]:
len(files_kp)

534

Want 500 files

In [1059]:
# Making combined list
lst = files_kp + files_fk

In [1060]:
df_kurta_men = pd.DataFrame()

In [1061]:
df_kurta_men['files'] = lst

In [1062]:
df_kurta_men.head()

Unnamed: 0,files
0,kurta_men/karmaplace/VPLUSMKPV001GN-1_512x769.jpg
1,kurta_men/karmaplace/LK-STR1215-BKC-1_512x769.jpg
2,kurta_men/karmaplace/LK-MMS755-MR0C_512x769.jpg
3,kurta_men/karmaplace/1_6d1c15cf-20e6-46fb-9990...
4,kurta_men/karmaplace/VSHRMK136GY-3_512x769.jpg


In [1063]:
len(df_kurta_men)

1076

In [1064]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 1076))

In [1065]:
len(r)

1076

In [1066]:
# Adding the random numbers to the df
df_kurta_men['random_number'] = r

In [1067]:
df_kurta_men.head()

Unnamed: 0,files,random_number
0,kurta_men/karmaplace/VPLUSMKPV001GN-1_512x769.jpg,4544
1,kurta_men/karmaplace/LK-STR1215-BKC-1_512x769.jpg,2362
2,kurta_men/karmaplace/LK-MMS755-MR0C_512x769.jpg,4470
3,kurta_men/karmaplace/1_6d1c15cf-20e6-46fb-9990...,1432
4,kurta_men/karmaplace/VSHRMK136GY-3_512x769.jpg,3906


In [1068]:
# Sorting the df based on the random numbers to randomize images
df_kurta_men = df_kurta_men.sort_values('random_number')

In [1069]:
# Taking only first 500 images
df_kurta_men = df_kurta_men.iloc[:500, :]

In [1070]:
len(df_kurta_men)

500

In [1071]:
df_kurta_men.reset_index(drop=True, inplace=True)
df_kurta_men.head()

Unnamed: 0,files,random_number
0,kurta_men/karmaplace/VPLUSMK007BUnPCWH-2_512x7...,0
1,kurta_men/karmaplace/VASMK007PN_512x769.jpg,4
2,kurta_men/karmaplace/LPP-MMS1101-BKC-2_512x769...,7
3,kurta_men/flipkart/s-ic-shk4f-indiclub-origina...,10
4,kurta_men/flipkart/s-rblue-haldi-latest-one-ti...,14


In [1072]:
df_kurta_men.to_csv('data/external/newly_scraped_images/df_kurta_men.csv')

### Lehenga

In [1073]:
# Getting all saved images of lehenga from Flipkart
files_fk = glob('data/external/newly_scraped_images/lehenga/flipkart/*.jpeg')

In [1074]:
len(files_fk)

557

In [1075]:
# Getting all saved images of lehenga from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/lehenga/utsavfashion/*.jpg')

In [1076]:
len(files_uf)

292

Want 500 files

In [1077]:
# Making combined list
lst = files_fk + files_uf

In [1078]:
df_lehenga = pd.DataFrame()

In [1079]:
df_lehenga['files'] = lst

In [1080]:
df_lehenga.head()

Unnamed: 0,files
0,lehenga/flipkart/free-na-kali-sarkari-dishas-o...
1,lehenga/flipkart/free-3-4-sleeve-riya-yellow-d...
2,lehenga/flipkart/free-sleeveless-aaliya-skytar...
3,lehenga/flipkart/free-full-sleeve-d-w-g-dijitr...
4,lehenga/flipkart/free-na-5005-xaycra-original-...


In [1081]:
len(df_lehenga)

849

In [1082]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 849))

In [1083]:
len(r)

849

In [3]:
# Adding the random numbers to the df
df_lehenga['random_number'] = r

In [1085]:
df_lehenga.head()

Unnamed: 0,files,random_number
0,lehenga/flipkart/free-na-kali-sarkari-dishas-o...,4951
1,lehenga/flipkart/free-3-4-sleeve-riya-yellow-d...,940
2,lehenga/flipkart/free-sleeveless-aaliya-skytar...,4689
3,lehenga/flipkart/free-full-sleeve-d-w-g-dijitr...,3996
4,lehenga/flipkart/free-na-5005-xaycra-original-...,1275


In [1086]:
# Sorting the df based on the random numbers to randomize images
df_lehenga = df_lehenga.sort_values('random_number')

In [1087]:
# Taking only first 500 images
df_lehenga = df_lehenga.iloc[:500, :]

In [1088]:
len(df_lehenga)

500

In [1089]:
df_lehenga.reset_index(drop=True, inplace=True)
df_lehenga.head()

Unnamed: 0,files,random_number
0,lehenga/utsavfashion/embroidered-georgette-leh...,13
1,lehenga/flipkart/free-half-sleeve-latest-royal...,15
2,lehenga/utsavfashion/embroidered-georgette-leh...,17
3,lehenga/flipkart/free-sleeveless-new-designer-...,20
4,lehenga/utsavfashion/digital-printed-organza-l...,21


In [1090]:
df_lehenga.to_csv('data/external/newly_scraped_images/df_lehenga.csv')

### Gowns

In [1091]:
# Getting all saved images of gowns from Flipkart
files_fk = glob('data/external/newly_scraped_images/gowns/flipkart/*.jpeg')

In [1092]:
len(files_fk)

643

In [1093]:
# Getting all saved images of gowns from Karmaplace
files_kp = glob('data/external/newly_scraped_images/gowns/karmaplace/*.jpg')

In [1094]:
len(files_kp)

21

In [1095]:
# Getting all saved images of gowns from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/gowns/utsavfashion/*.jpg')

In [1096]:
len(files_uf)

60

In [1097]:
# Getting all saved images of gowns from Amazon.in
files_ai = glob('data/external/newly_scraped_images/gowns/amazon.in/*.jpg')

In [1098]:
len(files_ai)

51

Want 500 files

In [1099]:
# Making combined list
lst = files_kp + files_fk + files_ai + files_uf

In [1100]:
df_gowns = pd.DataFrame()

In [1101]:
df_gowns['files'] = lst

In [1102]:
df_gowns.head()

Unnamed: 0,files
0,gowns/karmaplace/ACU6105_512x769.jpg
1,gowns/karmaplace/ACU6959-1_512x769.jpg
2,gowns/karmaplace/ACT4710._512x769.jpg
3,gowns/karmaplace/ACU6960-1_512x769.jpg
4,gowns/karmaplace/AS8318-C-1_512x769.jpg


In [1103]:
len(df_gowns)

775

In [1104]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 775))

In [1105]:
len(r)

775

In [1106]:
# Adding the random numbers to the df
df_gowns['random_number'] = r

In [1107]:
df_gowns.head()

Unnamed: 0,files,random_number
0,gowns/karmaplace/ACU6105_512x769.jpg,2018
1,gowns/karmaplace/ACU6959-1_512x769.jpg,1769
2,gowns/karmaplace/ACT4710._512x769.jpg,4911
3,gowns/karmaplace/ACU6960-1_512x769.jpg,4159
4,gowns/karmaplace/AS8318-C-1_512x769.jpg,3031


In [1108]:
# Sorting the df based on the random numbers to randomize images
df_gowns = df_gowns.sort_values('random_number')

In [1109]:
# Taking only first 500 images
df_gowns = df_gowns.iloc[:500, :]

In [1110]:
len(df_gowns)

500

In [1111]:
df_gowns.reset_index(drop=True, inplace=True)
df_gowns.head()

Unnamed: 0,files,random_number
0,gowns/flipkart/na-free-full-sleeve-semi-stitch...,0
1,gowns/flipkart/na-xxl-full-sleeve-stitched-fir...,6
2,gowns/flipkart/na-free-full-sleeve-semi-stitch...,8
3,gowns/flipkart/free-full-sleeve-designer-lehen...,21
4,gowns/flipkart/na-free-3-4-sleeve-semi-stitche...,22


In [1112]:
df_gowns.to_csv('data/external/newly_scraped_images/df_gowns.csv')

### Dupattas

In [1113]:
# Getting all saved images of dupattas from Amazon.in
files_ai = glob('data/external/newly_scraped_images/dupattas/amazon.in/*.jpg')

In [1114]:
len(files_ai)

159

In [1115]:
# Getting all saved images of dupattas from Karmaplace
files_kp = glob('data/external/newly_scraped_images/dupattas/karmaplace/*.jpg')

In [1116]:
len(files_kp)

763

In [1117]:
# Getting all saved images of dupattas from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/dupattas/utsavfashion/*.jpg')

In [1118]:
len(files_uf)

120

Want 500 files

In [1119]:
# Making combined list
lst = files_kp + files_ai + files_uf

In [1120]:
df_dupattas = pd.DataFrame()

In [1121]:
df_dupattas['files'] = lst

In [1122]:
df_dupattas.head()

Unnamed: 0,files
0,dupattas/karmaplace/N2Zdk0632-S_512x769.jpg
1,dupattas/karmaplace/1405-112RAM_512x769.jpg
2,dupattas/karmaplace/1404-111NBLU_512x769.jpg
3,dupattas/karmaplace/PHT0066-2_512x769.jpg
4,dupattas/karmaplace/N2ZHKS0197S_b77a74d7-3ee9-...


In [1123]:
len(df_dupattas)

1042

In [1124]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 1042))

In [1125]:
len(r)

1042

In [1126]:
# Adding the random numbers to the df
df_dupattas['random_number'] = r

In [1127]:
df_dupattas.head()

Unnamed: 0,files,random_number
0,dupattas/karmaplace/N2Zdk0632-S_512x769.jpg,1146
1,dupattas/karmaplace/1405-112RAM_512x769.jpg,3894
2,dupattas/karmaplace/1404-111NBLU_512x769.jpg,2899
3,dupattas/karmaplace/PHT0066-2_512x769.jpg,2128
4,dupattas/karmaplace/N2ZHKS0197S_b77a74d7-3ee9-...,3490


In [1128]:
# Sorting the df based on the random numbers to randomize images
df_dupattas = df_dupattas.sort_values('random_number')

In [1129]:
# Taking only first 500 images
df_dupattas = df_dupattas.iloc[:500, :]

In [1130]:
len(df_dupattas)

500

In [1131]:
df_dupattas.reset_index(drop=True, inplace=True)
df_dupattas.head()

Unnamed: 0,files,random_number
0,dupattas/amazon.in/7136CasC1fL._AC_UL320_.jpg,2
1,dupattas/karmaplace/N2ZKDBBB028_3-4Y_7f9d9fe4-...,8
2,dupattas/karmaplace/PHT0003-1_512x769.jpg,17
3,dupattas/karmaplace/AHKUDUSH-COMBO-472-FF-M_af...,19
4,dupattas/utsavfashion/bnj266.jpg,46


In [1132]:
df_dupattas.to_csv('data/external/newly_scraped_images/df_dupattas.csv')

### Dhoti Pants

In [1192]:
# Getting all saved images of dhoti pants from Flipkart
files_fk = glob('data/external/newly_scraped_images/dhoti_pants/flipkart/*.jpeg')

In [1193]:
len(files_fk)

608

In [1194]:
# Getting all saved images of dhoti pants from Karmaplace
files_kp = glob('data/external/newly_scraped_images/dhoti_pants/karmaplace/*.jpg')

In [1195]:
len(files_kp)

302

In [1196]:
# Getting all saved images of dhoti pants from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/dhoti_pants/utsavfashion/*.jpg')

In [1197]:
len(files_uf)

114

Want 500 files

In [1198]:
# Making combined list
lst = files_kp + files_fk + files_uf

In [1199]:
df_dhoti_pants = pd.DataFrame()

In [1200]:
df_dhoti_pants['files'] = lst

In [1201]:
df_dhoti_pants.head()

Unnamed: 0,files
0,dhoti_pants/karmaplace/VASMDTWH_1_512x776.jpg
1,dhoti_pants/karmaplace/VASMCDWH_512x776.jpg
2,dhoti_pants/karmaplace/AHKUDO-Combo-128-2_512x...
3,dhoti_pants/karmaplace/VASMCD007WN-1_512x769.jpg
4,dhoti_pants/karmaplace/VASMKWH001nDTBU_1_512x7...


In [1202]:
len(df_dhoti_pants)

1024

In [1203]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 1024))

In [1204]:
len(r)

1024

In [1205]:
# Adding the random numbers to the df
df_dhoti_pants['random_number'] = r

In [1206]:
df_dhoti_pants.head()

Unnamed: 0,files,random_number
0,dhoti_pants/karmaplace/VASMDTWH_1_512x776.jpg,1027
1,dhoti_pants/karmaplace/VASMCDWH_512x776.jpg,4335
2,dhoti_pants/karmaplace/AHKUDO-Combo-128-2_512x...,4310
3,dhoti_pants/karmaplace/VASMCD007WN-1_512x769.jpg,2887
4,dhoti_pants/karmaplace/VASMKWH001nDTBU_1_512x7...,2184


In [1207]:
# Sorting the df based on the random numbers to randomize images
df_dhoti_pants = df_dhoti_pants.sort_values('random_number')

In [1208]:
# Taking only first 500 images
df_dhoti_pants = df_dhoti_pants.iloc[:500, :]

In [1209]:
len(df_dhoti_pants)

500

In [1210]:
df_dhoti_pants.reset_index(drop=True, inplace=True)
df_dhoti_pants.head()

Unnamed: 0,files,random_number
0,dhoti_pants/flipkart/free-1-hp49-icare-origina...,1
1,dhoti_pants/karmaplace/VASMCDCR-2_512x769.jpg,2
2,dhoti_pants/flipkart/free-fpi-india-195-fashio...,8
3,dhoti_pants/flipkart/free-india-2002-whitewhal...,10
4,dhoti_pants/utsavfashion/utsavfashionembroider...,18


In [1211]:
df_dhoti_pants.to_csv('data/external/newly_scraped_images/df_dhoti_pants.csv')

### Blouses

In [17]:
# Getting all saved images of blouses from Flipkart
files_fk = glob('data/external/newly_scraped_images/blouse/flipkart/*.jpg')

In [18]:
len(files_fk)

387

In [19]:
# Getting all saved images of blouses from more pages from Flipkart
files_fk1 = glob('data/external/newly_scraped_images/blouse/flipkart/*.jpeg')

In [20]:
len(files_fk1)

223

In [21]:
# Getting all saved images of blouses from Amazon.in
files_ai = glob('data/external/newly_scraped_images/blouse/amazon.in/*.jpg')

In [22]:
len(files_ai)

157

In [23]:
# Getting all saved images of blouses from Utsav Fashion
files_uf = glob('data/external/newly_scraped_images/blouse/utsavfashion/*.jpg')

In [24]:
len(files_uf)

25

Want 500 files

In [25]:
# Making combined list
lst = files_fk + files_fk1 + files_uf + files_ai

In [26]:
df_blouse = pd.DataFrame()

In [27]:
df_blouse['files'] = lst

In [28]:
df_blouse.head()

Unnamed: 0,files
0,data/external/newly_scraped_images/blouse/flip...
1,data/external/newly_scraped_images/blouse/flip...
2,data/external/newly_scraped_images/blouse/flip...
3,data/external/newly_scraped_images/blouse/flip...
4,data/external/newly_scraped_images/blouse/flip...


In [29]:
len(df_blouse)

792

In [30]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 792))

In [31]:
len(r)

792

In [32]:
# Adding the random numbers to the df
df_blouse['random_number'] = r

In [33]:
df_blouse.head()

Unnamed: 0,files,random_number
0,data/external/newly_scraped_images/blouse/flip...,4533
1,data/external/newly_scraped_images/blouse/flip...,1756
2,data/external/newly_scraped_images/blouse/flip...,3998
3,data/external/newly_scraped_images/blouse/flip...,2378
4,data/external/newly_scraped_images/blouse/flip...,2702


In [34]:
# Sorting the df based on the random numbers to randomize images
df_blouse = df_blouse.sort_values('random_number')

In [35]:
# Taking only first 500 images
df_blouse = df_blouse.iloc[:500, :]

In [36]:
len(df_blouse)

500

In [37]:
df_blouse.reset_index(drop=True, inplace=True)
df_blouse.head()

Unnamed: 0,files,random_number
0,data/external/newly_scraped_images/blouse/amaz...,2
1,data/external/newly_scraped_images/blouse/flip...,11
2,data/external/newly_scraped_images/blouse/flip...,30
3,data/external/newly_scraped_images/blouse/flip...,38
4,data/external/newly_scraped_images/blouse/flip...,48


In [39]:
df_blouse.to_csv('data/processed/df_blouse.csv')

### Leggings and Salwars

In [1167]:
# Getting all saved images of salwars from Amazon.in
files_ai = glob('leggings_and_salwars/amazon.in_salwars/*.jpg')

In [1168]:
len(files_ai)

237

In [1169]:
# Getting all saved images of leggings from Flipkart
files_fk = glob('leggings_and_salwars/flipkart/*.jpeg')

In [1170]:
len(files_fk)

482

In [1171]:
# Getting all saved images of salvars from Flipkart
files_fkw = glob('leggings_and_salwars/flipkart_salwars/*.jpeg')

In [1172]:
len(files_fkw)

420

In [1173]:
# Getting all saved images of leggings from Utsav Fashion
files_uf = glob('leggings_and_salwars/utsav_leggings/*.jpg')

In [1174]:
len(files_uf)

18

Want 500 files

In [1175]:
# Making combined list
lst = files_ai + files_fk + files_uf + files_fkw

In [1176]:
df_leggings_and_salwars = pd.DataFrame()

In [1177]:
df_leggings_and_salwars['files'] = lst

In [1178]:
df_leggings_and_salwars.head()

Unnamed: 0,files
0,leggings_and_salwars/amazon.in_salwars/61f7bHk...
1,leggings_and_salwars/amazon.in_salwars/41uqpW8...
2,leggings_and_salwars/amazon.in_salwars/61nnI4m...
3,leggings_and_salwars/amazon.in_salwars/71LBFRQ...
4,leggings_and_salwars/amazon.in_salwars/617CAyn...


In [1179]:
len(df_leggings_and_salwars)

1157

In [1183]:
# Making a list of random numbers with the same length as the dataframe.
r = list(random.sample(range(0, 5000), 1157))

In [1184]:
len(r)

1157

In [1185]:
# Adding the random numbers to the df
df_leggings_and_salwars['random_number'] = r

In [1186]:
df_leggings_and_salwars.head()

Unnamed: 0,files,random_number
0,leggings_and_salwars/amazon.in_salwars/61f7bHk...,81
1,leggings_and_salwars/amazon.in_salwars/41uqpW8...,3713
2,leggings_and_salwars/amazon.in_salwars/61nnI4m...,4307
3,leggings_and_salwars/amazon.in_salwars/71LBFRQ...,2074
4,leggings_and_salwars/amazon.in_salwars/617CAyn...,1015


In [1187]:
# Sorting the df based on the random numbers to randomize images
df_leggings_and_salwars = df_leggings_and_salwars.sort_values('random_number')

In [1188]:
# Taking only first 500 images
df_leggings_and_salwars = df_leggings_and_salwars.iloc[:500, :]

In [1189]:
len(df_leggings_and_salwars)

500

In [1190]:
df_leggings_and_salwars.reset_index(drop=True, inplace=True)
df_leggings_and_salwars.head()

Unnamed: 0,files,random_number
0,leggings_and_salwars/flipkart_salwars/free-sbp...,3
1,leggings_and_salwars/flipkart_salwars/yes-2-m-...,8
2,leggings_and_salwars/flipkart/s-ajwa-18-ajwa-o...,16
3,leggings_and_salwars/flipkart_salwars/54-pat-b...,27
4,leggings_and_salwars/flipkart/3xl-pt-brown-ali...,33


In [1191]:
df_leggings_and_salwars.to_csv('df_leggings_and_salwars.csv')