# This notebook looks at the validation data product labels to determine what class labels are present.

In [182]:
import pandas as pd
import numpy as np
import re
import seaborn as sns
import matplotlib.pyplot as plt

In [183]:
# Load the validation split; lines=True reads the file as one JSON record per line.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable data directory.
df = pd.read_json(
    path_or_buf='/Users/butler/Documents/Image-Classification-Indian-Clothing/data/raw/val_data.json',
    lines=True,
)

In [184]:
df.head()

Unnamed: 0,image_url,image_path,brand,product_title,class_label,color
0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,
1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,
2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,
3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,
4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,


In [185]:
df.tail()

Unnamed: 0,image_url,image_path,brand,product_title,class_label,color
7495,https://m.media-amazon.com/images/I/614GvuaIJu...,images/val/7495.jpeg,ishin,Women's Rayon Pink Printed A-Line Kurta Palazz...,women_kurta,
7496,https://m.media-amazon.com/images/I/81WAbf4-ao...,images/val/7496.jpeg,Cotton Culture,Women's Kesar Pink Cotton Straight Kurta,women_kurta,
7497,https://m.media-amazon.com/images/I/61unbmOA7k...,images/val/7497.jpeg,Bae's Wardrobe,Printed Rayon Anarkali Kurti with Plazzo Pant ...,women_kurta,
7498,https://m.media-amazon.com/images/I/71zULAnVNY...,images/val/7498.jpeg,Aarika,Girl's Regular Dress,women_kurta,
7499,https://m.media-amazon.com/images/I/71mhB7xFHt...,images/val/7499.jpeg,Aurelia,Women's Straight Kurta,women_kurta,


In [186]:
df.shape

(7500, 6)

In [187]:
df.class_label.unique()

array(['saree', 'blouse', 'dhoti_pants', 'dupattas', 'gowns', 'kurta_men',
       'leggings_and_salwars', 'lehenga', 'mojaris_men', 'mojaris_women',
       'nehru_jackets', 'palazzos', 'petticoats', 'sherwanis',
       'women_kurta'], dtype=object)

In [188]:
# Work on a deep copy so the loaded frame `df` stays untouched.
df_revised = df.copy(deep=True)

In [189]:
df_revised.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7500 entries, 0 to 7499
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   image_url      7500 non-null   object
 1   image_path     7500 non-null   object
 2   brand          7500 non-null   object
 3   product_title  7500 non-null   object
 4   class_label    7500 non-null   object
 5   color          947 non-null    object
dtypes: object(6)
memory usage: 351.7+ KB


In [190]:
# Keep the original row index as an explicit 'number' column in case it is needed later.
df_revised = df_revised.reset_index(drop=False, names='number')

In [191]:
df_revised.head()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color
0,0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,
1,1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,
2,2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,
3,3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,
4,4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,


In [192]:
df_revised.tail()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color
7495,7495,https://m.media-amazon.com/images/I/614GvuaIJu...,images/val/7495.jpeg,ishin,Women's Rayon Pink Printed A-Line Kurta Palazz...,women_kurta,
7496,7496,https://m.media-amazon.com/images/I/81WAbf4-ao...,images/val/7496.jpeg,Cotton Culture,Women's Kesar Pink Cotton Straight Kurta,women_kurta,
7497,7497,https://m.media-amazon.com/images/I/61unbmOA7k...,images/val/7497.jpeg,Bae's Wardrobe,Printed Rayon Anarkali Kurti with Plazzo Pant ...,women_kurta,
7498,7498,https://m.media-amazon.com/images/I/71zULAnVNY...,images/val/7498.jpeg,Aarika,Girl's Regular Dress,women_kurta,
7499,7499,https://m.media-amazon.com/images/I/71mhB7xFHt...,images/val/7499.jpeg,Aurelia,Women's Straight Kurta,women_kurta,


In [193]:
# Keywords to search for in the product titles; each is the shortest
# distinctive fragment of a clothing (or gender) name.
columns = [
    'kurta', 'women', 'men', 'dhoti',
    'saree', 'sherwani', 'palazzo', 'mojari',
    'nehru', 'legging', 'salwar', 'petticoat',
    'blouse', 'dupatta', 'gown', 'lehenga',
]


In [194]:
# One boolean column per keyword: True when the product title contains the
# keyword (case-insensitive substring match). Because this is a substring
# test, 'men' is also True for titles that contain "Women".
for keyword in columns:
    df_revised[keyword] = df_revised['product_title'].str.contains(keyword, flags=re.IGNORECASE)

In [195]:
# Flag titles that mention "girl" (case-insensitive substring match).
df_revised['girl'] = df_revised['product_title'].str.contains('girl', case=False)

In [196]:
# Count "girl" titles as women's items: set 'women' to True on rows where it
# is still False and the title matched 'girl'.
# (The original mask was written `c2 & c2`, leaving the first condition unused.)
not_yet_women = (df_revised['women'] == False)
mentions_girl = (df_revised['girl'] == True)

df_revised.loc[not_yet_women & mentions_girl, 'women'] = True

In [197]:
# Flag titles that mention "woman" (case-insensitive substring match).
df_revised['woman'] = df_revised['product_title'].str.contains('woman', case=False)

In [198]:
# Count "woman" titles as women's items: set 'women' to True on rows where it
# is still False and the title matched 'woman'.
# (The original mask was written `c2 & c2`, leaving the first condition unused.)
not_yet_women = (df_revised['women'] == False)
mentions_woman = (df_revised['woman'] == True)

df_revised.loc[not_yet_women & mentions_woman, 'women'] = True

In [199]:
# Flag titles that contain "man" (case-insensitive substring match, so
# "woman" also matches).
df_revised['man'] = df_revised['product_title'].str.contains('man', case=False)

In [200]:
# Flag titles that mention "boy" (case-insensitive substring match).
df_revised['boy'] = df_revised['product_title'].str.contains('boy', case=False)

In [201]:
# Count "man" titles as men's items: set 'men' to True on rows where it is
# still False and the title matched 'man'.
# (The original mask was written `c2 & c2`, leaving the first condition unused.)
not_yet_men = (df_revised['men'] == False)
mentions_man = (df_revised['man'] == True)

df_revised.loc[not_yet_men & mentions_man, 'men'] = True

In [202]:
# Count "boy" titles as men's items: set 'men' to True on rows where it is
# still False and the title matched 'boy'.
# (The original mask was written `c2 & c2`, leaving the first condition unused.)
not_yet_men = (df_revised['men'] == False)
mentions_boy = (df_revised['boy'] == True)

df_revised.loc[not_yet_men & mentions_boy, 'men'] = True

In [203]:
# The helper flags have been folded into 'women' / 'men'; discard them.
df_revised = df_revised.drop(columns=['girl', 'woman', 'man', 'boy'])
df_revised.head()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color,kurta,women,men,...,palazzo,mojari,nehru,legging,salwar,petticoat,blouse,dupatta,gown,lehenga
0,0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,,False,True,True,...,False,False,False,False,False,False,True,False,False,False
1,1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,,False,True,True,...,False,False,False,False,False,False,True,False,False,False
2,2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,,False,True,True,...,False,False,False,False,False,False,False,False,False,False
4,4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,,False,True,True,...,False,False,False,False,False,False,True,False,False,False


In [204]:
df_revised.columns

Index(['number', 'image_url', 'image_path', 'brand', 'product_title',
       'class_label', 'color', 'kurta', 'women', 'men', 'dhoti', 'saree',
       'sherwani', 'palazzo', 'mojari', 'nehru', 'legging', 'salwar',
       'petticoat', 'blouse', 'dupatta', 'gown', 'lehenga'],
      dtype='object')

In [205]:
# Where a row is flagged as both 'men' and 'women', keep 'women' and clear 'men'.
flagged_men = df_revised['men'].eq(True)
flagged_women = df_revised['women'].eq(True)

df_revised.loc[flagged_men & flagged_women, 'men'] = False

In [206]:
# Rows still flagged as men's items after the overlap with 'women' was cleared.
df_revised_men = df_revised.loc[df_revised['men'].eq(True)]

In [207]:
# New all-False column for women's kurtas. A scalar is broadcast to every
# row, so this no longer depends on the frame having exactly 7500 rows.
df_revised['women_kurta'] = False

In [208]:
# New all-False column for men's kurtas. A scalar is broadcast to every
# row, so this no longer depends on the frame having exactly 7500 rows.
df_revised['kurta_men'] = False

In [209]:
df_revised.head()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color,kurta,women,men,...,nehru,legging,salwar,petticoat,blouse,dupatta,gown,lehenga,women_kurta,kurta_men
0,0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,,False,True,False,...,False,False,False,False,True,False,False,False,False,False
1,1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,,False,True,False,...,False,False,False,False,True,False,False,False,False,False
2,2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,,False,True,False,...,False,False,False,False,False,False,False,False,False,False
4,4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,,False,True,False,...,False,False,False,False,True,False,False,False,False,False


In [210]:
# Rows flagged as both 'kurta' and 'women' become 'women_kurta';
# the generic 'kurta' flag is cleared on those rows.
has_kurta = df_revised['kurta'].eq(True)
is_womens = df_revised['women'].eq(True)
womens_kurta_rows = has_kurta & is_womens

df_revised.loc[womens_kurta_rows, 'women_kurta'] = True
df_revised.loc[womens_kurta_rows, 'kurta'] = False

In [211]:
# Rows flagged as both 'kurta' and 'men' become 'kurta_men';
# the generic 'kurta' flag is cleared on those rows.
has_kurta = df_revised['kurta'].eq(True)
is_mens = df_revised['men'].eq(True)
mens_kurta_rows = has_kurta & is_mens

df_revised.loc[mens_kurta_rows, 'kurta_men'] = True
df_revised.loc[mens_kurta_rows, 'kurta'] = False

In [212]:
# Since only women wear dupattas, a row whose class_label is 'dupattas' and
# whose 'kurta' flag is still set must be a women's kurta.
labelled_dupattas = df_revised['class_label'].eq('dupattas')
has_kurta = df_revised['kurta'].eq(True)
dupatta_kurta_rows = labelled_dupattas & has_kurta

df_revised.loc[dupatta_kurta_rows, 'women_kurta'] = True
df_revised.loc[dupatta_kurta_rows, 'kurta'] = False

In [213]:
# Clear the generic 'kurta' flag on rows whose class label is already 'kurta_men'.
has_kurta = df_revised['kurta'].eq(True)
labelled_kurta_men = df_revised['class_label'].eq('kurta_men')

df_revised.loc[has_kurta & labelled_kurta_men, 'kurta'] = False

In [214]:
# Clear the generic 'kurta' flag on rows whose class label is already 'women_kurta'.
has_kurta = df_revised['kurta'].eq(True)
labelled_women_kurta = df_revised['class_label'].eq('women_kurta')

df_revised.loc[has_kurta & labelled_women_kurta, 'kurta'] = False

In [215]:
df_revised.kurta.value_counts()

kurta
False    7485
True       15
Name: count, dtype: int64

### This leaves 15 rows where kurta is true, men and women are false, and the class label does not contain kurta

In [216]:
# New all-False columns for mojaris_men, mojaris_women, and leggings_and_salwars.
# A scalar is broadcast to every row, so this no longer depends on the frame
# having exactly 7500 rows.
df_revised['mojaris_men'] = False
df_revised['mojaris_women'] = False
df_revised['leggings_and_salwars'] = False

In [217]:
# 'mojaris_women' is True where both 'mojari' and 'women' are True.
has_mojari = df_revised['mojari'].eq(True)
is_womens = df_revised['women'].eq(True)

df_revised.loc[has_mojari & is_womens, 'mojaris_women'] = True

In [218]:
# 'mojaris_men' is True where both 'mojari' and 'men' are True.
has_mojari = df_revised['mojari'].eq(True)
is_mens = df_revised['men'].eq(True)

df_revised.loc[has_mojari & is_mens, 'mojaris_men'] = True

In [219]:
# 'leggings_and_salwars' is True where either 'legging' or 'salwar' is True.
has_legging = df_revised['legging'].eq(True)
has_salwar = df_revised['salwar'].eq(True)

df_revised.loc[has_legging | has_salwar, 'leggings_and_salwars'] = True

In [220]:
# These flags have been folded into the combined columns; discard them.
df_revised = df_revised.drop(columns=['men', 'women', 'legging', 'salwar', 'mojari'])

In [221]:
df_revised.head()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color,kurta,dhoti,saree,...,petticoat,blouse,dupatta,gown,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
0,0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,,False,False,True,...,False,True,False,False,False,False,False,False,False,False
1,1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,,False,False,True,...,False,True,False,False,False,False,False,False,False,False
2,2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
3,3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
4,4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,,False,False,True,...,False,True,False,False,False,False,False,False,False,False


In [222]:
# Clear the 'blouse' flag on rows whose class_label is 'saree': a model
# wearing a saree cannot be shown without a saree blouse.
labelled_saree = df_revised['class_label'].eq('saree')
has_blouse = df_revised['blouse'].eq(True)

df_revised.loc[labelled_saree & has_blouse, 'blouse'] = False

In [223]:
# Clear the 'saree' flag on rows whose class_label is 'blouse': blouses are
# worn with sarees or lehengas, so "saree" is likely to appear in the
# product title.
labelled_blouse = df_revised['class_label'].eq('blouse')
has_saree = df_revised['saree'].eq(True)

df_revised.loc[labelled_blouse & has_saree, 'saree'] = False

In [224]:
# Clear the 'lehenga' flag on rows whose class_label is 'blouse': blouses are
# worn with sarees or lehengas, so "lehenga" is likely to appear in the
# product title.
labelled_blouse = df_revised['class_label'].eq('blouse')
has_lehenga = df_revised['lehenga'].eq(True)

df_revised.loc[labelled_blouse & has_lehenga, 'lehenga'] = False

In [225]:
df_revised.head()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color,kurta,dhoti,saree,...,petticoat,blouse,dupatta,gown,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
0,0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
1,1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
2,2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
3,3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
4,4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False


In [226]:
df_revised.loc[1]

number                                                                  1
image_url               https://m.media-amazon.com/images/I/81Q8Oktw4s...
image_path                                              images/val/1.jpeg
brand                                                     Yashvi Designer
product_title              Women's Net Saree With Unstitched Blouse Piece
class_label                                                         saree
color                                                                 NaN
kurta                                                               False
dhoti                                                               False
saree                                                                True
sherwani                                                            False
palazzo                                                             False
nehru                                                               False
petticoat                             

In [227]:
# Rename the keyword columns so their names match the class labels exactly.
df_revised = df_revised.rename(
    columns={
        'dhoti': 'dhoti_pants',
        'sherwani': 'sherwanis',
        'palazzo': 'palazzos',
        'nehru': 'nehru_jackets',
        'petticoat': 'petticoats',
        'dupatta': 'dupattas',
        'gown': 'gowns',
    }
)
df_revised.head()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color,kurta,dhoti_pants,saree,...,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
0,0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
1,1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
2,2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
3,3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False
4,4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,,False,False,True,...,False,False,False,False,False,False,False,False,False,False


In [228]:
# A flag that merely repeats the row's own class_label carries no new
# information, so set it to False wherever column name == class_label.
columns = [
    'dhoti_pants', 'saree', 'sherwanis', 'palazzos', 'nehru_jackets',
    'petticoats', 'blouse', 'dupattas', 'gowns', 'lehenga',
    'women_kurta', 'kurta_men', 'mojaris_men', 'mojaris_women',
    'leggings_and_salwars',
]

for label in columns:
    repeats_own_class = df_revised[label].eq(True) & df_revised['class_label'].eq(label)
    df_revised.loc[repeats_own_class, label] = False

df_revised.head()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color,kurta,dhoti_pants,saree,...,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
0,0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [229]:
columns = ['dhoti_pants', 'saree', 'kurta', 'sherwanis', 'palazzos', 'nehru_jackets', 'petticoats',
           'blouse','dupattas', 'gowns', 'lehenga', 'women_kurta', 'kurta_men', 
           'mojaris_men', 'mojaris_women', 'leggings_and_salwars']


def func(x, label=None):
    """Map a flag value to a label string, or to 0 when the flag is not set.

    Parameters
    ----------
    x
        Cell value of a flag column; it is compared against ``True``.
    label
        Name to return for a set flag. When omitted, the function falls back
        to the module-level loop variable ``column``, so existing
        one-argument calls behave as before.

    Returns
    -------
    ``str(label)`` if ``x == True``, otherwise the integer ``0``.
    """
    if label is None:
        # Legacy behaviour: read the loop variable from the enclosing cell.
        label = column
    if x == True:
        return str(label)
    return 0


# If a column is true, put the column name in as the value; otherwise 0.
# The column name is passed explicitly so the result does not depend on
# whatever the global `column` happens to hold when `func` runs.
for column in columns:
    df_revised[column] = df_revised[column].apply(func, args=(column,))

In [230]:
df_revised.head()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color,kurta,dhoti_pants,saree,...,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
0,0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [231]:
df_revised.tail()

Unnamed: 0,number,image_url,image_path,brand,product_title,class_label,color,kurta,dhoti_pants,saree,...,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
7495,7495,https://m.media-amazon.com/images/I/614GvuaIJu...,images/val/7495.jpeg,ishin,Women's Rayon Pink Printed A-Line Kurta Palazz...,women_kurta,,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7496,7496,https://m.media-amazon.com/images/I/81WAbf4-ao...,images/val/7496.jpeg,Cotton Culture,Women's Kesar Pink Cotton Straight Kurta,women_kurta,,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7497,7497,https://m.media-amazon.com/images/I/61unbmOA7k...,images/val/7497.jpeg,Bae's Wardrobe,Printed Rayon Anarkali Kurti with Plazzo Pant ...,women_kurta,,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7498,7498,https://m.media-amazon.com/images/I/71zULAnVNY...,images/val/7498.jpeg,Aarika,Girl's Regular Dress,women_kurta,,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7499,7499,https://m.media-amazon.com/images/I/71mhB7xFHt...,images/val/7499.jpeg,Aurelia,Women's Straight Kurta,women_kurta,,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [232]:
# Number of gowns column that have gowns as value
df_revised.gowns.value_counts()

gowns
0        7488
gowns      12
Name: count, dtype: int64

In [233]:
# Gowns are sometimes called dresses, so also flag titles containing "dress"
# (case-insensitive substring match).
df_revised['dress'] = df_revised['product_title'].str.contains('dress', case=False)

In [234]:
# Where the title matched 'dress' and the gowns column still holds 0,
# write the label 'gowns' into the gowns column, then show the new counts.
c1 = df_revised['gowns'].eq(0)
c2 = df_revised['dress'].eq(True)

df_revised.loc[c1 & c2, 'gowns'] = 'gowns'
df_revised['gowns'].value_counts()

gowns
0        7322
gowns     178
Name: count, dtype: int64

In [235]:
df_revised.gowns.value_counts()

gowns
0        7322
gowns     178
Name: count, dtype: int64

In [236]:
# Drop the columns that are not needed for comparing labels.
df_revised = df_revised.drop(columns=['number', 'image_url', 'brand', 'color', 'dress'])

### Making a dataframe for each label that is different than the class label and exploring the product titles and class labels of some of the rows

In [237]:
# Rows whose 'saree' column still holds the label 'saree'.
df_saree = df_revised.loc[df_revised['saree'].eq('saree')]
df_saree.shape

(183, 19)

In [238]:
df_saree.head()

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1421,images/val/1421.jpeg,Blue Silk Blend Checkes Ready-to-Wear Saree Wi...,dhoti_pants,0,0,saree,0,0,0,0,0,0,0,0,0,0,0,0,0
1994,images/val/1994.jpeg,Lami 9 Meter Maroon Elephant Design Saree Lace...,dupattas,0,0,saree,0,0,0,0,0,0,0,lehenga,0,0,0,0,0
2021,images/val/2021.jpeg,Women's Cotton Green & Red Geometric Print Sar...,gowns,0,0,saree,0,0,0,0,blouse,0,0,0,0,0,0,0,0
2209,images/val/2209.jpeg,Women's Mono Net Embroidered And Pearl Work Sa...,gowns,0,0,saree,0,0,0,0,0,0,0,0,0,0,0,0,0
2340,images/val/2340.jpeg,Anytime Clothing Traditionally Half Saree for ...,gowns,0,0,saree,0,0,0,0,0,0,0,0,0,0,0,0,0


In [239]:
df_saree.iloc[0,1]

'Blue Silk Blend Checkes Ready-to-Wear Saree With Attached pant'

In [240]:
# Rows whose 'blouse' column still holds the label 'blouse'.
df_blouse = df_revised.loc[df_revised['blouse'].eq('blouse')]
df_blouse.shape

(199, 19)

In [241]:
df_blouse.head()

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1788,images/val/1788.jpeg,Ethnic Yard Woman's Organza Printed Semi-Stitc...,dupattas,0,0,0,0,0,0,0,blouse,0,0,0,0,0,0,0,0
2021,images/val/2021.jpeg,Women's Cotton Green & Red Geometric Print Sar...,gowns,0,0,saree,0,0,0,0,blouse,0,0,0,0,0,0,0,0
2248,images/val/2248.jpeg,Women Floral Printed Georgette Unstiched Fabri...,gowns,0,0,0,0,0,0,0,blouse,0,gowns,0,0,0,0,0,0
2409,images/val/2409.jpeg,Women's Cotton Rust Self Print Saree with Blou...,gowns,0,0,saree,0,0,0,0,blouse,0,0,0,0,0,0,0,0
2453,images/val/2453.jpeg,Women/Girls Unisex Ethnic Unstiched makhmal Do...,gowns,0,0,0,0,0,0,0,blouse,0,gowns,0,0,0,0,0,0


In [242]:
df_blouse.iloc[1,1]

"Women's Cotton Green & Red Geometric Print Saree with Blouse Piece (Sarita-510_Green & Red_Free Size)"

In [243]:
df_blouse.iloc[3,1]

"Women's Cotton Rust Self Print Saree with Blouse Piece (Samaira-604_Rust_Free Size)"

Incorrect label

In [244]:
df_blouse.iloc[4,1]

'Women/Girls Unisex Ethnic Unstiched makhmal Double Shaded Sequin Dress Material for Shirts,Gown, Blouse, garara, etc.'

Incorrect label

In [245]:
# Rows whose 'dhoti_pants' column still holds the label 'dhoti_pants'.
df_dhoti_pants = df_revised.loc[df_revised['dhoti_pants'].eq('dhoti_pants')]
df_dhoti_pants.shape

(36, 19)

In [246]:
df_dhoti_pants.head()

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1517,images/val/1517.jpeg,Women's Pink Rayon Straight Short Kurta With D...,dupattas,0,dhoti_pants,0,0,0,0,0,0,0,0,0,women_kurta,0,0,0,0
2570,images/val/2570.jpeg,Men White Solid Dhoti Pant,kurta_men,0,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2674,images/val/2674.jpeg,Men Maroon &amp; Blue Solid Kurta with Dhoti P...,kurta_men,0,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2789,images/val/2789.jpeg,Men Brown Dupioni Silk Kurta & Beige Dhoti,kurta_men,0,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2911,images/val/2911.jpeg,Men White Chikankari Embroidered Kurta with Dh...,kurta_men,0,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [247]:
df_dhoti_pants.iloc[0,1]

"Women's Pink Rayon Straight Short Kurta With Dhoti Pant"

In [248]:
# Rows whose 'dupattas' column still holds the label 'dupattas'.
df_dupattas = df_revised.loc[df_revised['dupattas'].eq('dupattas')]
df_dupattas.shape

(279, 19)

In [249]:
df_dupattas.head()

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1020,images/val/1020.jpeg,VCC Rayon Printed Kurti Palazzo & Dupatta Set ...,dhoti_pants,0,0,0,0,palazzos,0,0,0,dupattas,0,0,0,0,0,0,0
1026,images/val/1026.jpeg,Rayon Green Printed Hand Work Kurta Palazzo an...,dhoti_pants,0,0,0,0,palazzos,0,0,0,dupattas,0,0,women_kurta,0,0,0,0
1064,images/val/1064.jpeg,Women’s Pure Cotton Plain Semi Patiala Dhoti S...,dhoti_pants,0,0,0,0,0,0,0,0,dupattas,0,0,0,0,0,0,leggings_and_salwars
1076,images/val/1076.jpeg,Women's Pure Cotton Plain Semi Patiala Dhoti S...,dhoti_pants,0,0,0,0,0,0,0,0,dupattas,0,0,0,0,0,0,leggings_and_salwars
1127,images/val/1127.jpeg,Women's Red Poly Silk Kurta With Pant And Dupatta,dhoti_pants,0,0,0,0,0,0,0,0,dupattas,0,0,women_kurta,0,0,0,0


In [250]:
df_dupattas.iloc[0,1]

'VCC Rayon Printed Kurti Palazzo & Dupatta Set for Women'

Incorrect class label

In [251]:
df_dupattas.iloc[1,1]

'Rayon Green Printed Hand Work Kurta Palazzo and Dupatta Set for Women'

In [252]:
# Rows whose 'gowns' column holds the label 'gowns'.
df_gowns = df_revised.loc[df_revised['gowns'].eq('gowns')]
df_gowns.shape

(178, 19)

In [253]:
df_gowns.head()

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1085,images/val/1085.jpeg,Womens Pure Cotton Multicoloured Checks Off-Wh...,dhoti_pants,0,0,0,0,0,0,0,0,0,gowns,0,0,0,0,0,0
1482,images/val/1482.jpeg,Girls Western Dresses Top & Floral Print Plazz...,dhoti_pants,0,0,0,0,0,0,0,0,0,gowns,0,0,0,0,0,0
1538,images/val/1538.jpeg,Women's Georgette Embroidery Dress Material,dupattas,0,0,0,0,0,0,0,0,0,gowns,0,0,0,0,0,0
1540,images/val/1540.jpeg,Women's Crepe Dress Material,dupattas,0,0,0,0,0,0,0,0,0,gowns,0,0,0,0,0,0
1559,images/val/1559.jpeg,MBL-3 Top Self Solid Cotton Designer Heavy Exc...,dupattas,0,0,0,0,0,0,0,0,0,gowns,0,0,0,0,0,0


In [254]:
df_gowns.iloc[3,1]

"Women's Crepe Dress Material"

Incorrect label

In [255]:
# Rows whose generic 'kurta' column still holds the label 'kurta'.
df_kurta = df_revised.loc[df_revised['kurta'].eq('kurta')]
df_kurta.shape

(15, 19)

In [256]:
df_kurta.head()

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1058,images/val/1058.jpeg,Brown Block Printed Cotton Slub Short Kurta,dhoti_pants,kurta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1437,images/val/1437.jpeg,Taani Creations Kurta Dhoti Set Red,dhoti_pants,kurta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2036,images/val/2036.jpeg,Rayon Printed Regular Fit Long Kurta/Gown,gowns,kurta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2225,images/val/2225.jpeg,by FBB Asymmetric Kurta with Mock Pockets,gowns,kurta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2277,images/val/2277.jpeg,Asymmetric Kurta with Chevron Jacket,gowns,kurta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [257]:
df_kurta.iloc[0,1]

'Brown Block Printed Cotton Slub Short Kurta'

In [258]:
# Rows whose 'kurta_men' column still holds the label 'kurta_men'.
df_kurta_men = df_revised.loc[df_revised['kurta_men'].eq('kurta_men')]
df_kurta_men.shape

(49, 19)

In [259]:
df_kurta_men.head()

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1013,images/val/1013.jpeg,Men's Embroidery Kurta & Salwar Dhoti Set Spec...,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,kurta_men,0,0,leggings_and_salwars
1039,images/val/1039.jpeg,Men's Dupion Silk Regular Kurta and Dhoti Pant...,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,kurta_men,0,0,0
1174,images/val/1174.jpeg,Men's Silk Blend Kurta Dhoti and Printed Modi ...,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,kurta_men,0,0,0
1259,images/val/1259.jpeg,"Men's Aqua Cotton Blend Ethnic Jacket, Kurta a...",dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,kurta_men,0,0,0
1302,images/val/1302.jpeg,Mens Silk Self Design Kurta And Dhoti Pant (Blue),dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,kurta_men,0,0,0


In [260]:
# Obviously mislabeled

df_kurta_men.iloc[0,1]

"Men's Embroidery Kurta & Salwar Dhoti Set Special for Boys & Men's"

Looks like an incorrect class label

In [261]:
# Rows whose 'leggings_and_salwars' column still holds that label.
df_leggings_and_salwars = df_revised.loc[
    df_revised['leggings_and_salwars'].eq('leggings_and_salwars')
]
df_leggings_and_salwars.shape

(243, 19)

In [262]:
df_leggings_and_salwars.head()

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1001,images/val/1001.jpeg,Sweet Threadz Cotton Viscose Lycra Dhoti Patiy...,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,leggings_and_salwars
1004,images/val/1004.jpeg,Areal Fashion Women's Cotton Traditional Patia...,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,leggings_and_salwars
1006,images/val/1006.jpeg,Women Beige Solid Dhoti Salwar,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,leggings_and_salwars
1007,images/val/1007.jpeg,Combo Offer (Pack of 3) Cotton Viscose Lycra D...,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,leggings_and_salwars
1008,images/val/1008.jpeg,Combo Offer Pack of 3 Cotton Viscose Lycra Dho...,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,leggings_and_salwars


In [263]:
# Full product title of the first 'leggings_and_salwars' row.
df_leggings_and_salwars['product_title'].iloc[0]

'Sweet Threadz Cotton Viscose Lycra Dhoti Patiyala Salwar Harem Bottoms Pants Light Skin Yellow Combo Pack of 2'

In [264]:
# Full product title of the fifth 'leggings_and_salwars' row (position 4).
df_leggings_and_salwars['product_title'].iloc[4]

'Combo Offer Pack of 3 Cotton Viscose Lycra Dhoti Patiyala Salwar Harem Bottoms Pants for Womens White Dark Skin Baby Pink'

Incorrect label

In [265]:
# Keep only the rows flagged in the `lehenga` indicator column, then report (rows, columns).
df_lehenga = df_revised.loc[df_revised['lehenga'].eq('lehenga')]
df_lehenga.shape

(22, 19)

In [266]:
# Preview the first five rows flagged as 'lehenga'.
df_lehenga.head(n=5)

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
182,images/val/182.jpeg,Women' Navy Blue Colour Heavy Work Sleeve Blou...,saree,0,0,0,0,0,0,0,0,0,0,lehenga,0,0,0,0,0
1686,images/val/1686.jpeg,Chanderi Digital Printed Latest Design Dupatta...,dupattas,0,0,0,0,0,0,0,0,0,0,lehenga,women_kurta,0,0,0,0
1783,images/val/1783.jpeg,Women's Silk Semi-stitched Lehenga Choli (pala...,dupattas,0,0,0,0,0,0,0,0,0,0,lehenga,0,0,0,0,0
1799,images/val/1799.jpeg,Women's Art Silk Semi-stitched Lehenga Choli w...,dupattas,0,0,0,0,0,0,0,0,0,0,lehenga,0,0,0,0,0
1810,images/val/1810.jpeg,Women's Net Semi-stitched Lehenga Choli With D...,dupattas,0,0,0,0,0,0,0,0,0,0,lehenga,0,0,0,0,0


In [267]:
# Full product title of the first 'lehenga' row.
df_lehenga['product_title'].iloc[0]

"Women' Navy Blue Colour Heavy Work Sleeve Blouse For Saree And Lehenga (Blue,Size 38+margin)"

In [268]:
# Keep only the rows flagged in the `mojaris_men` indicator column, then report (rows, columns).
df_mojaris_men = df_revised.loc[df_revised['mojaris_men'].eq('mojaris_men')]
df_mojaris_men.shape

(18, 19)

In [269]:
# Preview the first five rows flagged as 'mojaris_men'.
df_mojaris_men.head(n=5)

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
4521,images/val/4521.jpeg,Boys Jutti/Mojari for Kids Red (SPF-2011,mojaris_women,0,0,0,0,0,0,0,0,0,0,0,0,0,mojaris_men,0,0
4530,images/val/4530.jpeg,Men's Ethnic Mojari/Juttis/Nagra/Loafers,mojaris_women,0,0,0,0,0,0,0,0,0,0,0,0,0,mojaris_men,0,0
4627,images/val/4627.jpeg,Wedding Mojaris Shoes for Men Loafer Stylish B...,mojaris_women,0,0,0,0,0,0,0,0,0,0,0,0,0,mojaris_men,0,0
4631,images/val/4631.jpeg,Pathani Mojaris for Men,mojaris_women,0,0,0,0,0,0,0,0,0,0,0,0,0,mojaris_men,0,0
4665,images/val/4665.jpeg,"Men's Premium Brown Traditional Leather Jutti,...",mojaris_women,0,0,0,0,0,0,0,0,0,0,0,0,0,mojaris_men,0,0


Many of the images labeled mojaris_women are actually men's

In [270]:
# Keep only the rows flagged in the `mojaris_women` indicator column, then report (rows, columns).
df_mojaris_women = df_revised.loc[df_revised['mojaris_women'].eq('mojaris_women')]
df_mojaris_women.shape

(3, 19)

In [271]:
# Preview up to five rows flagged as 'mojaris_women'.
df_mojaris_women.head(n=5)

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
4197,images/val/4197.jpeg,Rahi Fasion Designer Embroidery Work Velvet Ja...,mojaris_men,0,0,0,0,0,0,0,0,0,0,0,0,0,0,mojaris_women,0
4394,images/val/4394.jpeg,Ladies Jutti Women Jutti Women Mojari Brown,mojaris_men,0,0,0,0,0,0,0,0,0,0,0,0,0,0,mojaris_women,0
4484,images/val/4484.jpeg,Women's Mojaris,mojaris_men,0,0,0,0,0,0,0,0,0,0,0,0,0,0,mojaris_women,0


In [272]:
# Full product title of the first 'mojaris_women' row.
df_mojaris_women['product_title'].iloc[0]

'Rahi Fasion Designer Embroidery Work Velvet Jaipuri Mojari for Women and Girls'

These are actually women's mojaris

In [273]:
# Keep only the rows flagged in the `nehru_jackets` indicator column, then report (rows, columns).
df_nehru_jackets = df_revised.loc[df_revised['nehru_jackets'].eq('nehru_jackets')]
df_nehru_jackets.shape

(7, 19)

In [274]:
# Preview the first five rows flagged as 'nehru_jackets'.
df_nehru_jackets.head(n=5)

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
545,images/val/545.jpeg,Kullu Karishma Hand Woven Merino wool Sleevele...,blouse,0,0,0,0,0,nehru_jackets,0,0,0,0,0,0,0,0,0,0
2536,images/val/2536.jpeg,Men Beige Solid Kurta with Trousers &amp; Nehr...,kurta_men,0,0,0,0,0,nehru_jackets,0,0,0,0,0,0,0,0,0,0
2641,images/val/2641.jpeg,Men White &amp; Green Solid Kurta with Churida...,kurta_men,0,0,0,0,0,nehru_jackets,0,0,0,0,0,0,0,0,0,0
2744,images/val/2744.jpeg,Men Grey &amp; Golden Solid Kurta with Churida...,kurta_men,0,0,0,0,0,nehru_jackets,0,0,0,0,0,0,0,0,0,0
2798,images/val/2798.jpeg,Men Orange Solid Kurta with Churidar &amp; Neh...,kurta_men,0,0,0,0,0,nehru_jackets,0,0,0,0,0,0,0,0,0,0


In [275]:
# Full product title of the first 'nehru_jackets' row.
df_nehru_jackets['product_title'].iloc[0]

'Kullu Karishma Hand Woven Merino wool Sleeveless Nehru Jacket Waistcoat'

In [276]:
# Keep only the rows flagged in the `palazzos` indicator column, then report (rows, columns).
df_palazzos = df_revised.loc[df_revised['palazzos'].eq('palazzos')]
df_palazzos.shape

(203, 19)

In [277]:
# Preview the first five rows flagged as 'palazzos'.
df_palazzos.head(n=5)

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1020,images/val/1020.jpeg,VCC Rayon Printed Kurti Palazzo & Dupatta Set ...,dhoti_pants,0,0,0,0,palazzos,0,0,0,dupattas,0,0,0,0,0,0,0
1026,images/val/1026.jpeg,Rayon Green Printed Hand Work Kurta Palazzo an...,dhoti_pants,0,0,0,0,palazzos,0,0,0,dupattas,0,0,women_kurta,0,0,0,0
1045,images/val/1045.jpeg,Women's Palazzo Bottom,dhoti_pants,0,0,0,0,palazzos,0,0,0,0,0,0,0,0,0,0,0
1047,images/val/1047.jpeg,"Women's Regular Fit Full Length, Design for Mu...",dhoti_pants,0,0,0,0,palazzos,0,0,0,0,0,0,0,0,0,0,0
1051,images/val/1051.jpeg,"Women's Regular Fit Full Length, Design for Mu...",dhoti_pants,0,0,0,0,palazzos,0,0,0,0,0,0,0,0,0,0,0


In [278]:
# Full product title of the first 'palazzos' row.
df_palazzos['product_title'].iloc[0]

'VCC Rayon Printed Kurti Palazzo & Dupatta Set for Women'

In [279]:
# Full product title of the third 'palazzos' row (position 2).
df_palazzos['product_title'].iloc[2]

"Women's Palazzo Bottom"

There are a number of these that should be labeled palazzos.

In [280]:
# Keep only the rows flagged in the `petticoats` indicator column, then report (rows, columns).
df_petticoats = df_revised.loc[df_revised['petticoats'].eq('petticoats')]
df_petticoats.shape

(1, 19)

In [281]:
# Preview up to five rows flagged as 'petticoats'.
df_petticoats.head(n=5)

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
30,images/val/30.jpeg,Women's Pure Cotton Readymade Inskirt Saree Pe...,saree,0,0,0,0,0,0,petticoats,0,0,0,0,0,0,0,0,0


In [282]:
# Full product title of the first 'petticoats' row.
df_petticoats['product_title'].iloc[0]

"Women's Pure Cotton Readymade Inskirt Saree Petticoats Combo with Handmade Nada (Light Sky Blue)"

Mislabeled

In [283]:
# Keep only the rows flagged in the `sherwanis` indicator column, then report (rows, columns).
df_sherwanis = df_revised.loc[df_revised['sherwanis'].eq('sherwanis')]
df_sherwanis.shape

(44, 19)

In [284]:
# Preview the first five rows flagged as 'sherwanis'.
df_sherwanis.head(n=5)

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1288,images/val/1288.jpeg,Boy's Blended Sherwani and Breeches Set,dhoti_pants,0,0,0,sherwanis,0,0,0,0,0,0,0,0,0,0,0,0
4016,images/val/4016.jpeg,"Men's Traditional Wedding and Sherwani Jutti, ...",mojaris_men,0,0,0,sherwanis,0,0,0,0,0,0,0,0,0,0,0,0
4043,images/val/4043.jpeg,Mens Sherwani Jutti Tradition Ethnic Wedding I...,mojaris_men,0,0,0,sherwanis,0,0,0,0,0,0,0,0,0,0,0,0
4051,images/val/4051.jpeg,"Men's Traditional Black Wedding, Ethnic, Party...",mojaris_men,0,0,0,sherwanis,0,0,0,0,0,0,0,0,0,0,0,0
4056,images/val/4056.jpeg,"Men's Red Wedding, Party & Sherwani Mojaris (J...",mojaris_men,0,0,0,sherwanis,0,0,0,0,0,0,0,0,0,0,0,0


In [285]:
# Full product title of the fourth 'sherwanis' row (position 3).
df_sherwanis['product_title'].iloc[3]

"Men's Traditional Black Wedding, Ethnic, Party and Sherwani Jutti,Mojari"

In [286]:
# Keep only the rows flagged in the `women_kurta` indicator column, then report (rows, columns).
df_women_kurta = df_revised.loc[df_revised['women_kurta'].eq('women_kurta')]
df_women_kurta.shape

(157, 19)

In [287]:
# Preview the first five rows flagged as 'women_kurta'.
df_women_kurta.head(n=5)

Unnamed: 0,image_path,product_title,class_label,kurta,dhoti_pants,saree,sherwanis,palazzos,nehru_jackets,petticoats,blouse,dupattas,gowns,lehenga,women_kurta,kurta_men,mojaris_men,mojaris_women,leggings_and_salwars
1011,images/val/1011.jpeg,Rayon Multi Color Printed Jacket Style Kurta f...,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,women_kurta,0,0,0,0
1015,images/val/1015.jpeg,Women's Regular Kurta,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,women_kurta,0,0,0,0
1019,images/val/1019.jpeg,Women's Rama Green And Navy Blue Cotton Kurta Set,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,women_kurta,0,0,0,0
1026,images/val/1026.jpeg,Rayon Green Printed Hand Work Kurta Palazzo an...,dhoti_pants,0,0,0,0,palazzos,0,0,0,dupattas,0,0,women_kurta,0,0,0,0
1104,images/val/1104.jpeg,Star Fashions Women's Rayon A-Line Kurta & Dho...,dhoti_pants,0,0,0,0,0,0,0,0,0,0,0,women_kurta,0,0,0,leggings_and_salwars


In [288]:
# Full product title of the first 'women_kurta' row.
df_women_kurta['product_title'].iloc[0]

'Rayon Multi Color Printed Jacket Style Kurta for Women'

In [289]:
# Full product title of the second 'women_kurta' row (position 1).
df_women_kurta['product_title'].iloc[1]

"Women's Regular Kurta"

A number of these are mislabeled

In [290]:
# Persist the frame with the per-category indicator columns for later review.
# The row index is written too (pandas default), so the original row ids are kept.
# NOTE(review): the destination is an absolute, machine-specific path; consider a
# project-relative data directory so the notebook runs on other machines.
df_revised.to_csv(
    path_or_buf='/Users/butler/Documents/Image-Classification-Indian-Clothing/data/processed/val_other_categories.csv'
)

## While not all of the items in each dataframe are mislabeled, many are. 
## There are 183 rows in the saree dataframe;
## 199 rows in the blouse dataframe;
## 36 rows in the dhoti_pants dataframe;
## 279 rows in the dupattas dataframe;
## 178 rows in the gowns dataframe;
## 15 rows in the kurta dataframe;
## 49 rows in the kurta_men dataframe;
## 243 rows in the leggings_and_salwars dataframe;
## 22 rows in the lehenga dataframe;
## 18 rows in the mojaris_men dataframe;
## 3 rows in the mojaris_women dataframe;
## 7 rows in the nehru_jackets dataframe;
## 203 rows in the palazzos dataframe;
## 1 row in the petticoats dataframe;
## 44 rows in the sherwanis dataframe;
## and 157 rows in the women_kurta dataframe.