In [1]:
# @copyright 2022, Shardul Rajhans.

# This is a research study for the course 'Master of Science in Data Science' performed by Shardul Rajhans 
# under the supervision of Dr Sandeep Raghuwanshi.

In [2]:
# Importing Necessary Libraries.

# Suppressing Warnings
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Importing matplotlib and seaborn
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Regular expressions, used to standardize the price values across datasets
import re

In [3]:
# Load the Victoria's Secret product data from CSV.
# The path is relative to the notebook's current working directory.
victoria_secret = pd.read_csv('Dataset/victoriassecret_com.csv')
victoria_secret.head()

Unnamed: 0,product_name,mrp,price,pdp_url,brand_name,product_category,retailer,description,rating,review_count,style_attributes,total_sizes,available_size,color
0,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,https://www.victoriassecret.com/panties/shop-a...,Victoria's Secret,Strappy Lace Thong Panty,Victoriassecret US,"Lots of cheek peek, pretty lace, a strappy bac...",,,,"[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,peach melba
1,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,https://www.victoriassecret.com/panties/shop-a...,Victoria's Secret,Strappy Lace Thong Panty,Victoriassecret US,"Lots of cheek peek, pretty lace, a strappy bac...",,,,"[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,black
2,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,https://www.victoriassecret.com/panties/shop-a...,Victoria's Secret,Strappy Lace Thong Panty,Victoriassecret US,"Lots of cheek peek, pretty lace, a strappy bac...",,,,"[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,plum dust
3,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,https://www.victoriassecret.com/panties/shop-a...,Victoria's Secret,Strappy Lace Thong Panty,Victoriassecret US,"Lots of cheek peek, pretty lace, a strappy bac...",,,,"[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,ensign blue
4,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,https://www.victoriassecret.com/panties/shop-a...,Victoria's Secret,Strappy Lace Thong Panty,Victoriassecret US,"Lots of cheek peek, pretty lace, a strappy bac...",,,,"[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,fair orchid


In [4]:
# Printing shape
victoria_secret.shape

(453386, 14)

In [5]:
# Displaying the information of all the columns
victoria_secret.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 453386 entries, 0 to 453385
Data columns (total 14 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   product_name      453386 non-null  object 
 1   mrp               453386 non-null  object 
 2   price             453386 non-null  object 
 3   pdp_url           453386 non-null  object 
 4   brand_name        453386 non-null  object 
 5   product_category  453386 non-null  object 
 6   retailer          453386 non-null  object 
 7   description       453386 non-null  object 
 8   rating            137734 non-null  float64
 9   review_count      137734 non-null  float64
 10  style_attributes  0 non-null       float64
 11  total_sizes       453386 non-null  object 
 12  available_size    453386 non-null  object 
 13  color             453386 non-null  object 
dtypes: float64(3), object(11)
memory usage: 48.4+ MB


In [6]:
# Displaying summary statistics for all the numeric columns
victoria_secret.describe()

Unnamed: 0,rating,review_count,style_attributes
count,137734.0,137734.0,0.0
mean,4.165819,9.202883999999999e+35,
std,0.487524,1.162929e+37,
min,0.0,2.0,
25%,4.0,39.0,
50%,4.3,147.0,
75%,4.5,410.0,
max,5.0,1.5600000000000001e+38,


# Cleaning the Dataset
In this section, we will analyze the missing values, remove the unnecessary columns, and perform all the necessary handling before Exploratory Data Analysis.

In [7]:
# A few columns are not required anywhere in this study, so they are removed here.
# The result is reassigned (no inplace=True) and errors='ignore' is used so that the
# cell can be re-run safely: on a second execution the columns are already gone and a
# plain drop would raise KeyError.
columns_to_drop = ['pdp_url', 'brand_name', 'retailer', 'rating', 'review_count', 'style_attributes']
victoria_secret = victoria_secret.drop(columns=columns_to_drop, errors='ignore')
victoria_secret.head()

Unnamed: 0,product_name,mrp,price,product_category,description,total_sizes,available_size,color
0,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,peach melba
1,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,black
2,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,plum dust
3,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,ensign blue
4,Very Sexy Strappy Lace Thong Panty,$14.50,$14.50,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,fair orchid


In [8]:
# Conversion to USD for the MRP and Price columns
# Reference: https://www.kaggle.com/code/justinekays/eda-for-lingerie?scriptVersionId=8012060&cellId=14

# Fixed, hard-coded conversion rates applied to amounts marked with '₡' and 'rp'
# (presumably Costa Rican colón and Indonesian rupiah, going by the names - confirm).
colon_to_dollar_conversion_rate = 0.0017
ind_rp_to_dollar_conversion_rate = 0.000066

def extract_usd_value(value_str):
    """Reduce a raw price string to a single amount expressed in US dollars.

    For string input the text is lower-cased and trimmed, 'usd' / '$' markers are
    removed, only the part before the first dash is kept (so a range such as
    "10 - 20" or "10–20" yields its lower bound), and everything after the first
    whitespace is discarded. Amounts carrying a '₡' or 'rp' marker are multiplied
    by the corresponding module-level conversion rate.

    Parameters
    ----------
    value_str : object
        Raw price value. Anything that is not a ``str`` is returned unchanged.

    Returns
    -------
    str or float or object
        * plain USD amounts: the cleaned numeric text (still a string; callers
          are expected to run it through ``pd.to_numeric``),
        * '₡' / 'rp' amounts: the converted value as a float,
        * non-string input: the input itself.

    Raises
    ------
    ValueError
        If a '₡' / 'rp' amount cannot be parsed by ``pd.to_numeric``.

    Notes
    -----
    Because everything from the first '-' onwards is cut, a negative amount comes
    back as an empty string. Thousands separators are not handled.
    """
    if not isinstance(value_str, str):
        return value_str

    text = value_str.strip().lower().replace('usd', '').replace('$', '').strip()

    # Re-attach a leading currency marker to its amount ("rp 100" -> "rp100").
    # Without this, the whitespace truncation below would keep only the marker and
    # the amount would silently turn into NaN.
    text = re.sub(r"^(₡|rp)\s+", r"\1", text)

    # Keep only the lower bound of a price range (ASCII hyphen or en dash).
    text = re.sub(r"[-–].*", "", text)
    # Drop any trailing text after the first whitespace.
    text = re.sub(r"\s.*", "", text)

    if "₡" in text:
        return pd.to_numeric(text.replace("₡", "")) * colon_to_dollar_conversion_rate
    if "rp" in text:
        return pd.to_numeric(text.replace("rp", "")) * ind_rp_to_dollar_conversion_rate
    return text
    
# Clean each price string with extract_usd_value, then convert the whole column to a
# numeric dtype in a single vectorised pd.to_numeric call (instead of calling
# pd.to_numeric once per row).
victoria_secret['mrp'] = pd.to_numeric(victoria_secret['mrp'].apply(extract_usd_value))
victoria_secret['price'] = pd.to_numeric(victoria_secret['price'].apply(extract_usd_value))
victoria_secret.head(10)

Unnamed: 0,product_name,mrp,price,product_category,description,total_sizes,available_size,color
0,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,peach melba
1,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,black
2,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,plum dust
3,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,ensign blue
4,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,fair orchid
5,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,windy blue
6,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,coconut white
7,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",XL,peach melba
8,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",XL,black
9,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",XL,plum dust


In [22]:
# Categorizing the Products into Sub-Groups for generalization.
# Reference: https://www.kaggle.com/code/jkokatjuhha/data-driven-lingerie-shopping?scriptVersionId=6849300&cellId=16

def manipul_regex(str_array):
    """Build the keyword-matching regular expression used by `categorize`.

    The keywords are joined into one alternation and wrapped so that a keyword
    only matches when it is preceded by the start of the string or a whitespace
    character, and followed either by the end of the string or by a whitespace
    character plus a literal dot.

    Parameters
    ----------
    str_array : iterable of str
        Keywords / phrases. They are inserted into the pattern verbatim (not
        escaped), so regex metacharacters in them keep their special meaning.

    Returns
    -------
    str
        The regular-expression pattern (contains capture groups).
    """
    # NOTE(review): the suffix group only accepts "whitespace + '.'" or end of string,
    # so in practice a keyword has to close the product name. Confirm this is intended.
    alternatives = '|'.join(str_array)
    # Raw strings: the regex escapes for whitespace and the literal dot are not valid
    # Python string escapes and must not be written in ordinary string literals.
    return r'(^|\s)(' + alternatives + r')(\s\.|$)'

def categorize(data):
    """Assign each product a broad category based on keywords in its name.

    For every category a regular expression is built with `manipul_regex` and
    matched case-insensitively against ``data['product_name']``. Matching rows get
    the category label written to ``data['product_category_wide']``.

    Categories are applied in the order listed below and later assignments
    overwrite earlier ones, so when a name matches several categories the one
    listed last wins. Rows that match nothing are left without a label.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``product_name`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object (not a copy), with ``product_category_wide`` set.
    """
    # (label, keywords) pairs, ordered from lowest to highest priority.
    category_keywords = [
        ('slip', ['slip', 'waist slip', 'half-slip', 'slips', 'half-slips', 'petticoat']),
        ('shorts', ['short', 'shorts', 'chiffon tap pant']),
        ('robe', ['robe', 'kimonos', 'kimono']),
        ('tops', ['tops', 'top', 'tee', 'tunic']),
        ('suspenders', ['suspenders', 'belt']),
        ('rompers', ['romper']),
        ('babydoll', ['babydoll', 'camisole and bikini set by bluebella', 'chemise']),
        ('bodys', ['bodysuit', 'teddy', 'wink plaything', 'legging', 'dress']),
        ('bralettes', ['bralettes', 'bralette', 'bralet', 'silicone petal']),
        ('activewear', ['sports bra', 'sport bra', 'sport bralette', 'sports bralette',
                        'strappybralette', 'full-zip']),
        ('camisoles', ['camisole', 'cami', 'tank']),
        ('bras', ['bra', 'bustier', 'strapless', 'balconette', 'bandeau',
                  'body by victoria unlined demi', 'push-up', 'push up', 'push',
                  'pink seamless lightly lined racerback',
                  'body by wacoal seamless underwire', 'basic beauty wireless contour',
                  'pink lace lightly lined triangle', 'lace cross front unlined halter',
                  'high neck keyhole halter', 'high-neck wrap']),
        ('panties', ['thong', 'g string', 'pant', 'v kini', 'boypant', 'pants', 'panty',
                     'thongs', 'panties', 'ladypant', 'knickers', 'thong', 'twist knicker',
                     'brief', 'boyshort', 'lace v front short', 'signature lace bike short',
                     'side tie bikini', 'signature lace string bikini', 'tanga', 'panty',
                     'hipster', 'vikini', 'cheekster', 'boypants', 'ladypants', 'boyshorts',
                     'hiphugger', 'pink high leg logo bikini', 'pink shortie',
                     'pink logo bikini', 'lace sexy shortie', 'body base shorty',
                     'bikini bottom', 'lace mini bikini', 'ruched mini bikini',
                     'high leg bikini']),
        ('onepiece', ['One-Piece', 'one piece', 'One-Piece Halter']),
    ]

    for label, keywords in category_keywords:
        pattern = manipul_regex(keywords)
        # na=False: a missing product name counts as "no match". Without it the mask
        # contains NaN and boolean indexing with .loc raises ValueError.
        matches = data['product_name'].str.contains(pattern, case=False, na=False)
        data.loc[matches, 'product_category_wide'] = label
    return data

In [32]:
# `categorize` writes the new column onto the frame it receives and returns that same
# object, so `victoria_secret_c` is another name for `victoria_secret`, not a copy.
victoria_secret_c = categorize(victoria_secret)
victoria_secret_c['product_category_wide'].unique()

array(['panties', 'bras', 'robe', 'bralettes', 'tops', 'bodys',
       'onepiece', 'slip', nan, 'babydoll', 'shorts', 'camisoles',
       'rompers', 'activewear', 'suspenders'], dtype=object)

In [33]:
victoria_secret_c['product_category_wide'].value_counts()

bras          323536
panties        72354
bralettes      37467
bodys           3228
slip            2952
tops            2937
onepiece        1658
babydoll        1599
camisoles        552
rompers          450
robe             385
activewear       199
shorts           168
suspenders         2
Name: product_category_wide, dtype: int64

In [34]:
victoria_secret_c['product_category_wide'].isnull().sum()

5899

In [45]:
# Some products (bottle, basketball, etc.) are not relevant for this study, so rows
# without a category are dropped.
# The 'suspenders' category is removed as well, since it has only 2 entries.

victoria_secret_s = victoria_secret_c.loc[
    victoria_secret_c['product_category_wide'].notna()
    & victoria_secret_c['product_category_wide'].ne('suspenders')
]
victoria_secret_s.head()

Unnamed: 0,product_name,mrp,price,product_category,description,total_sizes,available_size,color,product_category_wide
0,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,peach melba,panties
1,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,black,panties
2,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,plum dust,panties
3,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,ensign blue,panties
4,Very Sexy Strappy Lace Thong Panty,14.5,14.5,Strappy Lace Thong Panty,"Lots of cheek peek, pretty lace, a strappy bac...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",S,fair orchid,panties


In [46]:
victoria_secret_s['product_category_wide'].value_counts()

bras          323536
panties        72354
bralettes      37467
bodys           3228
slip            2952
tops            2937
onepiece        1658
babydoll        1599
camisoles        552
rompers          450
robe             385
activewear       199
shorts           168
Name: product_category_wide, dtype: int64