In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
import nltk.stem.porter as porter

# Data Exploration, Pre-Processing, Feature Engineering

In [2]:
pd.set_option('display.max_rows', 100)

In [3]:
product_data = pd.read_excel("Behold+product+data+04262021.xlsx")
brands = pd.read_csv("behold_brands.csv")

In [4]:
product_data.head()

Unnamed: 0,product_id,brand,brand_category,name,details,created_at,brand_canonical_url,description,brand_description,brand_name,product_active
0,01EX0PN4J9WRNZH5F93YEX6QAF,Two,Unknown,Khadi Stripe Shirt-our signature shirt,,2021-01-27 01:17:19.305 UTC,https://two-nyc.myshopify.com/products/white-k...,Our signature khadi shirt\navailable in black ...,Our signature khadi shirt\n\navailable in blac...,Khadi Stripe Shirt-our signature shirt,True
1,01F0C4SKZV6YXS3265JMC39NXW,Collina Strada,Unknown,RUFFLE MARKET DRESS LOOPY PINK SISTINE TOMATO,,2021-03-09 18:43:10.457 UTC,https://collina-strada-2.myshopify.com/product...,Mid-length dress with ruffles and adjustable s...,Mid-length dress with ruffles and adjustable s...,RUFFLE MARKET DRESS LOOPY PINK SISTINE TOMATO,True
2,01EY4Y1BW8VZW51BWG5VZY82XW,Cariuma,Unknown,IBI Slip On Raw Red Knit Sneaker Women,,2021-02-10 02:58:59.591 UTC,https://cariuma.myshopify.com/products/ibi-sli...,IBI Slip On Raw Red Knit Sneaker Women,IBI Slip On Raw Red Knit Sneaker Women,IBI Slip On Raw Red Knit Sneaker Women,False
3,01EY50E27A0P5V6KCW01XPDB43,Cariuma,Unknown,IBI Slip On Black Knit Sneaker Women,,2021-02-10 03:40:52.842 UTC,https://cariuma.myshopify.com/products/ibi-sli...,IBI Slip On Black Knit Sneaker Women,IBI Slip On Black Knit Sneaker Women,IBI Slip On Black Knit Sneaker Women,False
4,01EY6DWHC2W5HPNEGXKEJ4A1CX,Cariuma,Unknown,CATIBA PRO Skate Black Suede and Canvas Contra...,,2021-02-10 16:55:13.024 UTC,https://cariuma.myshopify.com/products/catiba-...,,,CATIBA PRO Skate Black Suede and Canvas Contra...,False


In [5]:
product_data.shape

(61355, 11)

In [6]:
brands.head()

Unnamed: 0,brand_id,brand,brand_value,bio,quote,quote_attribute,intro,lifestyle_copy,short_bio,listing_bio
0,01ESKR0CH2KYC7KBNTN0S38EQA,Mari Giudicelli,Handmade / Artisan Crafted,,,,,,,Behold Mari Giudicelli! This Brazilian shoe de...
1,01ESKR0CH2KYC7KBNTN0S38EQA,Mari Giudicelli,Sustainable,,,,,,,Behold Mari Giudicelli! This Brazilian shoe de...
2,01ESKR0CH2KYC7KBNTN0S38EQA,Mari Giudicelli,Women Owned,,,,,,,Behold Mari Giudicelli! This Brazilian shoe de...
3,01ESVD80YYAJQEQPADZ7HTHZXA,Warm,,,,,,,,"Behold Warm! Founded by specialty store owner,..."
4,01EFJFZ329YVC8SK87R2BNJGV0,Vaara,Emerging,,,,,,,


In [7]:
outfit_combos = pd.read_csv("outfit_combinations.csv")
tags = pd.read_csv("usc_additional_tags.csv")

In [8]:
outfit_combos.head()

Unnamed: 0,outfit_id,product_id,outfit_item_type,brand,product_full_name
0,01DDBHC62ES5K80P0KYJ56AM2T,01DMBRYVA2P5H24WK0HTK4R0A1,bottom,Eileen Fisher,Slim Knit Skirt
1,01DDBHC62ES5K80P0KYJ56AM2T,01DMBRYVA2PEPWFTT7RMP5AA1T,top,Eileen Fisher,Rib Mock Neck Tank
2,01DDBHC62ES5K80P0KYJ56AM2T,01DMBRYVA2S5T9W793F4CY41HE,accessory1,kate spade new york,medium margaux leather satchel
3,01DDBHC62ES5K80P0KYJ56AM2T,01DMBRYVA2ZFDYRYY5TRQZJTBD,shoe,Tory Burch,Penelope Mid Cap Toe Pump
4,01DMHCX50CFX5YNG99F3Y65GQW,01DMBRYVA2P5H24WK0HTK4R0A1,bottom,Eileen Fisher,Slim Knit Skirt


In [9]:
tags.head()

Unnamed: 0,product_id,product_color_id,attribute_name,attribute_value
0,01E5ZXP5H0BTEZT9QD2HRZJ47A,01E5ZXP5JCREDC7WJVMWHK5Q40,materialclothing,linenblend
1,01E5ZXP5H0BTEZT9QD2HRZJ47A,01E5ZXP5JCREDC7WJVMWHK5Q40,materialclothing,cottonblend
2,01E5ZXP5H0BTEZT9QD2HRZJ47A,01E5ZXP5JCREDC7WJVMWHK5Q40,style,modern
3,01E5ZXP5H0BTEZT9QD2HRZJ47A,01E5ZXP5JCREDC7WJVMWHK5Q40,style,businesscasual
4,01E5ZXP5H0BTEZT9QD2HRZJ47A,01E5ZXP5JCREDC7WJVMWHK5Q40,style,classic


In [10]:
'''
Create a dataframe with unique product IDs called final tags
Create a new dataframe with product IDs that are in the tags table 
If an attribute is not in the final tags dataframe, set the attribute to a null object
'''

final_tags = pd.DataFrame()
final_tags["product_id"] = tags.product_id.unique()
for product_id in tags.product_id.unique():
    temp_df_product = tags.loc[tags["product_id"] == product_id]
    for attribute in temp_df_product.attribute_name.values:
        if attribute not in final_tags.columns:
            final_tags[attribute] = np.nan
            final_tags[attribute] = final_tags[attribute].astype(object)
        final_tags.loc[final_tags["product_id"] == product_id, attribute] = (",").join(set(temp_df_product.loc[temp_df_product.attribute_name == attribute].attribute_value.values))


In [11]:
final_tags.head().T

Unnamed: 0,0,1,2,3,4
product_id,01E5ZXP5H0BTEZT9QD2HRZJ47A,01E606ZSSETA2X60TY021JG303,01E603S8HDAQM7HKKYDYQQR9R9,01E5ZT24JNG1DDMNE32A47C2XS,01E5ZSVW5PV46KVVB52DBX1DFY
materialclothing,"linenblend,cottonblend",,"denim,purecotton",polyester,"linenblend,viscose"
style,"businesscasual,classic,modern","glam,edgy,modern","casual,modern","edgy,modern","retro,casual,classic"
primarycolor,blues,blacks,blues,"yellows,blacks",oranges
occasion,work,nightout,"daytonight,weekend","weekend,nightout","daytonight,weekend,vacation"
category,bottom,shoe,bottom,top,onepiece
drycleanonly,yes,,yes,yes,yes
subcategorybottom,pantsleggings,,pantsleggings,,
legstyle,straight,,,,
lengthpantsandleggings,regular,,,,


In [12]:
final_tags.isna().sum()

product_id                              0
materialclothing                     1046
style                                  54
primarycolor                           45
occasion                               56
category                                1
drycleanonly                         1194
subcategorybottom                    3078
legstyle                             3465
lengthpantsandleggings               3441
classpantsandleggings                3312
closurepantsandleggings              3403
rise                                 3386
sizing                               1029
gender                                 57
fit                                  1021
uppermaterial                        3312
toestyle                             3309
heelheight                           3499
subcategoryshoe                      3301
closureshoe                          3371
classbooties                         3914
embellishment                        3641
shoewidth                         

In [13]:
#drop NA values from final_tags, requires that a column has at least 2000 non-NA
final_tags = final_tags.dropna(thresh=2000, axis=1)

In [14]:
#left join product and final tags tables 
full_product_data = pd.merge(
                                product_data,
                                final_tags,
                                how="left",
                                on=None,
                                left_on="product_id",
                                right_on="product_id",
                                left_index=False,
                                right_index=False,
                                sort=True,
                                copy=True,
                                indicator=False,
                                validate=None,
                            )

In [15]:
'''
Groups clothing into different categories using regex
'''

def findCategory(txt):
    """ Function to determine the article of clothing """
    
    txt = str(txt)
    val = np.nan
    if re.search(r'pant(?:s)?|trousers|jeans|shorts|leggings|skirt', txt, re.IGNORECASE ):
        val = "Bottom"
    elif re.search(r'\bdress\b|gown|jumpsuit|romper', txt, re.IGNORECASE ):
        val = "One Piece"
    elif re.search(r'shoe(?:s)*|sneaker(?:s)*|heels|pumps|sandals', txt, re.IGNORECASE ):
        val = "Shoe"
    elif re.search(r'purse|handbag|tote|clutch', txt, re.IGNORECASE ):
        val = "Handbag"
    elif re.search(r'scar(?:f|ves)|bandana', txt, re.IGNORECASE ):
        val = "Scarf"
    elif re.search(r'(?:tee|t)?-?shirt|blouse|blazer|\bvest\b|tee', txt, re.IGNORECASE ):
        val = "Top"
    elif re.search(r'sweater|jacket|coat|pullover|sweater|hoodie|poncho|sweatshirt', txt, re.IGNORECASE ):
        val = "Winter"
    elif re.search(r'glasses|shades', txt, re.IGNORECASE ):
        val = "Eyewear"
    elif re.search(r'necklace|earring|bracelet|watch', txt, re.IGNORECASE ):
        val = "Jewelry"
    return val

In [16]:
full_product_data["product_category"] = full_product_data.name.apply( findCategory ).combine_first( full_product_data.details.apply( findCategory ).combine_first( full_product_data.brand_category.apply( findCategory ).combine_first( full_product_data.brand_description.apply(findCategory).combine_first( full_product_data.description.apply(findCategory) ) ) ) )

In [17]:
def find_country(txt):
    """ Function to determine the 'Made in' country """
    
    txt = str(txt)
    val = np.nan
    if re.search(r'(?:m|M)ade in (?:the)?((\b[A-Z]+\w+\s)+)', txt ):
            val = re.findall(r'(?:m|M)ade in (?:the)?((\b[A-Z]+\w+\s)+)', txt )[0][0]
    return val

In [18]:
full_product_data['made_in_country'] = full_product_data.description.apply( find_country )

In [19]:
full_product_data.loc[21200:21300].T

Unnamed: 0,21200,21201,21202,21203,21204,21205,21206,21207,21208,21209,...,21291,21292,21293,21294,21295,21296,21297,21298,21299,21300
product_id,01EEZT73NP5CW57FSP2WGPHFP8,01EEZT76QQYW26WTAV3PTTHMHR,01EEZT79R20X12A4T807DT0DN9,01EEZT7B1SRGJZ5STPXC36ZVG2,01EEZT7DZ9K0M26P9GJJ5WB3EW,01EEZT7HQKF0KBSJP2JJ1WMA2E,01EEZT7N1V6XNTQ3VDK13ER42W,01EEZT7R8TNGE59Q7YMHTP4HV8,01EEZT7WPEZ1J32B71ZSG9Z8DT,01EEZT7YZ4QDWYD8F0Q69HFY4B,...,01EEZV776N91F89HG6BTNDW8YZ,01EEZVCJWF8REZH1FMS9ESTW4K,01EEZW2HMBPJNHFHNA98FKKTVE,01EEZW4B0353Y3VW173AEVRF71,01EEZZ8FMBN35YYH3TE39DAJWM,01EEZZJDGPDG4E1MMC2AYZBBGW,01EF000GCZBEY2DT68XQHPQXX5,01EF004892VD0NDMD3P046A1SM,01EF00MQVFC02ZNWVCERHT34BF,01EF00TPF2V1Q8Z65T96G3XZZ2
brand,Chufy,Chufy,Chufy,Chufy,Chufy,Chufy,Chufy,Chufy,Chufy,Chufy,...,Intentionally Blank,Intentionally Blank,Intentionally Blank,Intentionally Blank,Maia Bergman,ASTR the Label,ASTR the Label,Cynthia Rowley,Collina Strada,Collina Strada
brand_category,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,...,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown
name,TR Jamal Jumpsuit,TR Jiraff Skirt,TR Jiraffe Safari Dress,TR KAF ABAYA,TR Kaf Dress,TR KAF LONG ROBE,TR Kaf Shirt,TR Kangoo Long Kimono,TR Kangoo Short Kimono,TR Khuyana Blouse,...,PAPRIKA White Blouse,ORE Polka Dot Dress,FUCK IT Black Tie Dye,FUCK IT Clouds Tie Dye,Zoe (Jaguar),Micah Striped Midi Wrap Dress,Long Sleeve Draped Dress,Heather One piece,Wave Nonna Tote,Swirl Quilted Nonna Tote
details,,,,,,,,,,,...,,,,,,• 100% Viscose\n • Snap Button & Self Tie Clos...,• 95% Polyester / 5% Elastane\n • Lining: 100%...,,,
created_at,2020-08-05 17:20:33.966 UTC,2020-08-05 17:20:37.103 UTC,2020-08-05 17:20:40.186 UTC,2020-08-05 17:20:41.521 UTC,2020-08-05 17:20:44.512 UTC,2020-08-05 17:20:48.362 UTC,2020-08-05 17:20:51.763 UTC,2020-08-05 17:20:55.058 UTC,2020-08-05 17:20:59.589 UTC,2020-08-05 17:21:01.915 UTC,...,2020-08-05 17:38:06.156 UTC,2020-08-05 17:41:01.867 UTC,2020-08-05 17:53:01.566 UTC,2020-08-05 17:54:00.308 UTC,2020-08-05 18:48:41.808 UTC,2020-08-05 18:54:07.374 UTC,2020-08-05 19:01:49.079 UTC,2020-08-05 19:03:51.833 UTC,2020-08-05 19:12:52.07 UTC,2020-08-05 19:16:07.258 UTC
brand_canonical_url,https://under-our-sky.myshopify.com/products/c...,https://under-our-sky.myshopify.com/products/t...,https://under-our-sky.myshopify.com/products/t...,https://under-our-sky.myshopify.com/products/t...,https://under-our-sky.myshopify.com/products/c...,https://under-our-sky.myshopify.com/products/t...,https://under-our-sky.myshopify.com/products/c...,https://under-our-sky.myshopify.com/products/t...,https://under-our-sky.myshopify.com/products/c...,https://under-our-sky.myshopify.com/products/c...,...,https://intentionallyblank.myshopify.com/produ...,https://intentionallyblank.myshopify.com/produ...,https://intentionallyblank.myshopify.com/produ...,https://intentionallyblank.myshopify.com/produ...,https://maiabergman.myshopify.com/products/zoe...,https://astr-the-label.myshopify.com/products/...,https://astr-the-label.myshopify.com/products/...,https://cynthiarowley.myshopify.com/products/h...,https://collina-strada-2.myshopify.com/product...,https://collina-strada-2.myshopify.com/product...
description,There is something about jumpsuits that make y...,"""Memories of Kenya"" is inspired by the country...","""Memories of Kenya"" is inspired by the country...","Long Abaya with embroidered front, round and w...","The Kaf Dress with ""V"" neck is a favorite of D...",The Kaf Long Robe is an easy to wear style fea...,Forever searching for the perfect shirt. The K...,The Japanese collection is inspired by the vib...,The Japanese collection is inspired by the vib...,V-neck blouse with balloon sleeves.\nPrinted C...,...,MEET PAPRIKA: This item is one sized and best ...,MEET ORE: This item is one sized and best fits...,FUCK IT: WE USE DEAD STOCK SUPREME TEE SHIRTS ...,FUCK IT: WE USE DEAD STOCK SUPREME TEE SHIRTS ...,My very favourite Zoe dress gets the jungle tr...,"100% Viscose, Length: 50 7/8"" (Size M), Snap B...",Draped in all the right places! This flatterin...,,Large tote bag made from deadstock upholstery ...,Large quilted tote bag made from deadstock uph...
brand_description,There is something about jumpsuits that make y...,"""Memories of Kenya"" is inspired by the country...","""Memories of Kenya"" is inspired by the country...","Long Abaya with embroidered front, round and w...","The Kaf Dress with ""V"" neck is a favorite of D...",The Kaf Long Robe is an easy to wear style fea...,Forever searching for the perfect shirt. The K...,The Japanese collection is inspired by the vib...,The Japanese collection is inspired by the vib...,V-neck blouse with balloon sleeves.\n\nPrinted...,...,MEET PAPRIKA: This item is one sized and best ...,MEET ORE: This item is one sized and best fits...,FUCK IT: WE USE DEAD STOCK SUPREME TEE SHIRTS ...,FUCK IT: WE USE DEAD STOCK SUPREME TEE SHIRTS ...,My very favourite Zoe dress gets the jungle tr...,"100% Viscose, Length: 50 7/8"" (Size M), Snap B...",Draped in all the right places! This flatterin...,,Large tote bag made from deadstock upholstery ...,Large quilted tote bag made from deadstock uph...
brand_name,TR Jamal Jumpsuit,TR Jiraff Skirt,TR Jiraffe Safari Dress,TR KAF ABAYA,TR Kaf Dress,TR KAF LONG ROBE,TR Kaf Shirt,TR Kangoo Long Kimono,TR Kangoo Short Kimono,TR Khuyana Blouse,...,PAPRIKA White Blouse,ORE Polka Dot Dress,FUCK IT Black Tie Dye,FUCK IT Clouds Tie Dye,Zoe (Jaguar),Micah Striped Midi Wrap Dress,Long Sleeve Draped Dress,Heather One piece,Wave Nonna Tote,Swirl Quilted Nonna Tote


In [20]:
full_product_data.isna().sum()

product_id                 0
brand                      0
brand_category           459
name                       1
details                52155
created_at                 0
brand_canonical_url        0
description            10117
brand_description      10121
brand_name                 1
product_active             0
materialclothing       58578
style                  57598
primarycolor           57590
occasion               57600
category               57576
drycleanonly           58717
sizing                 58566
gender                 57598
fit                    58558
sleevelength           59414
product_category       24457
made_in_country        53372
dtype: int64

In [21]:
#for all null values, fill in unknown country

full_product_data.made_in_country = full_product_data.made_in_country.fillna("UNKNOWN_COUNTRY")

In [22]:
def make_singular_color(colors):
    val = []
    try:
        for color in colors.split(","):
            if color[-3:] == "ies":
                color = color[:-3]+'y'
            elif color[-1] == 's':
                color = color[:-1]
            val.append(color)
    except Exception as e:
        val = np.nan
    return val

In [23]:
full_product_data.primarycolor = full_product_data.primarycolor.apply(make_singular_color)

In [24]:
colors_re = r'\b(white|yellow|gray|brown|darkbrown|orange|red|green|black|silver|burgundy|lightbrown|gold|beige|pink|purple|blue|floral(?:s)?)\b'

In [25]:
def findColors(txt):
    """ Function to determine the color of item """
    
    val = []
    txt = str(txt)
    if re.findall(colors_re, txt, re.IGNORECASE ):
        val = re.findall(colors_re, txt, re.IGNORECASE )
    return val

In [26]:
'''
Create a temporary column to find up the color of the item
Set the color to be lowercase 

'''
full_product_data['colors_temp'] = full_product_data.description.apply(findColors) + full_product_data.name.apply(findColors)
full_product_data.colors_temp = full_product_data.colors_temp.apply(lambda x: set(y.lower() for y in x))
full_product_data.colors_temp = full_product_data.colors_temp.replace({""}, np.nan).replace(set(), np.nan)

In [27]:
full_product_data.primarycolor = full_product_data.primarycolor.combine_first(full_product_data.colors_temp)

In [28]:
full_product_data.primarycolor = full_product_data.primarycolor.apply(lambda x: (",").join(x) if type(x) == list or type(x) ==set else x )
full_product_data.primarycolor = full_product_data.primarycolor.fillna("UNKNOWN_COLOR")

In [29]:
all_materials = []
for materials in full_product_data.materialclothing.unique():
    try:
        all_materials.extend(materials.split(","))
    except Exception as e:
        print(e, materials)
all_materials = set(all_materials)

'float' object has no attribute 'split' nan


In [30]:
"|".join(all_materials)

'tweed|crepedechine|ramie|denim|chenille|woolblend|chambray|nylon|puresilk|synthetic|satincharmeuse|velvet|chiffon|linenblend|acetate|purecotton|purelinen|viscose|lyocell|rayon|cottonblend|acrylic|jerseyknit|modal|silkblend|fauxshearling|twill|elastane|corduroy|sateen|fauxfur|purewool|fauxleather|spandex|leather|mohair|suede|shearling|cashmereblend|alpaca|purecashmere|polyester|fleece|cupro'

In [31]:
materials_re = r'\b(chambray|denim|alpaca|silkblend|chenille|tweed|mohair|woolblend|elastane|purewool|lyocell|twill|ramie|fleece|polyester|cupro|puresilk|crepedechine|fauxleather|synthetic|purelinen|acetate|chiffon|fauxshearling|corduroy|satincharmeuse|leather|rayon|purecashmere|modal|velvet|shearling|fauxfur|sateen|cottonblend|cashmereblend|spandex|nylon|acrylic|purecotton|viscose|linenblend|jerseyknit|suede|silk|wool|leather|metal|metallic)\b'

In [32]:
def find_materials(txt):
    """ Function to determine the material of item """
    
    val = []
    txt = str(txt)
    if re.findall(materials_re, txt, re.IGNORECASE ):
        val = re.findall(materials_re, txt, re.IGNORECASE )
    return val

In [33]:
full_product_data['materials_temp'] = full_product_data.description.apply(find_materials) + full_product_data.name.apply(find_materials) + full_product_data.details.apply(find_materials) + full_product_data.description.apply(find_materials)
full_product_data.materials_temp = full_product_data.materials_temp.apply(lambda x: set(y.lower() for y in x))
full_product_data.materials_temp = full_product_data.materials_temp.replace({""}, np.nan).replace(set(), np.nan)

In [34]:
full_product_data.materialclothing = full_product_data.materialclothing.combine_first(full_product_data.materials_temp)

In [35]:
full_product_data.materialclothing = full_product_data.materialclothing.apply(lambda x: (",").join(x) if type(x) == list or type(x)==set else x)
full_product_data.materialclothing = full_product_data.materialclothing.fillna("UNKNOWN_MATERIAL")

In [36]:
'''
Function to perform stemming
'''
def stem_column(column):

    stp = set(stopwords.words("english")) 
    stemmer = porter.PorterStemmer()

    cleaned_stemmed_column = []
    for doc in column:
        doc = str(doc)
        tokens = nltk.word_tokenize(str.lower(doc))
        cleaned_stemmed_tokens = [stemmer.stem(token) for token in tokens if token not in stp]
        cleaned_stemmed_column.append(" ".join(cleaned_stemmed_tokens))
    return cleaned_stemmed_column

In [37]:
'''
Replace \n with white space 
Stem the full product details column
'''
full_product_data.details = full_product_data.details.str.replace("\n", " ")
full_product_data["details_stemmed"] = stem_column(full_product_data.details)

In [38]:
full_product_data.description = full_product_data.description.str.replace("\n", " ")
full_product_data["description_stemmed"] = stem_column(full_product_data.description)

In [39]:
brands.head(10).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
brand_id,01ESKR0CH2KYC7KBNTN0S38EQA,01ESKR0CH2KYC7KBNTN0S38EQA,01ESKR0CH2KYC7KBNTN0S38EQA,01ESVD80YYAJQEQPADZ7HTHZXA,01EFJFZ329YVC8SK87R2BNJGV0,01EFJFYPRSP57FG4WC6NN3W4AX,01EGK48V5A9H3524T57YY6CHAP,01EGK48V5A9H3524T57YY6CHAP,01EGK48V5A9H3524T57YY6CHAP,01EFCRCM541YY74XDXQ3FJXQGD
brand,Mari Giudicelli,Mari Giudicelli,Mari Giudicelli,Warm,Vaara,Ancient Greek Sandals,Avavav,Avavav,Avavav,Misa
brand_value,Handmade / Artisan Crafted,Sustainable,Women Owned,,Emerging,Handmade / Artisan Crafted,Women Owned,Sustainable,Emerging,Emerging
bio,,,,,,Ancient Greek Sandals are handmade locally by ...,AV finds new ways of working within the tradit...,AV finds new ways of working within the tradit...,AV finds new ways of working within the tradit...,Guided by our core principle of effortless fem...
quote,,,,,,"A luxury, keep-forever version of the roadside...",WE GIVE NEW LIFE TO LEFTOVER FABRICS FROM THE ...,WE GIVE NEW LIFE TO LEFTOVER FABRICS FROM THE ...,WE GIVE NEW LIFE TO LEFTOVER FABRICS FROM THE ...,"Interesting textures and fabrications, at a fr..."
quote_attribute,,,,,,Vogue UK,"Linda Friberg, Co-Founder","Linda Friberg, Co-Founder","Linda Friberg, Co-Founder",Vogue
intro,,,,,,"According to an Ancient Greek myth, the gods a...","AVAVAV offers luxury, contemporary fashion at ...","AVAVAV offers luxury, contemporary fashion at ...","AVAVAV offers luxury, contemporary fashion at ...","In 2016, Shadi Askari-Farhat launched MISA, wh..."
lifestyle_copy,,,,,,"Our Thais in Natural under the sun, Get ready ...",...,...,...,All of our pieces are handmade in Los Angeles ...
short_bio,,,,,,Handmade leather sandals with traditional tech...,"A little bit of Sweden, A little bit of Italy ...","A little bit of Sweden, A little bit of Italy ...","A little bit of Sweden, A little bit of Italy ...","We embody a romantic heart, bohemian spirit an..."
listing_bio,Behold Mari Giudicelli! This Brazilian shoe de...,Behold Mari Giudicelli! This Brazilian shoe de...,Behold Mari Giudicelli! This Brazilian shoe de...,"Behold Warm! Founded by specialty store owner,...",,Behold Ancient Greek Sandals! For when you don...,"Behold Avavav! Florence-based design duo, Adam...","Behold Avavav! Florence-based design duo, Adam...","Behold Avavav! Florence-based design duo, Adam...","Behold Misa! Made in LA, these effortless bohe..."


In [40]:
full_product_data.to_csv("PreprocessedData.csv")