## Import libraries

In [1]:
import pandas as pd
import numpy as np 
from sklearn.manifold import TSNE

## Creating options (for dropdown menu later)

In [2]:
# Load the data 
df = pd.read_csv('body-clean.csv')

In [5]:
option_1 = df.Label.unique().tolist()

In [6]:
option_2 = df.columns[6:].tolist()
# all possible combinations for the option choices 

print(option_1)
print(option_2)

['soap', 'moisturizers', 'SPF', 'body', 'supplements']
['Combination', 'Dry', 'Normal', 'Oily']


In [7]:
# Example: Moisturizers for Dry skin

# Filter data by given options 
df2 = df[df['Label'] == 'moisturizers'][df['Dry'] == 1]
df2 = df2.reset_index() 

  after removing the cwd from sys.path.


In [8]:
df2.head(5)

Unnamed: 0,level_0,index,Label,brand,name,price,ingredients,Combination,Dry,Normal,Oily
0,124,127,moisturizers,JOSIE MARAN,Whipped Argan Oil Body Butter,['36'],-100 Percent Pure Argan Oil: Nature's richest ...,1,1,1,1
1,128,131,moisturizers,CAUDALIE,Vinosculpt Lift & Firm Body Cream,['45'],-Grape Polyphenols and Iris Extract: Provide a...,0,1,0,0
2,139,142,moisturizers,SOL DE JANEIRO,Samba 2-Step Foot Fetish Care,['27'],"-Cupuaçu Butter: Full of fatty-acids, phytoste...",1,1,1,1
3,144,147,moisturizers,KIEHL'S SINCE 1851,Creme de Corps Nourishing Dry Body Oil,['35'],-Grapeseed Oil: Rich in essential fatty acids ...,0,1,1,0
4,147,150,moisturizers,REN CLEAN SKINCARE,AHA Smart Renewal Body Serum,['42'],-Lactic Acid: A natural L(+) lactic acid produ...,1,1,1,1


## 1. Tokenizing ingredients list
## 2. Apply dimensionality reduction to reduce features

In [12]:
# tokenizing the list of ingredients in Ingredients column 
# after splitting into tokens, make a binary bag of words
# create dictionary with tokens, ingredient_idx 

In [9]:
def my_recommender(op_1, op_2): 
    new = df[df['Label'] == op_1][df[op_2] == 1]
    new = new.reset_index()
    
    # embedding each ingredient
    ingredient_idx = {}
    corpus = []
    idx = 0 
    
    for i in range(len(new)):
        ingred = new['ingredients'][i]
        ingred = ingred.lower()
        tokens = ingred.split(', ')
        corpus.append(tokens)
        
        # tokenized words put into corpus 
        
        for ingredient in tokens:
            if ingredient not in ingredient_idx:
                ingredient_idx[ingredient] = idx
                idx += 1
                
                # words put into dictionary 
                # checks duplication 
                # index increases by 1 
    
    # DTM (cosmetic-ingredient)
    # cosmetic product corresponds to a document
    # chemical composition corresponds to term 
    
    # number of items, tokens (M, N)
    M = len(new)
    N = len(ingredient_idx)
    
    # initialize matrix of zeros 
    A = np.zeros(shape = (M,N))
    
    # define one-hot encoder func
    # 1 if ingredient is in cosmetic, otherwise 0 
    def oh_enc(tokens):
        x = np.zeros(N)
        for t in tokens: 
            idx = ingredient_idx[t]
            # '1' at corresponding indices
            x[idx] = 1 
        return x 
    
    # apply oh_enc to tokens in corpus 
    # set values at each row of matrix 
    
    i = 0 
    for tokens in corpus:
        A[i, :]= oh_enc(tokens)
        i += 1
    
    # dimension reduction w t-SNE
    model = TSNE(n_components =2, learning_rate = 200)
    tsne_features = model.fit_transform(A) 
    
    # make X, Y columns
    new['X'] = tsne_features[:,0]
    new['Y'] = tsne_features[:,1]
    
    return new

In [10]:
# Create df for all combos 
combo_df = pd.DataFrame() 
for op_1 in option_1:
    for op_2 in option_2:
        temp = my_recommender(op_1,op_2)
        temp['Label'] = op_1 + '_' + op_2
        combo_df = pd.concat([combo_df, temp])

  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  


ValueError: Found array with 1 sample(s) (shape=(1, 6)) while a minimum of 2 is required.

## Save file

In [11]:
combo_df.to_csv('body_tsne.csv', encoding = 'utf-8-sig', index = False)

In [13]:
combo_df.head(5)

Unnamed: 0,level_0,index,Label,brand,name,price,ingredients,Combination,Dry,Normal,Oily,X,Y
0,0,0,soap_Combination,FIRST AID BEAUTY,KP Bump Eraser Body Scrub with 10% AHA,['28'],-Pumice Buffing Beads: Exfoliate particles to ...,1,0,0,0,-83.188995,-80.32206
1,6,7,soap_Combination,HERBIVORE,Coco Rose Exfoliating Body Scrub,['36'],-Virgin Coconut Oil: Provides intensive hydrat...,1,1,1,1,-84.823898,96.266724
2,19,20,soap_Combination,HERBIVORE,Coconut Milk Bath Soak,['18'],"-Coconut Milk Powder: Hydrates and soothes, le...",1,1,1,1,101.310699,107.358665
3,40,41,soap_Combination,MOROCCANOIL,Shower Gel,['30'],-Argan Oil: Extremely rich in tocopherols (vit...,1,1,1,1,-141.323334,7.435887
4,49,50,soap_Combination,CAUDALIE,Crushed Cabernet Scrub,['38'],-Crushed Grape Seeds and Brown Sugar: Exfoliat...,1,1,1,1,2.820092,-126.10321
