In [1]:
import pandas as pd
import numpy as np 
from sklearn.manifold import TSNE

In [2]:
# Load the cosmetics table from the CSV file in the working directory.
# Later cells read the `Label` and `ingredients` columns and the 0/1 flag
# columns from position 6 onward.
cosm_2 = pd.read_csv('cosmetic_p.csv')

In [24]:
# Skin-type columns offered as the second recommender option.
option_2 = [
    'Combination',
    'Dry',
    'Normal',
    'Oily',
    'Sensitive',
]
print(option_2)

['Combination', 'Dry', 'Normal', 'Oily', 'Sensitive']


In [23]:
# First option: the distinct values found in the Label column, as a list.
option_1 = cosm_2['Label'].unique().tolist()

# Second option: every column name from position 6 onward.
# NOTE(review): this rebinds option_2, which the cell above sets to the five
# skin types. Run top-to-bottom, this longer list is the one later cells
# see -- confirm which of the two lists the recommender loop should use.
option_2 = list(cosm_2.columns[6:])

print(option_1, option_2, sep='\n')

['Moisturizer', 'Cleanser', 'Treatment', 'Mask', 'Eye', 'SPF']
['Combination', 'Dry', 'Full', 'Light', 'Matte', 'Medium', 'Natural', 'Normal', 'Oily', 'Radiant', 'Sensitive']


In [36]:
# Example: Moisturizers for Dry skin

ex_opt1 = cosm_2.Label.unique().tolist()
ex_opt2 = ['Combination','Dry','Normal','Oily','Sensitive']

# Filter data by given options.
# Both conditions are combined into a single mask with `&`. Chaining two
# boolean filters (cosm_2[a][b]) applies the full-length second mask to an
# already filtered frame, which makes pandas reindex the mask and emit
# "UserWarning: Boolean Series key will be reindexed to match DataFrame index".
df = cosm_2[(cosm_2['Label'] == 'Moisturizer') & (cosm_2['Dry'] == 1)]
# reset_index() keeps the original row labels in a new 'index' column.
df = df.reset_index()

  import sys


In [29]:
# Tokenize each product's comma-separated `ingredients` string,
# index every distinct ingredient in a dictionary (ingredient_idx),
# then build a binary bag-of-words matrix from the tokens.

def my_recommender(op_1, op_2, random_state=42):
    """Embed the ingredient lists of one product subset in 2-D with t-SNE.

    Selects the rows of the module-level ``cosm_2`` frame whose ``Label``
    equals ``op_1`` and whose ``op_2`` column equals 1, one-hot encodes the
    ingredients of each selected product, and reduces that matrix to two
    t-SNE coordinates.

    Parameters
    ----------
    op_1 :
        Value compared against ``cosm_2['Label']``.
    op_2 :
        Name of a ``cosm_2`` column; rows where it equals 1 are kept.
    random_state : int or None, default 42
        Seed forwarded to ``TSNE`` so repeated runs give the same
        coordinates. Pass ``None`` for an unseeded embedding.

    Returns
    -------
    pandas.DataFrame
        The selected rows after ``reset_index()`` (the original row labels
        are kept in an ``index`` column) with two added columns, ``X`` and
        ``Y``. Subsets with fewer than two rows cannot be embedded and get
        0.0 for both coordinates.
    """
    # One combined mask: chaining two boolean filters applies the second,
    # full-length mask to an already filtered frame and triggers a pandas
    # "Boolean Series key will be reindexed" UserWarning.
    selected = (cosm_2['Label'] == op_1) & (cosm_2[op_2] == 1)
    df = cosm_2[selected].reset_index()

    # One token list per product (lower-cased, split on ', ').
    corpus = [text.lower().split(', ') for text in df['ingredients']]

    # Map each distinct ingredient to a column index in first-seen order.
    ingredient_idx = {}
    for tokens in corpus:
        for ingredient in tokens:
            # len() is evaluated before insertion, so a new ingredient gets
            # the next free index and a known one keeps its index.
            ingredient_idx.setdefault(ingredient, len(ingredient_idx))

    # Document-term matrix: one row per product, one column per ingredient,
    # 1 where the product lists the ingredient and 0 otherwise.
    n_products = len(corpus)
    n_ingredients = len(ingredient_idx)
    A = np.zeros(shape=(n_products, n_ingredients))
    for row, tokens in enumerate(corpus):
        A[row, [ingredient_idx[t] for t in tokens]] = 1

    if n_products < 2:
        # Nothing to embed for an empty or single-product subset; return
        # zero coordinates instead of letting t-SNE raise.
        tsne_features = np.zeros((n_products, 2))
    else:
        # scikit-learn requires perplexity < n_samples; 30.0 is its default,
        # so subsets with more than 30 products are embedded as before.
        perplexity = min(30.0, n_products - 1)
        model = TSNE(
            n_components=2,
            learning_rate=200,
            perplexity=perplexity,
            random_state=random_state,
        )
        tsne_features = model.fit_transform(A)

    # Store the two embedding coordinates next to the product data.
    df['X'] = tsne_features[:, 0]
    df['Y'] = tsne_features[:, 1]

    return df

In [30]:
# Create df for all combos.
# Each subset frame is collected in a list and concatenated once at the end:
# calling pd.concat inside the loop re-copies every accumulated row on each
# iteration and starts from an empty DataFrame, which newer pandas versions
# warn about.
frames = []
for op_1 in option_1:
    for op_2 in option_2:
        temp = my_recommender(op_1, op_2)
        # Replace the product category with the combined "<label>_<option>" tag.
        temp['Label'] = op_1 + '_' + op_2
        frames.append(temp)

# Row labels are kept per subset (no ignore_index), as before.
df_all = pd.concat(frames) if frames else pd.DataFrame()

  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  


In [31]:
# Write the combined table to disk. 'utf-8-sig' puts a UTF-8 byte-order mark
# at the start of the file; index = False leaves the row labels out of the CSV.
df_all.to_csv('cosmetic_tsne.csv', encoding = 'utf-8-sig', index = False)

In [37]:
# Spot-check five randomly chosen rows of the combined table
# (no seed is passed, so the rows shown differ between runs).
df_all.sample(5)

Unnamed: 0,index,Label,brand,name,price,rank,ingredients,Combination,Dry,Full,Light,Matte,Medium,Natural,Normal,Oily,Radiant,Sensitive,X,Y
70,666,Treatment_Normal,TATA HARPER,Rejuvenating Serum,110,3.7,-Buckbean Extract: Helps promote a visibly fir...,1,1,0,0,0,0,0,1,1,0,1,-65.381439,8.259336
127,748,Treatment_Oily,PERRICONE MD,Essential Fx Acyl-Glutathione Deep Crease Serum,179,4.6,-Acyl-Glutathione\n-Flaxseed Oil\n-Chia Seed O...,1,1,0,0,0,0,0,1,1,0,1,-29.95936,61.692757
34,1138,Eye_Sensitive,LANCÔME,Rénergie Lift Multi-Action Eye,75,3.9,-Caffeine\n-Hyaluronic Acid\n-Shea Butter\n\nW...,1,1,0,0,0,0,0,1,1,0,1,6.729945,-128.341095
130,1293,Eye_Normal,ESTÉE LAUDER,DayWear Eye Cooling Anti-Oxidant Moisture GelC...,40,3.7,Daywear Eye Jelly Division: El (Estee Lauder)I...,1,1,0,0,0,0,0,1,1,0,0,39.573494,-137.422592
54,1166,Eye_Sensitive,LANCER,Eye Contour Lifting Cream with Diamond Powder,95,4.4,-Alfalfa Seed Extract and Lupine Protein: Rich...,1,1,0,0,0,0,0,1,1,0,1,40.90023,-30.014637
