In [1]:
# import libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

from bokeh.io import show, curdoc, output_notebook, push_notebook
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Select, Paragraph, TextInput
from bokeh.layouts import widgetbox, column, row
from ipywidgets import interact 

In [2]:
df = pd.read_csv('../data/cosmetic_TSNE.csv')
df.head()

Unnamed: 0,Label,Name,brand,price,rank,ingredients,Combination,Dry,Normal,Oil,X,Y
0,moisturizer_Combination,Revealer Skin-Improving Foundation SPF25 with ...,Kosas,42,4.0,"Zinc Oxide (7.5%), Water, Ethylhexyl Olivate, ...",1,1,1,1,1.377293,-2.710624
1,moisturizer_Combination,Plum Plump Hyaluronic Acid Moisturizer,Glow Recipe,39,4.5,"Water/Aqua/Eau, Propanediol, Glycerin, C13-15 ...",1,1,1,1,4.284342,-3.638289
2,moisturizer_Combination,The Water Cream Oil-Free Pore Minimizing Moist...,Tatcha,69,4.0,"Water, Saccharomyces/Camellia Sinensis Leaf/Cl...",1,1,1,1,1.072965,-4.175159
3,moisturizer_Combination,Vitamin Enriched Face Base Priming Moisturizer,Bobbi Brown,64,4.5,"Water, Cyclopentasiloxane, Bis-Diglyceryl Poly...",1,0,1,1,-1.305105,1.498212
4,moisturizer_Combination,Ultra Repair® Cream Intense Hydration,First Aid Beauty,38,4.5,"Colloidal Oatmeal 0.50%, Water, Stearic Acid, ...",1,1,1,0,2.768894,-5.801363


In [3]:
df.Label.unique()

array(['moisturizer_Combination', 'moisturizer_Dry', 'moisturizer_Normal',
       'moisturizer_Oil', 'cleanser_Combination', 'cleanser_Dry',
       'cleanser_Normal', 'cleanser_Oil', 'face_treatment_Combination',
       'face_treatment_Dry', 'face_treatment_Normal',
       'face_treatment_Oil', 'face_mask_Combination', 'face_mask_Dry',
       'face_mask_Normal', 'face_mask_Oil', 'eye_treatment_Combination',
       'eye_treatment_Dry', 'eye_treatment_Normal', 'eye_treatment_Oil',
       'sunscreen_Combination', 'sunscreen_Dry', 'sunscreen_Normal',
       'sunscreen_Oil'], dtype=object)

In [4]:
df.columns

Index(['Label', 'Name', 'brand', 'price', 'rank', 'ingredients', 'Combination',
       'Dry', 'Normal', 'Oil', 'X', 'Y'],
      dtype='object')

In [5]:
option_1 =['moisturizer', 'cleanser', 'face_treatment', 'face_mask', 'eye_treatment', 'sunscreen']
option_2 = ['Combination', 'Dry', 'Normal', 'Oil']

# Mapping with Bokeh

In [6]:
output_notebook()

In [7]:
# make a source and scatter bokeh plot  
source = ColumnDataSource(df)
plot = figure(x_axis_label = 'T-SNE 1', y_axis_label = 'T-SNE 2', 
              width = 500, height = 400)
plot.circle(x = 'X', y = 'Y', source = source, 
            size = 10, color = '#FF7373', alpha = .8)

plot.background_fill_color = "beige"
plot.background_fill_alpha = 0.2

# add hover tool
hover = HoverTool(tooltips = [
        ('Item', '@Name'),
        ('brand', '@brand'),
        ('Price', '$ @price'),
        ('Rank', '@rank')])
plot.add_tools(hover)

In [8]:
def update(op1 = option_1[0], op2 = option_2[0]):
    a_b = op1 + '_' + op2
    new_data = {
        'X' : df[df['Label'] == a_b]['X'],
        'Y' : df[df['Label'] == a_b]['Y'],
        'name' : df[df['Label'] == a_b]['Name'],
        'brand' : df[df['Label'] == a_b]['brand'],
        'price' : df[df['Label'] == a_b]['price'],
        'rank' : df[df['Label'] == a_b]['rank'],
    }
    source.data = new_data
    push_notebook()

In [9]:
output_notebook()

interact(update, op1 = option_1, op2 = option_2)
show(plot, notebook_handle = True)

interactive(children=(Dropdown(description='op1', options=('moisturizer', 'cleanser', 'face_treatment', 'face_…

# Cosine similarity

Taking exam of Peat Miracle Revital Cream

In [10]:
df_2 = df.loc[df.Label == 'moisturizer_Dry'].reset_index().drop('index', axis = 1)
df_2['dist'] = 0.0
myItem = df_2[df_2.Name.str.contains('Peat Miracle Revital Cream')]
myItem

Unnamed: 0,Label,Name,brand,price,rank,ingredients,Combination,Dry,Normal,Oil,X,Y,dist
287,moisturizer_Dry,Peat Miracle Revital Cream,belif,58,4.5,"Water, Dipropylene Glycol, Glycerin, Caprylic/...",1,1,1,1,-0.917154,-7.641208,0.0


In [11]:
# getting the array for myItem
P1 = np.array([myItem.X.values, myItem.Y.values]).reshape(1, -1)

In [12]:
# cosine similarities with other items
for i in range(len(df_2)):
    P2 = np.array([df_2['X'][i], df_2['Y'][i]]).reshape(-1, 1)
    df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.d

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.d

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.d

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.dist[i] = cosine_similarity(P1, P2.reshape(1,2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2.d

In [13]:
df_2 = df_2.sort_values('dist', ascending=False)
df_2[['Name', 'brand', 'dist']].head(6)

Unnamed: 0,Name,brand,dist
287,Peat Miracle Revital Cream,belif,1.0
396,CC+ Airbrush Perfecting Powder,IT Cosmetics,0.99999
212,Black Label Detox BB Beauty Balm SPF 30,Dr. Jart+,0.999954
370,Bye Bye Redness Neutralizing Color-Correcting ...,IT Cosmetics,0.999944
188,+Retinol Vitamin C Moisturizer,Kate Somerville,0.999917
288,NightWear Plus Anti-Oxidant Night Detox Moistu...,Estée Lauder,0.999838


These are the top 5 items that are similar to the product type Moisturizer with name Peat Miracle Revital Cream and skin type dry with respect to the ingredients