## 1. Importing the libraries

In [None]:
# import libraries
import pandas as pd
import numpy as np
import math as m
from sklearn.metrics.pairwise import cosine_similarity

from bokeh.io import show, curdoc, output_notebook, push_notebook
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Select, Paragraph, TextInput
from bokeh.layouts import widgetbox, column, row
from ipywidgets import interact 

In [None]:
# Load the cosmetics table (it carries the 'X'/'Y' coordinates and the 'Label'
# column used throughout this notebook) and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='data/cosmetic_TSNE.csv')
df.head(n=5)

In [None]:
# Display the column labels of the loaded table.
df.columns

In [None]:
# Distinct values of the 'Label' column. The option lists in this notebook
# hold 6 and 5 entries, so 30 combined labels are expected here.
df['Label'].unique()

In [None]:
# Filtering options for the cosmetics. A 'Label' value is made of one entry
# from each list joined by an underscore, e.g. 'Moisturizer_Dry'.
option_1 = [
    'Moisturizer',
    'Cleanser',
    'Treatment',
    'Face Mask',
    'Eye cream',
    'Sun protect',
]
option_2 = [
    'Combination',
    'Dry',
    'Normal',
    'Oily',
    'Sensitive',
]

## 2. Mapping with Bokeh

In [None]:
# Direct Bokeh output to this notebook (bokeh.io.output_notebook).
output_notebook()

In [None]:
# Wrap the whole table in a Bokeh data source; the scatter plot and the
# hover tool below both read their fields from it.
source = ColumnDataSource(df)

plot = figure(
    x_axis_label='T-SNE 1',
    y_axis_label='T-SNE 2',
    width=500,
    height=400,
)
plot.background_fill_color = "beige"
plot.background_fill_alpha = 0.2

# One circle per row, positioned by the 'X' and 'Y' fields of the source.
plot.circle(
    x='X',
    y='Y',
    source=source,
    size=10,
    color='#FF7373',
    alpha=0.8,
)

# Hover tool showing the name, brand, price and rank fields of a point.
tooltip_rows = [
    ('Item', '@name'),
    ('brand', '@brand'),
    ('Price', '$ @price'),
    ('Rank', '@rank'),
]
hover = HoverTool(tooltips=tooltip_rows)
plot.add_tools(hover)

In [None]:
# define the callback
def update(op1=option_1[0], op2=option_2[0]):
    """Restrict the plotted points to the rows labelled ``'<op1>_<op2>'``.

    Parameters
    ----------
    op1 : str
        First half of the label; defaults to the first entry of ``option_1``.
    op2 : str
        Second half of the label; defaults to the first entry of ``option_2``.

    Side effects
    ------------
    Replaces ``source.data`` with the matching rows of ``df`` (columns X, Y,
    name, brand, price and rank) and then calls ``push_notebook()``.
    A label with no matching rows yields empty columns.
    """
    label = op1 + '_' + op2

    # Filter once and reuse the subset for every column, instead of
    # re-evaluating the same boolean mask for each of the six fields.
    selected = df[df['Label'] == label]
    columns = ('X', 'Y', 'name', 'brand', 'price', 'rank')

    source.data = {column: selected[column] for column in columns}
    push_notebook()

In [None]:
# Connect the `update` callback to interactive widgets and display the plot.
# NOTE(review): output_notebook() is already called in an earlier cell; this
# second call looks redundant — confirm before removing.
output_notebook()

# The op1/op2 choices offered by the widgets are the entries of
# option_1/option_2.
# NOTE(review): interact() presumably runs `update` once right away, i.e.
# before show() below has produced a notebook handle — confirm whether the
# push_notebook() inside `update` warns on that first call.
interact(update, op1 = option_1, op2 = option_2)
# notebook_handle = True is presumably what lets push_notebook() refresh this
# plot in place — verify against the Bokeh notebook documentation.
show(plot, notebook_handle = True)

## 3. Cosine similarity

The reference item (`myItem`) is [Peat Miracle Revital Cream](https://www.sephora.com/product/peat-miracle-revital-cream-P412440).

In [None]:
# Keep only the rows labelled 'Moisturizer_Dry', renumbered from 0, and add
# a 'dist' column initialised to 0.0.
is_dry_moisturizer = df['Label'] == 'Moisturizer_Dry'
df_2 = df.loc[is_dry_moisturizer].reset_index(drop=True)
df_2['dist'] = 0.0

# Look up the reference item by a fragment of its name and display it.
matches_my_item = df_2['name'].str.contains('Peat Miracle Revital')
myItem = df_2[matches_my_item]
myItem

In [None]:
# Coordinates of myItem as a single-row array: all X values, then all Y values.
my_x = myItem['X'].to_numpy()
my_y = myItem['Y'].to_numpy()
P1 = np.concatenate((my_x, my_y)).reshape(1, -1)
P1

In [None]:
# Cosine distance (1 - cosine similarity) between myItem and every item.
#
# cos(a, b) = (a . b) / (||a|| * ||b||), using Euclidean norms. It is stored
# as a distance because the 'dist' column is sorted in ascending order to
# find the closest items: 0 means same direction as myItem, 2 means opposite.
coords = df_2[['X', 'Y']].to_numpy(dtype=float)
query = np.asarray(P1, dtype=float).ravel()

# A zero-length vector has no direction, so its similarity is undefined;
# the 0/0 division yields NaN for such rows instead of a runtime warning.
norms = np.linalg.norm(coords, axis=1) * np.linalg.norm(query)
with np.errstate(divide='ignore', invalid='ignore'):
    # Clip away floating-point overshoot so the distance stays within [0, 2].
    similarity = np.clip((coords @ query) / norms, -1.0, 1.0)

# Assign the whole column at once: writing through df_2["dist"].iloc[i] is a
# chained assignment, which pandas may apply to a temporary copy.
df_2['dist'] = 1.0 - similarity


In [None]:
# Top 5 closest items: the five rows with the smallest 'dist' values.
df_2 = df_2.sort_values(by='dist', ascending=True)
df_2.loc[:, ['name', 'brand', 'dist']].head(n=5)

In [None]:
# Top 5 worst choices: the five rows with the largest 'dist' values.
df_2 = df_2.sort_values(by='dist', ascending=False)
df_2.loc[:, ['name', 'brand', 'dist', 'rank']].head(n=5)