In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the cosmetics dataset from a CSV file in the current working directory.
df=pd.read_csv("cosmetics.csv")


In [None]:
# Inspect the dataset - display the first few rows.
# head must be *called*; passing the bound method `df.head` only shows the
# method's repr instead of a preview of the rows.
df.head()


In [None]:
# Check the structure of the dataset: column names, dtypes and non-null counts.
# (df.head() only previews rows; df.info() is what reports the column types.)
df.info()

In [None]:
# Show how many products fall under each product category ('Label' column).
print(df['Label'].value_counts())

In [None]:
# Narrow the catalogue in two steps: keep rows labelled 'Moisturizer', then
# keep those flagged for dry skin (Dry == 1) and renumber the rows from 0.
moisturizers = df.loc[df['Label'].eq('Moisturizer')]
moisturizers_dry = (
    moisturizers
    .loc[moisturizers['Dry'].eq(1)]
    .reset_index(drop=True)
)


In [None]:
# Tokenize the ingredients: lower-case each product's ingredient string and
# split it on ', ' so every product becomes a list of individual ingredients.
corpus = [
    ingredient_text.lower().split(', ')
    for ingredient_text in moisturizers_dry['Ingredients']
]
    
    

In [None]:
# Display the tokenized ingredient lists of the first two products.
print(corpus[:2])

In [None]:
from sklearn.feature_extraction.text import CountVectorizer


In [None]:
# Create a document-term matrix: one row per product, one column per ingredient.
def split_ingredients(ingredient_text):
    """Split one ', '-separated ingredient string into clean ingredient tokens.

    The separator stays ', ' (comma + space) so chemical names that contain a
    bare comma, such as '1,2-hexanediol', are not broken apart. Surrounding
    whitespace is stripped and empty tokens are dropped so they do not become
    spurious vocabulary entries.
    """
    return [token.strip() for token in ingredient_text.split(', ') if token.strip()]


# token_pattern=None tells scikit-learn the regex pattern is intentionally
# unused; otherwise it warns that token_pattern is ignored when a custom
# tokenizer is supplied.
vectorizer = CountVectorizer(tokenizer=split_ingredients, token_pattern=None)
dtm = vectorizer.fit_transform(moisturizers_dry['Ingredients'])

In [None]:
# Convert the document-term matrix to a dense array; the t-SNE and
# cosine-similarity steps further down both take ingredient_matrix as input.
ingredient_matrix=dtm.toarray()


In [None]:
# Get the feature names (the ingredient vocabulary learned by the vectorizer).
ingredients=vectorizer.get_feature_names_out()
print(ingredients[:10])  # preview the first ten entries of the vocabulary

In [None]:
from sklearn.manifold import TSNE

In [None]:
# Apply t-SNE to reduce the ingredient matrix to two dimensions.
# random_state is pinned to a fixed seed so reruns use the same random
# initialisation.
model=TSNE(n_components=2,random_state=42)
tsne_features=model.fit_transform(ingredient_matrix)

In [None]:
# Store the two t-SNE components on the product frame as plot coordinates:
# column 0 of the embedding becomes 'x', column 1 becomes 'y'.
moisturizers_dry['x'], moisturizers_dry['y'] = (
    tsne_features[:, 0],
    tsne_features[:, 1],
)


In [None]:
from bokeh.plotting import figure,show,ColumnDataSource
from bokeh.models import HoverTool
from bokeh.io import output_notebook
# Configure Bokeh to render its output inline in the notebook.
output_notebook()

In [None]:
# Create a ColumnDataSource for Bokeh from the filtered product frame; the
# glyphs and hover tooltips reference its columns by name (e.g. 'x', 'y').
source=ColumnDataSource(moisturizers_dry)

In [None]:
# Create the scatter plot canvas.
# `width`/`height` replace `plot_width`/`plot_height`, which were removed in
# Bokeh 3.0 and make figure() fail there.
plot = figure(
    title="Cosmetic Ingredient Similarity",
    x_axis_label='T-SNE 1',
    y_axis_label='T-SNE 2',
    width=300,
    height=400,
)



In [None]:
# Add one circular marker per product at its t-SNE coordinates.
# scatter() (default marker: circle) is used because circle() with a screen
# `size` is deprecated in recent Bokeh releases. The trailing ';' hides the
# renderer repr in the cell output.
plot.scatter(x='x', y='y', size=10, source=source, color="navy", alpha=0.6);


In [None]:
# Add a hover tool to show product details.
# Column names containing spaces must be wrapped in braces (@{Product Name});
# backticks are not Bokeh field syntax, so the tooltip would not resolve the
# column.
hover = HoverTool(
    tooltips=[
        ("Product", "@{Product Name}"),
        ("Brand", "@Brand"),
        ("Price", "@Price"),
    ]
)
plot.add_tools(hover)

In [None]:
# Render the scatter plot.
show(plot)

In [None]:
# Hover functionality is already attached to the plot via `hover`; adding a
# second HoverTool would stack two tooltips on every point. Reconfigure the
# existing tool instead, listing the brand first. Column names with spaces use
# the @{...} field syntax.
hover.tooltips = [
    ("Brand", "@Brand"),
    ("Product", "@{Product Name}"),
    ("Price", "@Price"),
]

In [None]:
# Render the plot again so the hover changes made just above take effect.
show(plot)

In [None]:
#Example of how we can analyze nearest products using distances
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Calculate pairwise cosine similarity between products from their ingredient
# count vectors; row i of the result is read as product i's similarity scores.
similarity_matrix=cosine_similarity(ingredient_matrix)

In [None]:
# Function to find the top similar products for a given product.
def recommend_similar_product(product_idx, top_n=5, similarity=None, products=None):
    """Return the ``top_n`` products most similar to the one at ``product_idx``.

    Parameters
    ----------
    product_idx : int
        Positional row index of the query product. Negative values count
        from the end, as with normal sequence indexing.
    top_n : int, default 5
        Maximum number of recommendations. Values <= 0 give an empty frame.
    similarity : array-like, optional
        Square matrix of pairwise similarity scores. Defaults to the
        notebook-level ``similarity_matrix``.
    products : pandas.DataFrame, optional
        Frame whose row positions line up with ``similarity``. Defaults to
        the notebook-level ``moisturizers_dry``.

    Returns
    -------
    pandas.DataFrame
        The 'Product Name', 'Brand' and 'Price' columns of the recommended
        rows, most similar first. The query product is never included.

    Raises
    ------
    IndexError
        If ``product_idx`` is outside the similarity matrix.
    """
    if similarity is None:
        similarity = similarity_matrix
    if products is None:
        products = moisturizers_dry

    scores = np.asarray(similarity)[product_idx]
    # Normalise negative indices so the query row can be compared by position.
    query_pos = range(scores.shape[0])[product_idx]

    # Stable descending sort keeps tied scores in their original row order.
    ranked = np.argsort(-scores, kind="stable")
    # Drop the query explicitly: with ties (identical ingredient lists) or an
    # all-zero row it is not guaranteed to sort first, so slicing off position
    # 0 could return the product itself as its own recommendation.
    ranked = ranked[ranked != query_pos]

    top_positions = ranked[:max(top_n, 0)]
    return products.iloc[top_positions][['Product Name', 'Brand', 'Price']]




In [None]:
# Example: recommend products similar to the first product (row position 0),
# using the function's default of five recommendations.
recommend_similar_product(0)