In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors

# Content-based recommender: find the products whose ingredient lists are most
# similar (TF-IDF + cosine distance) to a chosen sunscreen/SPF product.

df = pd.read_csv('sephora_website_dataset.csv')

# Filter products based on the specified ingredients.
# na=False treats rows with missing ingredient text as non-matches; without it
# the mask would contain NaN and boolean indexing would raise.
ingredients = ["avobenzone", "ecamsule", "oxybenzone", "octocrylene", "zinc oxide", "titanium dioxide", "octinoxate"]
ingredient_pattern = '|'.join(ingredients)
df_filtered = df[df["ingredients"].str.contains(ingredient_pattern, case=False, na=False)]

# Use TF-IDF to vectorize the ingredients of the products.
# Row i of tfidf_matrix corresponds to df_filtered.iloc[i] (position, not label).
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df_filtered["ingredients"])

# Dense pairwise similarity matrix (one row/column per filtered product).
# Not needed for the neighbour search below; kept because a later cell inspects it.
cosine_similarities = cosine_similarity(tfidf_matrix)

# Nearest-neighbour index over the TF-IDF rows, using cosine distance.
k = 3
neigh = NearestNeighbors(n_neighbors=k, metric='cosine')
neigh.fit(tfidf_matrix)

# Define the target products for which recommendations need to be generated,
# best rated / most reviewed first.
filtered_target_products = df_filtered[df_filtered["name"].str.contains("sunscreen|spf", case=False, na=False)]
filtered_target_products = filtered_target_products.sort_values(by=['rating', 'number_of_reviews'], ascending=False)
if filtered_target_products.empty:
    raise ValueError("No sunscreen/SPF product found among the filtered products.")

# Get user input for the product name
product_name = input("Enter a product name: ").strip()

# Pick the target: the best-ranked candidate whose name contains the user's
# text (literal, case-insensitive match). Fall back to the overall best-ranked
# candidate when nothing was typed or nothing matches.
target_product = filtered_target_products.iloc[0]
if product_name:
    name_matches = filtered_target_products[
        filtered_target_products["name"].str.contains(product_name, case=False, regex=False, na=False)
    ]
    if name_matches.empty:
        print('No sunscreen/SPF product matching %r; using the top-rated one instead.' % product_name)
    else:
        target_product = name_matches.iloc[0]

# df_filtered keeps the row labels of the original df, but tfidf_matrix is
# addressed by position, so translate the target's label into its position.
target_position = df_filtered.index.get_loc(target_product.name)

# Ask for one extra neighbour because the target itself is expected among the
# results; never request more neighbours than there are rows.
n_query = min(k + 1, tfidf_matrix.shape[0])
neighbor_positions = neigh.kneighbors(tfidf_matrix[target_position], n_neighbors=n_query, return_distance=False)[0]

# Remove the target explicitly rather than assuming it is the first hit:
# products with identical ingredient lists tie at distance 0 in any order.
top_k_indexes = neighbor_positions[neighbor_positions != target_position][:k]

# Get the recommended products based on the (positional) indexes
recommended_products = df_filtered.iloc[top_k_indexes]

# Sort the recommended products by rating and number of reviews
recommended_products = recommended_products.sort_values(by=['rating', 'number_of_reviews'], ascending=False)

# Print the target product followed by its recommendations
print('\nProduct Name:', target_product['name'])
print('Rating:', target_product['rating'])
print('Number of Reviews:', target_product['number_of_reviews'])
print('Recommended products:')
for _, recommended_product in recommended_products.iterrows():
    print('- ' + recommended_product['name'])



In [20]:
# Sanity check: (rows, columns) of the frame left after the ingredient filter.
df_filtered.shape


(3480, 21)

In [9]:
# Shape of the TF-IDF matrix: one row per document passed to fit_transform,
# one column per vocabulary term learned by the vectorizer.
# NOTE(review): the saved output (21, 21) does not match the 3480 rows reported
# for df_filtered — presumably left over from an earlier run; re-run after
# Restart & Run All to confirm.
tfidf_matrix.shape

(21, 21)

In [34]:
# Shape of the pairwise cosine-similarity matrix computed from tfidf_matrix
# (square: one row and one column per TF-IDF row).
cosine_similarities.shape

(3480, 3480)

In [39]:
# Display the recommended products (sorted by rating, then number of reviews,
# both descending).
# NOTE(review): the saved output lists 5 rows although the recommender cell
# sets k = 3 — presumably from a run with a different k; re-run to refresh.
recommended_products

Unnamed: 0,id,brand,category,name,size,rating,number_of_reviews,love,price,value_price,...,MarketingFlags,MarketingFlags_content,options,details,how_to_use,ingredients,online_only,exclusive,limited_edition,limited_time_offer
7078,1507367,SEPHORA COLLECTION,Foundation,Perfection Mist Airbrush Foundation,2.5 oz/ 74 mL,4.5,2000,81000,28.0,28.0,...,True,exclusive,no options,What it is: An airbrush- mist foundation- with...,Suggested Usage:\n-Shake well for five seconds...,Isobutane- Water- Cyclopentasiloxane- Butane- ...,0,1,0,0
6582,2189371,philosophy,Face Primer,anti-wrinkle miracle worker primer+ line-corre...,0.9 oz/ 26 mL,4.5,54,1300,38.0,38.0,...,False,0,no options,What it is: A line-correcting primer. \nSkin ...,Suggested Usage:-Apply a thin- even layer over...,Cyclopentasiloxane- Dimethicone Crosspolymer- ...,0,0,0,0
8520,2311272,Urban Decay,Concealer,Stay Naked Correcting Concealer Mini,2.5 g,4.5,2,1900,14.0,14.0,...,False,0,no options,What it is: An ultra-pigmented- lightweight c...,Suggested Usage:-The patented- wave-tip applic...,Water- Cyclopentasiloxane- Butylene Glycol- Si...,0,0,0,0
747,1229384,Benefit Cosmetics,Eye Primer,Stay Don’t Stray 360 Degree Stay Put Eyeshadow...,0.33 oz/ 10 mL,4.0,2000,29000,26.0,26.0,...,False,0,no options,What it is:\nA primer for concealers and eyesh...,Suggested Usage:\n-Apply three tiny dots under...,Water (Aqua)- Isododecane- Titanium Dioxide (C...,0,0,0,0
1409,2310514,Charlotte Tilbury,Moisturizers,Magic Cream Moisturizer - Lunar New Year Limit...,no size,3.5,2,1900,100.0,100.0,...,True,limited edition · online only,no options,What it is: An award-winning hydrating moistu...,Suggested Usage:-Apply Magic Cream to your fin...,Aqua/Water/Eau- Homosalate- Glyceryl Stearate ...,1,0,1,0
