## LUSH Recommender (Content-based) - Item-to-Item

In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Load the product catalogue. A forward-slash relative path is used because it
# resolves on Windows, macOS and Linux alike, whereas a backslash is only
# treated as a path separator on Windows.
df_lush = pd.read_excel("Data/LushDataset.xlsx")

# Keep only the two columns the recommender reads: the product name (used for
# lookups) and the ingredient text (used to build the TF-IDF vectors).
lush_ingredients = df_lush[['Name', 'Ingredients']]

In [4]:
# TF-IDF over the ingredient text: English stop words are removed and terms
# that occur in fewer than two products (min_df=2) are ignored.
vectorizer = TfidfVectorizer(stop_words="english", min_df=2)

# Missing ingredient lists are read by pandas as NaN, which the vectorizer
# rejects as an invalid document; treat them as empty text instead so one
# incomplete row does not abort the whole fit.
TF_IDF_matrix = vectorizer.fit_transform(
    lush_ingredients['Ingredients'].fillna('').astype(str)
)

In [5]:
# Pairwise cosine similarity between every pair of product TF-IDF vectors.
# dense_output=False asks for a sparse result when the input is sparse.
similarities = cosine_similarity(X=TF_IDF_matrix, Y=None, dense_output=False)

def content_recommender(name, lush_ingredients, similarities, top_n=3):
    """Return the products most similar to ``name``.

    Parameters
    ----------
    name : str
        Value to look up in ``lush_ingredients['Name']``. If several rows
        share the name, the first one is used.
    lush_ingredients : pandas.DataFrame
        Frame with a ``'Name'`` column whose row order matches the row/column
        order of ``similarities``.
    similarities : scipy sparse matrix or numpy.ndarray
        Square item-to-item similarity matrix.
    top_n : int, default 3
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``'name'`` and ``'similarity'``, sorted by similarity in
        descending order, indexed like ``lush_ingredients`` and never
        containing the queried row itself.

    Raises
    ------
    IndexError
        If ``name`` does not occur in ``lush_ingredients['Name']``.
    """
    # Locate the row by *position*: the similarity matrix is positional, so an
    # index label would be wrong for any frame without a default RangeIndex.
    matches = np.flatnonzero((lush_ingredients['Name'] == name).to_numpy())
    if matches.size == 0:
        raise IndexError(f"product {name!r} not found in lush_ingredients['Name']")
    article_position = int(matches[0])

    # Accept both sparse and dense similarity matrices.
    row = similarities[article_position]
    if hasattr(row, 'toarray'):
        row = row.toarray()
    scores = np.asarray(row, dtype=float).ravel()

    sim_df = pd.DataFrame(
        {'name': lush_ingredients['Name'].to_numpy(), 'similarity': scores},
        index=lush_ingredients.index,
    )

    # Drop the queried product explicitly instead of assuming it sorts first;
    # that assumption breaks when another product is equally similar or when
    # the product's own vector is empty.
    keep = np.ones(len(sim_df), dtype=bool)
    keep[article_position] = False
    candidates = sim_df[keep]

    # A stable sort keeps tied products in catalogue order, so the result is
    # deterministic.
    ranked = candidates.sort_values(by='similarity', ascending=False, kind='mergesort')
    return ranked.head(top_n)

In [6]:
def magic(name):
    """Show the products most similar to ``name`` with their type and price.

    Prints ``'not found'`` and returns ``None`` when ``name`` is absent from
    ``df_lush['Name']``. Otherwise the recommendations produced by
    ``content_recommender`` are joined back onto ``df_lush`` and returned as
    a DataFrame with the columns ``name``, ``Type``, ``similarity`` and
    ``Price``, ordered from most to least similar.
    """
    known_names = df_lush['Name'].values
    if not (name in known_names):
        print('not found')
        return None

    neighbours = content_recommender(name, lush_ingredients, similarities)

    # Left join keeps every recommendation and pulls in the catalogue columns.
    enriched = neighbours.merge(
        df_lush,
        how='left',
        left_on='name',
        right_on='Name',
    )

    report = enriched[['name', 'Type', 'similarity', 'Price']]
    return report.sort_values(by='similarity', ascending=False)

In [7]:
# Example lookup: the products most similar to 'Minamisoma'.
magic(name='Minamisoma')

Unnamed: 0,name,Type,similarity,Price
0,Purity & Clarity,Massage Bar,0.762592,13.0
1,Coco Loco,Naked Shower Oil,0.317748,10.5
2,Scrubee,Body Butter,0.316269,11.5
