# Recommendation System for Skincare Products
---
## Overview
This recommender model was inspired and adapted from Vik Paruchuri's recommender model for movies.

*YouTube video:* https://www.youtube.com/watch?v=eyEabQRBMQA

*GitHub repository:* https://github.com/dataquestio/project-walkthroughs/blob/master/movie_recs/movie_recommendations.ipynb 

- Data containing product and rating information was loaded
- Product and brand names were cleaned for further processing
- A search function was implemented using cosine similarity and TF-IDF for finding similar products
- A recommendation function was built to calculate recommendation scores based on user preferences and product popularity
- An interactive recommendation widget was created 

## Imports

In [538]:
# Imports needed to build recommender
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import TfidfVectorizer
import ipywidgets as widgets
from IPython.display import display
from sklearn.metrics.pairwise import cosine_similarity

## Load in product/rating data

In [554]:
# Load in data
prod = pd.read_csv('../data/Clean-ALL_reviews.csv', lineterminator='\n', low_memory=False , 
                   usecols=['category','rating','size','price_usd', 'loves_count', 'product_id','author_id', 'is_recommended', 'brand_name', 'product_name'])
prod.head()

Unnamed: 0,author_id,rating,is_recommended,product_id,product_name,brand_name,price_usd,loves_count,size,category
0,1741593524,5,1,P504322,Gentle Hydra-Gel Face Cleanser,NUDESTIX,19,177,2.4 oz / 70 ml,Cleansers
1,31423088263,1,0,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,1081315,0.7 oz/ 20 g,Lip Balms & Treatments
2,5061282401,5,1,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,1081315,0.7 oz/ 20 g,Lip Balms & Treatments
3,6083038851,5,1,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,1081315,0.7 oz/ 20 g,Lip Balms & Treatments
4,47056667835,5,1,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,1081315,0.7 oz/ 20 g,Lip Balms & Treatments


In [540]:
# Check out how many different products there are in this dataset
prod['product_name'].nunique()

2334

## Feature Engineering

In [544]:
# Create new column that combines the brand and product name 
prod['brand_product_name'] = prod['brand_name'] + ' ' + prod['product_name']
prod['brand_product_name']

0                    NUDESTIX Gentle Hydra-Gel Face Cleanser
1          LANEIGE Lip Sleeping Mask Intense Hydration wi...
2          LANEIGE Lip Sleeping Mask Intense Hydration wi...
3          LANEIGE Lip Sleeping Mask Intense Hydration wi...
4          LANEIGE Lip Sleeping Mask Intense Hydration wi...
                                 ...                        
1092732    StriVectin Multi Action Clear Acne Clearing Tr...
1092733    StriVectin Multi Action Clear Acne Clearing Tr...
1092734    StriVectin Multi Action Clear Acne Clearing Tr...
1092735    StriVectin Multi Action Clear Acne Clearing Tr...
1092736    StriVectin Multi Action Clear Acne Clearing Tr...
Name: brand_product_name, Length: 1092737, dtype: object

## Create search function

### Clean brand product name

In [545]:
# Create a function that cleans the product's name to simplify search, remove special characters
def clean_product_name(product):
    special_chars = '!@#$%^&*()+='
    for i in special_chars:
        product = product.replace(i, '')
    product = ' '.join(product.split())
    return product.lower()

In [548]:
# Create new column for the clean brand product name
prod['clean_brand_product_name'] = prod['brand_product_name'].apply(clean_product_name)

### Use TFIDF to convert names into numbers

In [549]:
# Convert product name into numbers for search engine
# Instantiate vectorizer and set nrgram to (1,3) so it looks at unigrams, bigrams, and trigrams
vectorizer = TfidfVectorizer(ngram_range=(1,3))

# Convert set of numbers into matrix
tfidf = vectorizer.fit_transform(prod['clean_brand_product_name'])

### Compute similarities between entered term and all of the products

In [550]:
# Create a search function
def search(product):
    
    product = clean_product_name(product)
    search_term_num = vectorizer.transform([product])
    similarity = cosine_similarity(search_term_num, tfidf).flatten()# numpy vector
    indices =np.argpartition(similarity, -1000)[-1000:]# Finds the 1000 most similar products to our search term
    results = prod.iloc[indices][::-1] # Inverse the order of results
    unique_prods = []  # Create a list comp that filters the indicies so it only returns unique product names
    filtered_index = [i for i in indices if (prod.iloc[i]['product_name'] not in unique_prods) and not unique_prods.append(prod.iloc[i]['product_name'])]
    results = prod.iloc[filtered_index]
    
    return results

In [551]:
search('The ordinary cleanser')

Unnamed: 0,rating,product_id,product_name,brand_name,price_usd,loves_count,size,category,brand_product_name,clean_brand_product_name
989090,1,P442757,"""B"" Oil",The Ordinary,11,49358,1 oz/ 30 mL,Moisturizers,"The Ordinary ""B"" Oil","the ordinary ""b"" oil"
707043,5,P444718,Squalane Cleanser,The Ordinary,9,121486,1.7 oz/ 50 mL,Cleansers,The Ordinary Squalane Cleanser,the ordinary squalane cleanser


In [552]:
search('summer fridays mask')

Unnamed: 0,rating,product_id,product_name,brand_name,price_usd,loves_count,size,category,brand_product_name,clean_brand_product_name
141004,3,P429952,Jet Lag Mask,Summer Fridays,49,245435,2.25 oz/ 64 g,Masks,Summer Fridays Jet Lag Mask,summer fridays jet lag mask
850786,5,P440504,R + R Mask,Summer Fridays,54,47304,2.25 oz/ 64 g,Masks,Summer Fridays R + R Mask,summer fridays r r mask


### Load ratings

In [582]:
ratings = prod[['rating', 'product_id','author_id', 'is_recommended', 'product_name']]
ratings.head()

Unnamed: 0,rating,product_id,author_id,is_recommended,product_name
0,5,P504322,1741593524,1,Gentle Hydra-Gel Face Cleanser
1,1,P420652,31423088263,0,Lip Sleeping Mask Intense Hydration with Vitam...
2,5,P420652,5061282401,1,Lip Sleeping Mask Intense Hydration with Vitam...
3,5,P420652,6083038851,1,Lip Sleeping Mask Intense Hydration with Vitam...
4,5,P420652,47056667835,1,Lip Sleeping Mask Intense Hydration with Vitam...


## Create interactive recommendation widget

In [583]:
# Create a function that finds similair products/brands based on users who have tried the same product and recommend it 
def find_similar_products(product_id):
    
    alike_users = ratings[(ratings["product_id"] == product_id) &(ratings["is_recommended"] == 1)]["author_id"].unique()
    
    alike_users_rec= ratings[(ratings["author_id"].isin(alike_users)) &(ratings["is_recommended"] == 1)]["product_id"].value_counts() / len(alike_users)
    alike_users_rec = alike_users_rec[alike_users_rec > 0.029]
    
    all_users= ratings[(ratings["product_id"].isin(alike_users_rec.index))&(ratings["is_recommended"] == 1)]
    
    all_user_recs =all_users["product_id"].value_counts() / len(all_users["author_id"].unique())
    
    results= pd.concat([alike_users_rec, all_user_recs], axis=1, keys=["similar", "all"])
    results["score"] = results["similar"] / results["all"]
    results = results.sort_values("score", ascending=False).head(8).merge(prod, left_index=True, right_on="product_id")[["product_name", "price_usd", "brand_name"]]
    results = results.drop_duplicates(subset=["product_name", "brand_name"])
    return results

In [584]:
# Create interactive recommendation widget
product_name_input = widgets.Text(
    value='',
    placeholder = 'Search',
    description='Product:'
)

recommendation_list = widgets.Output()

def on_type(data):
    with recommendation_list:
        recommendation_list.clear_output()
        product = data["new"]
        if len(product) > 3:
            results = search(product)
            product_id = results.iloc[0]["product_id"]
            results = find_similar_products(product_id)
            display(results)

product_name_input.observe(on_type, names='value')

display(product_name_input, recommendation_list)

Text(value='', description='Product:', placeholder='Search')

Output()

**Inputs to test out**
- Lip Balm
- Sunscreen
- Eye Cream
- Pore Mask
- Acne 