# Recommender Model for Skincare Products
---
## Overview
-
-
-


*This recommender model was inspired and adapted from Vik Paruchuri's recommender model for movies.*

*YouTube video:* https://www.youtube.com/watch?v=eyEabQRBMQA

*GitHub repository:* https://github.com/dataquestio/project-walkthroughs/blob/master/movie_recs/movie_recommendations.ipynb 


## Imports

In [17]:
# Imports needed to build recommender
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import TfidfVectorizer
import ipywidgets as widgets
from IPython.display import display
from sklearn.metrics.pairwise import cosine_similarity

## Load in product data

In [18]:
# Load in data (only product info)
prod = pd.read_csv('../data/Clean-ALL_reviews.csv', lineterminator='\n', low_memory=False , 
                   usecols=['product_id','product_name','brand_name','category','rating','size', 'price_usd'])
prod.head()

Unnamed: 0,rating,product_id,product_name,brand_name,price_usd,size,category
0,5,P504322,Gentle Hydra-Gel Face Cleanser,NUDESTIX,19,2.4 oz / 70 ml,Cleansers
1,1,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,0.7 oz/ 20 g,Lip Balms & Treatments
2,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,0.7 oz/ 20 g,Lip Balms & Treatments
3,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,0.7 oz/ 20 g,Lip Balms & Treatments
4,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,0.7 oz/ 20 g,Lip Balms & Treatments


In [19]:
# Check out how many different products there are in this dataset
prod['product_name'].nunique()

2334

## Feature Engineering

In [20]:
# Create new column that combines the brand and product name 
prod['brand_product_name'] = prod['brand_name'] + ' ' + prod['product_name']
prod['brand_product_name']

0                    NUDESTIX Gentle Hydra-Gel Face Cleanser
1          LANEIGE Lip Sleeping Mask Intense Hydration wi...
2          LANEIGE Lip Sleeping Mask Intense Hydration wi...
3          LANEIGE Lip Sleeping Mask Intense Hydration wi...
4          LANEIGE Lip Sleeping Mask Intense Hydration wi...
                                 ...                        
1092732    StriVectin Multi Action Clear Acne Clearing Tr...
1092733    StriVectin Multi Action Clear Acne Clearing Tr...
1092734    StriVectin Multi Action Clear Acne Clearing Tr...
1092735    StriVectin Multi Action Clear Acne Clearing Tr...
1092736    StriVectin Multi Action Clear Acne Clearing Tr...
Name: brand_product_name, Length: 1092737, dtype: object

## Creating the search engine for our model

### Clean product name

In [21]:
# Create a function that cleans the product's name to simplify search
def clean_product_name(product):
    special_chars = '!@#$%^&*()+='
    for i in special_chars:
        product = product.replace(i, '')
    product = ' '.join(product.split())
    return product.lower()


In [22]:
# Check that function works
clean_product_name('Smooth + FAB )Travel Size Duo')

'smooth fab travel size duo'

In [23]:
# Create new column for the clean product name
prod['clean_product_name'] = prod['product_name'].apply(clean_product_name)

### Use TFIDF to convert names into numbers

In [24]:
# Convert product name into numbers
# Instantiate vectorizer and set nrgram to (1,3) so it looks at unigrams, bigrams, and trigrams
vectorizer = TfidfVectorizer(ngram_range=(1,3))

# Convert set of numbers into matrix
tfidf = vectorizer.fit_transform(prod['clean_product_name'])

### Compute similarities btwn entered term and all of the products

In [25]:
# Create a function that calculates similarity btwn search term and product name, and returns results
# Function code adapted from #https://www.youtube.com/watch?v=eyEabQRBMQA&t=671s, https://github.com/dataquestio/project-walkthroughs/blob/master/movie_recs/movie_recommendations.ipynb

def search(product):
    # apply the cleaning fucntion to the product that is entered 
    product = clean_product_name(product)
    
    # Use tfidf to convert search term into nums
    search_term_num = vectorizer.transform([product])
    
    # Use cosine_similarity to find the similarities between search term and clean product name, results will be return in 1-dimensional array with flatten()
    similarity = cosine_similarity(search_term_num, tfidf).flatten()
    # Use argsort to return the indices of a sorted array
    indices =np.argsort(similarity)[-10:][::-1]
    
    # Create a list comp that filters the indicies so it only returns unique product names
    unique_prods = []
    filtered_index = [i for i in indices if (prod.iloc[i]['product_name'] not in unique_prods) and not unique_prods.append(prod.iloc[i]['product_name'])]
    results = prod.iloc[filtered_index]
    
    return results

In [26]:
search('Gentle Hydra-Gel Face Cleanser')

Unnamed: 0,rating,product_id,product_name,brand_name,price_usd,size,category,brand_product_name,clean_product_name
0,5,P504322,Gentle Hydra-Gel Face Cleanser,NUDESTIX,19,2.4 oz / 70 ml,Cleansers,NUDESTIX Gentle Hydra-Gel Face Cleanser,gentle hydra-gel face cleanser
1076716,4,P504524,Milky Jelly Gentle Gel Face Cleanser,Glossier,19,6 oz / 177 mL,Cleansers,Glossier Milky Jelly Gentle Gel Face Cleanser,milky jelly gentle gel face cleanser


### Create interactive search box

In [27]:
# Create search engine 
# Code adapted from https://www.youtube.com/watch?v=eyEabQRBMQA&t=671s, https://github.com/dataquestio/project-walkthroughs/blob/master/movie_recs/movie_recommendations.ipynb
# Input widget that receives a text
product_input = widgets.Text(
                value='',
                placeholder = 'Enter a product name',
                 description='Search:')

# Output widget that displays results
product_list = widgets.Output()

# function that is called when text is written in box
def on_type(data):
    with product_list:
        product_list.clear_output()
        product = data['new']
        if len(product)>3:
            display(search(product))

product_input.observe(on_type, names='value')
            
display(product_input, product_list)

Text(value='', description='Search:', placeholder='Enter a product name')

Output()

### Loading in ratings from similar products

In [28]:
# Load in ratings
ratings = pd.read_csv('../data/Clean-ALL_reviews.csv', lineterminator='\n',low_memory=False,
                     usecols=['rating', 'loves_count', 'product_id','author_id', 'is_recommended'])
ratings.head()

Unnamed: 0,author_id,rating,is_recommended,product_id,loves_count
0,1741593524,5,1,P504322,177
1,31423088263,1,0,P420652,1081315
2,5061282401,5,1,P420652,1081315
3,6083038851,5,1,P420652,1081315
4,47056667835,5,1,P420652,1081315


In [29]:
round(ratings['loves_count'].describe(),2)

count    1092737.00
mean      106744.30
std       167544.72
min            0.00
25%        16632.00
50%        48739.00
75%       123439.00
max      1081315.00
Name: loves_count, dtype: float64

In [30]:
def find_similar_products(prod_id): 
    
    # Find users that like same product 
    alike_users = ratings[(ratings["product_id"] == prod_id) & (ratings["is_recommended"] == 1)]["author_id"].unique()
    # Find products that alike users recommend
    alike_user_recs= ratings[(ratings["author_id"].isin(alike_users)) & (ratings["is_recommended"] == 1)]["product_id"]

    alike_user_recs = alike_user_recs.value_counts() / len(alike_users)

    alike_user_recs= alike_user_recs[alike_user_recs > .10]

    all_users = ratings[(ratings["product_id"].isin(alike_user_recs.index)) & (ratings['is_recommended']==1)]

    all_user_recs=  all_users["product_id"].value_counts() / len(all_users["author_id"].unique())

    rec_percentages = pd.concat([alike_user_recs, all_user_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    rec_percentages = rec_percentages.sort_values("score", ascending=False)
    results = rec_percentages.merge(prod, left_index=True, right_on="product_id").head(10)[['brand_product_name','price_usd','similar']]
    results = results[['brand_product_name', 'price_usd','similar']].drop_duplicates()

    return results

In [31]:
product_name_input = widgets.Text(
    value='Search',
    description='Product:',
    disabled=False
)
recommendation_list = widgets.Output()

def on_type(data):
    with recommendation_list:
        recommendation_list.clear_output()
        product = data["new"]
        #if len(product) > 5:
        results = search(product)
        product_id = results.iloc[0]["product_id"]
        results = find_similar_products(product_id)
        display(results)

product_name_input.observe(on_type, names='value')

display(product_name_input, recommendation_list)

Text(value='Search', description='Product:')

Output()