# Recommender Model #1 
---
## Overview
-
-
-


## Imports

In [268]:
# Imports needed to build Recommender
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
import ipywidgets as widgets
from IPython.display import display
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

## Load in product data

In [269]:
# Load in data (only product info)
prod = pd.read_csv('../data/Clean-ALL_reviews.csv', lineterminator='\n', low_memory=False , 
                   usecols=['product_id','product_name','brand_name','category','rating','price_usd'])
prod.head()

Unnamed: 0,rating,product_id,product_name,brand_name,price_usd,category
0,5,P504322,Gentle Hydra-Gel Face Cleanser,NUDESTIX,19,Cleansers
1,1,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments
2,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments
3,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments
4,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments


## Feature Engineering

In [270]:
# Create new column that combines the brand and product name 
prod['brand_product_name'] = prod['brand_name'] + ' ' + prod['product_name']
prod['brand_product_name']

0                    NUDESTIX Gentle Hydra-Gel Face Cleanser
1          LANEIGE Lip Sleeping Mask Intense Hydration wi...
2          LANEIGE Lip Sleeping Mask Intense Hydration wi...
3          LANEIGE Lip Sleeping Mask Intense Hydration wi...
4          LANEIGE Lip Sleeping Mask Intense Hydration wi...
                                 ...                        
1092732    StriVectin Multi Action Clear Acne Clearing Tr...
1092733    StriVectin Multi Action Clear Acne Clearing Tr...
1092734    StriVectin Multi Action Clear Acne Clearing Tr...
1092735    StriVectin Multi Action Clear Acne Clearing Tr...
1092736    StriVectin Multi Action Clear Acne Clearing Tr...
Name: brand_product_name, Length: 1092737, dtype: object

In [271]:
# Drop old columns (not needed for this model)
#prod.drop(columns=['brand_name', 'product_name'], inplace=True)

## Creating the search engine for our model

### Clean product name

In [272]:
# Create a function that cleans the product's name using regex to simplify search
def clean_product_name(product):
    return re.sub("[^a-zA-Z0-9!%^&()\+\'\"]", "", product)

In [273]:
# Create new column for the clean product brand name
#prod['clean_brand_product_name'] = prod['brand_product_name'].apply(clean_product_name)

In [274]:
# Create new column for the clean product brand name
prod['clean_product_name'] = prod['product_name'].apply(clean_product_name)

### Use TFIDF to convert names into numbers

In [275]:
# Convert product name into numbers
# Instantiate vectorizer and set nrgram to (1,3) so it looks at unigrams, bigrams, and trigrams
vectorizer = TfidfVectorizer(ngram_range=(1,3))

# Convert set of numbers into matrix
tfidf = vectorizer.fit_transform(prod['clean_product_name'])

### Compute similarities btwn entered term and all of the products

   
# Claculate similairy between search term and product name
def search(product):
    product = clean_product_name(product)
    query_vec= vectorizer.transform([product])
    similarity = cosine_similarity(query_vec, tfidf).flatten()
    indices = np.argpartition(similarity, -5)[-5:]
    results = prod.iloc[indices].iloc[::-1]
    
    return results

In [276]:
# Create a function that calculates similarity btwn search term and product name
def search(product):
    product= clean_product_name(product)
    query_vec = vectorizer.transform([product])
    similarity =cosine_similarity(query_vec, tfidf).flatten()
    indices =np.argsort(similarity)[-5:][::-1]
    results = prod.iloc[indices]
    return results

### Create interactive search box

In [277]:
# Input widget that receives a text
product_input = widgets.Text(
                value='',# default 
                description='Search:')
# Output widget that displays results

product_list = widgets.Output()

# function that is called when text is written in box
def on_type(data):
    with product_list:
        product_list.clear_output()
        product = data['new']
        if len(product)>4:
            display(search(product))

product_input.observe(on_type, names='value')
            
display(product_input, product_list)

Text(value='', description='Search:')

Output()

### Loading in ratings from similar products

In [278]:
# Load in ratings
ratings = pd.read_csv('../data/Clean-ALL_reviews.csv', lineterminator='\n',low_memory=False,
                     usecols=['rating', 'loves_count', 'product_id','author_id', 'is_recommended'])
ratings.head()

Unnamed: 0,author_id,rating,is_recommended,product_id,loves_count
0,1741593524,5,1,P504322,177
1,31423088263,1,0,P420652,1081315
2,5061282401,5,1,P420652,1081315
3,6083038851,5,1,P420652,1081315
4,47056667835,5,1,P420652,1081315


In [280]:
# Find authors who like same products
similar_users= ratings[(ratings["product_id"] == prod_id) & (ratings["rating"] > 4)]["author_id"].unique()
similar_user_recs = ratings[(ratings["author_id"].isin(similar_users)) & (ratings["rating"] > 4)]["product_id"]

similar_user_recs = similar_user_recs.value_counts() / len(similar_users)

similar_user_recs = similar_user_recs[similar_user_recs > .10]

In [281]:
all_users = ratings[(ratings["product_id"].isin(similar_user_recs.index)) & (ratings["rating"] > 4)]


In [282]:

all_user_recs = all_users["product_id"].value_counts() / len(all_users["author_id"].unique())

In [283]:
rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
rec_percentages.columns = ["similar", "all"]

In [284]:

rec_percentages

Unnamed: 0,similar,all
P420652,1.015279,1.015279


In [285]:
rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]

In [286]:
rec_percentages = rec_percentages.sort_values("score", ascending=False)

In [287]:
rec_percentages.merge(prod, left_index=True, right_on="product_id").head(10)

Unnamed: 0,similar,all,score,rating,product_id,product_name,brand_name,price_usd,category,brand_product_name,clean_product_name
1,1.015279,1.015279,1.0,1,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC
2,1.015279,1.015279,1.0,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC
3,1.015279,1.015279,1.0,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC
4,1.015279,1.015279,1.0,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC
5,1.015279,1.015279,1.0,4,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC
6,1.015279,1.015279,1.0,2,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC
7,1.015279,1.015279,1.0,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC
8,1.015279,1.015279,1.0,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC
9,1.015279,1.015279,1.0,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC
10,1.015279,1.015279,1.0,5,P420652,Lip Sleeping Mask Intense Hydration with Vitam...,LANEIGE,24,Lip Balms & Treatments,LANEIGE Lip Sleeping Mask Intense Hydration wi...,LipSleepingMaskIntenseHydrationwithVitaminC


In [292]:
def find_similar_products(prod_id):
    similar_users= ratings[(ratings["product_id"] == prod_id) & (ratings["rating"] > 4)]["author_id"].unique()
    similar_user_recs = ratings[(ratings["author_id"].isin(similar_users)) & (ratings["rating"] > 4)]["product_id"] 
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > .10]
    all_users = ratings[(ratings["product_id"].isin(similar_user_recs.index)) & (ratings["rating"] > 4)]
    all_user_recs = all_users["product_id"].value_counts() / len(all_users["author_id"].unique())
    rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    rec_percentages = rec_percentages.sort_values("score", ascending=False)
    return rec_percentages.merge(prod, left_index=True, right_on="product_id").head(10)[["product_name", "brand_name", "price_usd",'category']]


In [296]:
product_name_input = widgets.Text(
    value='Search',
    description='Product:',
    disabled=False
)
recommendation_list = widgets.Output()

def on_type(data):
    with recommendation_list:
        recommendation_list.clear_output()
        product = data["new"]
        if len(product) > 5:
            results = search(product)
            product_id = results.iloc[0]["product_id"]
            display(find_similar_products(product_id))

product_name_input.observe(on_type, names='value')

display(product_name_input, recommendation_list)

Text(value='Search', description='Product:')

Output()