In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

import os
from scipy.sparse import coo_matrix

In [3]:
pd.set_option('display.max_colwidth', None)

In [4]:
train_data = pd.read_csv('/content/clean_data.csv')

In [5]:
train_data.columns

Index(['Unnamed: 0', 'ID', 'ProdID', 'Rating', 'ReviewCount', 'Category',
       'Brand', 'Name', 'ImageURL', 'Description', 'Tags'],
      dtype='object')

# Rating-Based Recommender System

In [6]:
average_ratings = train_data.groupby(['Name','ReviewCount','Brand','ImageURL'])['Rating'].mean().reset_index()

In [7]:
top_rated_items = average_ratings.sort_values(by='Rating', ascending=False)

rating_base_recommendation = top_rated_items.head(10)

In [8]:
rating_base_recommendation['Rating'] = rating_base_recommendation['Rating'].astype(int)
rating_base_recommendation['ReviewCount'] = rating_base_recommendation['ReviewCount'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_base_recommendation['Rating'] = rating_base_recommendation['Rating'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_base_recommendation['ReviewCount'] = rating_base_recommendation['ReviewCount'].astype(int)


In [23]:
print("Rating Base Recommendation System: (Trending Products)")
rating_base_recommendation[['Name','Rating','ReviewCount','Brand','ImageURL']] = rating_base_recommendation[['Name','Rating','ReviewCount','Brand','ImageURL']]
rating_base_recommendation[['Name', 'Rating']]

Rating Base Recommendation System: (Trending Products)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_base_recommendation[['Name','Rating','ReviewCount','Brand','ImageURL']] = rating_base_recommendation[['Name','Rating','ReviewCount','Brand','ImageURL']]


Unnamed: 0,Name,Rating
1027,"Comfort Bath Heavyweight Aloe Cleansing Washcloths 7900 Pack of 8, Clean Scent",5
1851,"Jojoba Oil, 100% Pure Golden, Organic, Unrefined, Cold Pressed",5
1897,Kevin Murphy Balancing Wash Daily Shampoo 8.4 Oz,5
1894,Kerastase Resistance Fondant Extentioniste Conditioner - 6.8oz / 200ml,5
376,"Air Wick Freshmatic Refill Automatic Spray, Paradise Retreat, 6.17oz, Air Freshener",5
233,3 Pack - Sure Anti-Perspirant Deodorant Original Solid Fresh & Cool Scent 2.70 oz,5
2523,New York Color Nyc Ultra Last Lip Wear,5
1881,"Karma Organic Nail Polish; Non-Toxic, Vegan, and Cruelty-Free (PISTACHIO ICE CREAM)",5
1877,KMS Hair Play Liquid Wax - Size : 3.3 oz,5
1855,Juicy Couture Perfumed Soap 5.25oz/150g New,5


# Content-Based recommender system


In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix_content = tfidf_vectorizer.fit_transform(train_data['Tags'])
cosine_similarities_content = cosine_similarity(tfidf_matrix_content,tfidf_matrix_content)

In [11]:
item_name = 'OPI Infinite Shine, Nail Lacquer Nail Polish, Bubble Bath'
item_index = train_data[train_data['Name']==item_name].index[0]

In [12]:
similar_items = list(enumerate(cosine_similarities_content[item_index]))

In [13]:
similar_items = sorted(similar_items, key=lambda x:x[1], reverse=True)
top_similar_items = similar_items[1:10]

recommended_items_indics = [x[0] for x in top_similar_items]

In [14]:
train_data.iloc[recommended_items_indics][['Name','ReviewCount','Brand']]

Unnamed: 0,Name,ReviewCount,Brand
155,OPI Nail Lacquer Polish .5oz/15mL - This Gown Needs A Crown NL U11,0.0,opi
183,"OPI Nail Gel Polish GelColor .5oz/15mL 3 CT Combo - Base, Top & Dont Toot My Flute GC P34",0.0,opi
203,"OPI Nail Lacquer - Dont Bossa Nova Me Around - NL A60, 0.5 Fl Oz",0.0,opi
234,"OPI Infinite Shine 2 Polish - ISL P33 - Alpaca My Bags Nail Polish, Women, 0.5oz",5.0,opi
318,OPI Gel Polish Fall 2019 Scotland Collection GCU18 Rub-a-Pub-Pub 0.5 oz,1.0,opi
366,"OPI Nail Gel Polish GelColor .5oz/15mL 3 CT Combo - Base, Top & Tagus in That Selfie! L18",1.0,opi
392,"OPI Nail Polish, Strawberry Margarita, 0.5 Fl Oz",57.0,opi
671,"OPI Nail Gel Polish GelColor .5oz/15mL 3 CT Combo - Base, Top & Sun, Sea, and Sand in My Pants L23",1.0,opi
820,OPI- Nail Lacquer-GelColor - &quotLiv&quotin the Gray -.5 FL OZ,0.0,opi


In [15]:
def content_based_recommendations(train_data, search_string, top_n=10):

    formatted_search_string = ', '.join(search_string.split())


    matching_items = train_data[train_data['Tags'].str.contains(formatted_search_string, case=False, na=False)]

    if matching_items.empty:
        print(f"No items found containing the string '{search_string}' in their tags.")
        return pd.DataFrame()

    tfidf_vectorizer = TfidfVectorizer(stop_words='english')

    tfidf_matrix_content = tfidf_vectorizer.fit_transform(train_data['Tags'])

    cosine_similarities_content = cosine_similarity(tfidf_matrix_content, tfidf_matrix_content)

    matching_item_indices = matching_items.index.tolist()

    all_recommendations = pd.DataFrame()

    for item_index in matching_item_indices:

        similar_items = list(enumerate(cosine_similarities_content[item_index]))

        similar_items = sorted(similar_items, key=lambda x: x[1], reverse=True)

        top_similar_items = similar_items[1:top_n+1]

        recommended_item_indices = [x[0] for x in top_similar_items]

        recommended_items_details = train_data.iloc[recommended_item_indices][['Name', 'ReviewCount', 'Brand', 'ImageURL', 'Rating']]

        all_recommendations = pd.concat([all_recommendations, recommended_items_details])

    return all_recommendations.drop_duplicates().head(top_n)



In [24]:
search_string = input("Input key-word: ")  # Input from the backend based on user's most seatched key-words
content_based_rec = content_based_recommendations(train_data, search_string, top_n=8).sort_values(by='Rating', ascending=False)

if not content_based_rec.empty:
    print(content_based_rec['Name'])
else:
    print("No recommendations found.")


Input key-word: hair color
2767          Clairol Nice n Easy Permanent Hair Color Creme 5 Medium Brown, 1 Application
2879    Clairol Nice n Easy Permanent Hair Color Creme 5C Medium Cool Brown, 1 Application
3851      Clairol Nice n Easy Permanent Hair Color Creme 6A Light Ash Brown, 1 Application
3253    Clairol Nice n Easy Permanent Hair Color Creme 8A Medium Ash Blonde, 1 Application
614             Clairol nice n easy permanent hair color 4/120 natural dark brown, 1.0 kit
3894                                               Nice N Easy 112a Reddish Brown Hair Clr
3729       Nice n Easy Permanent Color, Natural Light Golden Brown [116A] 1 ea (Pack of 4)
719             Clairol Nicen Easy Permanent Hair Color Crème 6 Light Brown, 1 Application
Name: Name, dtype: object


# Collaborative Filtering (User Item Similarity)

In [25]:
def collaborative_filtering_recommendations(train_data, target_user_id, top_n=10):

    user_item_matrix = train_data.pivot_table(index='ID', columns='ProdID', values='Rating', aggfunc='mean').fillna(0)

    user_similarity = cosine_similarity(user_item_matrix)

    target_user_index = user_item_matrix.index.get_loc(target_user_id)

    user_similarities = user_similarity[target_user_index]

    similar_users_indices = user_similarities.argsort()[::-1][1:]

    recommended_items = []

    for user_index in similar_users_indices:

        rated_by_similar_user = user_item_matrix.iloc[user_index]
        not_rated_by_target_user = (rated_by_similar_user == 0) & (user_item_matrix.iloc[target_user_index] == 0)

        recommended_items.extend(user_item_matrix.columns[not_rated_by_target_user][:top_n])

    recommended_items_details = train_data[train_data['ProdID'].isin(recommended_items)][['Name', 'ReviewCount', 'Brand', 'ImageURL', 'Rating']]

    return recommended_items_details.head(10)

In [28]:
target_user_id = 4
collaborative_filtering_rec = collaborative_filtering_recommendations(train_data, target_user_id)
collaborative_filtering_rec['Name']

Unnamed: 0,Name
2,"Clairol Nice N Easy Permanent Color 7/106A Natural Dark Neutral Blonde, 1.0 KIT"
3,"Kokie Professional Matte Lipstick, Hot Berry, 0.14 fl oz"
4,"Gillette TRAC II Plus Razor Blade Refills, Fit TRAC II Handles, 10 ct"
15,"Clairol Natural Instincts Demi-Permanent Hair Color Crème 6R Light Auburn, 1 Application"
24,Hempz Milk & Honey Herbal Body Moisturizer 2.25 oz.
30,2 Pack - AVEENO Active Naturals Intense Relief Hand Cream 3.50 oz Each
33,"DenTek Kids Fun Flossers, Removes Food & Plaque, 75 Count, 3 Pack"
44,"(4 pack) Crest Pro-Health Sensitive & Enamel Shield Toothpaste, 4.6 oz"
58,"Dr. Hauschka Revitalizing Day Cream (Formerly Moisturizing Day Cream), 3.4-Ounce Box"
60,"COVERGIRL Exhibitionist Cream Lipstick, 395 Darling Kiss, 0.12 oz"
