# Content Based Recommendation System

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity 

In [2]:
# Load the product table; the path is relative to the notebook's working directory.
products_df = pd.read_csv('Data/products_forvectorizingboolean.csv')

In [3]:
# Count rows that exactly repeat an earlier row (all columns are compared).
products_df.duplicated().sum()

1124

In [4]:
# Keep only the first occurrence of each fully duplicated row.
products_df = products_df.drop_duplicates()

In [5]:
# Re-check after de-duplication: the count should now be zero.
products_df.duplicated().sum()

0

In [6]:
# Rows without a description cannot be vectorized, so discard them.
# The index is not renumbered, so row labels keep their gaps afterwards.
products_df = products_df.dropna(subset=['description'])

In [7]:
# Display the cleaned frame (pandas truncates the rendering of large frames).
products_df

Unnamed: 0,description,title,brand,rank,asin,price,maincat_Luggage & Travel Gear,maincat_Backpacks,maincat_Novelty & More,maincat_Clothing,...,subcat_Shoes,subcat_Handbags & Wallets,subcat_Girls,subcat_Boys,"subcat_Shoe, Jewelry & Watch Accessories",subcat_Jewelry Accessories,subcat_Shoe Care & Accessories,subcat_Contemporary & Designer,subcat_Travel Accessories,"subcat_Surf, Skate & Street"
0,The Hottest Bag in Town! Brand: Anello Conditi...,Japan Anello Backpack Unisex PINK BEIGE LARGE ...,Anello,3994472.0,0204444454,70.00,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
1,The Hottest Bag in Town! Brand: Anello Conditi...,Japan Anello Backpack Unisex BLACK LARGE PU LE...,Anello,635761.0,0204444403,65.99,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
2,Brand New. Hat Centre Length: adult about 8cm...,bettyhome Unisex Adult Winter Spring Thicken C...,bettyhome,5061041.0,0206313535,18.99,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
3,Please allow 1-2cm dimension deviation. 100% b...,bettyhome Womens Lace Short Sleeves Top Printi...,bettyhome,10635107.0,0206335962,23.99,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
4,2 Way Shoulder Handle Polyester Canvas Boston ...,Japan Anello LARGE CAMO 2 Way Unisex Shoulder ...,Anello,1615335.0,024444448X,65.33,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
680975,Get maximum protection from the elements. Clif...,Kaenon Cliff Sunglasses - Select Color,Kaenon,1908928.0,B01HJH6SA2,199.00,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
680976,A classic wingtip with a subtle twist that cov...,Deer Stags Men's Hampden Oxford,Unknown,956501.0,B01HJH7W0W,52.38,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
680977,Our classy but sexy strappy sheath cocktail wi...,Laundry by Shelli Segal Women's Fitted Strappy...,Unknown,3633844.0,B01HJI0G5Y,18.99,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
680978,Size Length Hip*2 Age Advice 70 39.5 CM 30 CM ...,Newborn Baby Girl Bodysuit Lace Floral Romper ...,Hotone,1671980.0,B01HJHR8A6,4.99,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False


In [8]:
# List the columns whose names start with 'maincat_' (the main-category flags).
products_df.columns[products_df.columns.str.startswith('maincat_')]

Index(['maincat_Luggage & Travel Gear', 'maincat_Backpacks',
       'maincat_Novelty & More', 'maincat_Clothing', 'maincat_Women',
       'maincat_Baby', 'maincat_Baby Boys', 'maincat_Men', 'maincat_Maternity',
       'maincat_Baby Girls', 'maincat_Traditional & Cultural Wear',
       'maincat_Accessories', 'maincat_Jewelry',
       'maincat_Costumes & Accessories', 'maincat_Watches',
       'maincat_Kids & Baby', 'maincat_Uniforms, Work & Safety',
       'maincat_Shoes', 'maincat_Handbags & Wallets', 'maincat_Girls',
       'maincat_Boys', 'maincat_Shoe, Jewelry & Watch Accessories',
       'maincat_Jewelry Accessories', 'maincat_Shoe Care & Accessories',
       'maincat_Contemporary & Designer', 'maincat_Travel Accessories',
       'maincat_Surf, Skate & Street'],
      dtype='object')

## Using Cosine Similarity 

In [9]:
# Title of the first product; it is used as the query item in the backpack test below.
products_df['title'].iloc[0]

'Japan Anello Backpack Unisex PINK BEIGE LARGE PU LEATHER Rucksack School Bag Campus'

The full TF-IDF matrix would be too large to work with, so let's break the dataframe up by category.
Let's start with a test on one category.

In [29]:
# Restrict the catalogue to rows flagged as backpacks; original row labels are kept.
products_backpacks = products_df.loc[products_df['maincat_Backpacks'] == True]
products_backpacks

Unnamed: 0,description,title,brand,rank,asin,price,maincat_Luggage & Travel Gear,maincat_Backpacks,maincat_Novelty & More,maincat_Clothing,...,subcat_Shoes,subcat_Handbags & Wallets,subcat_Girls,subcat_Boys,"subcat_Shoe, Jewelry & Watch Accessories",subcat_Jewelry Accessories,subcat_Shoe Care & Accessories,subcat_Contemporary & Designer,subcat_Travel Accessories,"subcat_Surf, Skate & Street"
0,The Hottest Bag in Town! Brand: Anello Conditi...,Japan Anello Backpack Unisex PINK BEIGE LARGE ...,Anello,3994472.0,0204444454,70.00,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
1,The Hottest Bag in Town! Brand: Anello Conditi...,Japan Anello Backpack Unisex BLACK LARGE PU LE...,Anello,635761.0,0204444403,65.99,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
4,2 Way Shoulder Handle Polyester Canvas Boston ...,Japan Anello LARGE CAMO 2 Way Unisex Shoulder ...,Anello,1615335.0,024444448X,65.33,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
116,This classic backpack features a unique square...,Clava Square Backpack,Clava,13753935.0,B000078ORA,96.99,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
118,Kipling's Alcatraz II is new and improved with...,Kipling Alcatraz Ii Wheeled Laptop Backpack,Kipling,2069453.0,B000078QUR,21.41,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
679432,"Specifically designed for girls,lovely pattern...",Dog Pawprint Cat Fingerprint Backpack for Elem...,MIFULGOO,155356.0,B01HGSLJKI,4.58,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
679992,Kid's Teenage Mutant Ninja Turtles Out of the ...,Teenage Mutant Ninja Turtles Movie Out of The ...,Unknown,2683371.0,B01HHDG7L8,13.99,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
680159,Withmystyle provide the latest trends and styl...,Casual Canvas Fashion Shool Backpack,Generic,1659770.0,B01HHNCBH2,11.95,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
680500,Black & white canvas backpack from Fall Out Bo...,Fall Out Boy Mint Anchor Print Backpack,Unknown,1584668.0,B01HIGYDYW,39.90,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False


In [30]:
# TF-IDF over the backpack descriptions: text is lower-cased, English stop
# words are removed, and terms found in fewer than 5 descriptions are ignored.
vectorizer = TfidfVectorizer(
    lowercase=True,
    stop_words='english',
    min_df=5,
)

TF_IDF_matrix = vectorizer.fit_transform(products_backpacks['description'])

In [31]:
# (number of descriptions, number of vocabulary terms kept by the vectorizer)
TF_IDF_matrix.shape

(2089, 2001)

In [32]:
# Cosine similarity between every pair of backpack descriptions, kept sparse.
similarities = cosine_similarity(X=TF_IDF_matrix, Y=None, dense_output=False)

In [33]:
# Locate the query product by ROW POSITION, not by index label.
# `products_backpacks` keeps the labels of `products_df` (0, 1, 4, 116, ...),
# while `similarities` is addressed positionally (0 .. n_rows - 1). Using labels
# only works when label and position happen to coincide, as they do for label 0.
product_index = np.flatnonzero(
    (
        products_backpacks['title']
        == 'Japan Anello Backpack Unisex PINK BEIGE LARGE PU LEATHER Rucksack School Bag Campus'
    ).to_numpy()
)

# One similarity score per backpack, taken from the query product's row.
# The 'product_name' Series supplies the index, so the original labels survive.
sim_df = pd.DataFrame(
    {
        'product_name': products_backpacks['title'],
        'similarity': np.array(similarities[product_index, :].todense()).squeeze()
    }
)

In [34]:
# Ten highest-scoring backpacks, best match first; the query product itself scores 1.0.
sim_df.sort_values(by='similarity', ascending=False).head(10)

Unnamed: 0,product_name,similarity
0,Japan Anello Backpack Unisex PINK BEIGE LARGE ...,1.0
1,Japan Anello Backpack Unisex BLACK LARGE PU LE...,1.0
4,Japan Anello LARGE CAMO 2 Way Unisex Shoulder ...,0.690685
630122,Women Girls Ladies Backpack Fashion Shoulder B...,0.31454
208215,SHENGXILU Women's/Lady's PU Leather Backpack G...,0.254706
474496,Soft PU Backpack School Bag Travel Bag Frozen ...,0.25386
405784,Soft PU Backpack Children's School Bag Travel ...,0.25386
407721,New Fashion Gold /Silver School Travel Gym Sho...,0.252468
254817,Brixton Men's Carson Backpack,0.249343
390980,"Peppa Pig Large 16"" School Backpack(purple)",0.247869


In [None]:
# Full (untruncated) title of one of the recommended backpacks, looked up by row label.
products_df.loc[407721, 'title']

'New Fashion Gold /Silver School Travel Gym Shoulder Bag Backpack'

---

In [58]:
# Restrict the catalogue to rows flagged as women's products; original row labels are kept.
products_women = products_df.loc[products_df['maincat_Women'].eq(True)]
products_women

Unnamed: 0,description,title,brand,rank,asin,price,maincat_Luggage & Travel Gear,maincat_Backpacks,maincat_Novelty & More,maincat_Clothing,...,subcat_Shoes,subcat_Handbags & Wallets,subcat_Girls,subcat_Boys,"subcat_Shoe, Jewelry & Watch Accessories",subcat_Jewelry Accessories,subcat_Shoe Care & Accessories,subcat_Contemporary & Designer,subcat_Travel Accessories,"subcat_Surf, Skate & Street"
2,Brand New. Hat Centre Length: adult about 8cm...,bettyhome Unisex Adult Winter Spring Thicken C...,bettyhome,5061041.0,0206313535,18.99,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
3,Please allow 1-2cm dimension deviation. 100% b...,bettyhome Womens Lace Short Sleeves Top Printi...,bettyhome,10635107.0,0206335962,23.99,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
13,Veneziana Sexy Strip 20 Open Crotch Pantyhose ...,Sexystrip,Veneziana,734888.0,5120053017,14.95,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
14,Veneziana Ar Beautiful - Hold Ups Thigh High S...,Beautiful,Veneziana,551160.0,5120053351,17.70,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
15,Snug ribbed knit. Size One Size. Fabric 100% S...,Beechfield Ladies/Womens Metro Knitted Winter ...,Beechfield,6396025.0,5120192157,29.29,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
680969,HONGYE : Best Jewelry Choice You Could Never M...,HONGYE Fashion Three-dimensional Love Heart Ne...,HONGYE,528223.0,B01HJGJ512,16.99,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
680970,Dance along the trail with nimble agility in t...,HOKA ONE ONE Women's Speed Instinct Trail Runn...,HOKA ONE ONE,915676.0,B01HJFMJQQ,144.48,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
680971,Show your Nor Cal spirit with our limited edit...,Nor Cal T-Shirt - STRAIGHT OUTTA NOR CAL Shirt,Unknown,1801862.0,B01HJGV808,17.99,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
680974,"Funny Tshirts For, Funny Birthday Gift Tshirts...",Sysadmin Because Even Developer Need Heroes T-...,Sysadmin t shirt,2596846.0,B01HJHLSR0,18.88,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False


In [76]:
# Draw a reproducible 10,000-row sample and renumber the rows from 0.
# reset_index() keeps the previous row labels in a new 'index' column.
products_women_sample = (
    products_women
    .sample(n=10000, random_state=42)
    .reset_index()
)
products_women_sample

Unnamed: 0,index,description,title,brand,rank,asin,price,maincat_Luggage & Travel Gear,maincat_Backpacks,maincat_Novelty & More,...,subcat_Shoes,subcat_Handbags & Wallets,subcat_Girls,subcat_Boys,"subcat_Shoe, Jewelry & Watch Accessories",subcat_Jewelry Accessories,subcat_Shoe Care & Accessories,subcat_Contemporary & Designer,subcat_Travel Accessories,"subcat_Surf, Skate & Street"
0,11670,"HONDA is a trademark of Honda Motor Co., Ltd. ...","Honda Flame Fine Embroidered Hat Cap, Black/Si...",A&E Designs,4519868.0,B000NN8TTI,21.99,False,False,True,...,False,False,False,False,False,False,False,False,False,False
1,611472,Beautiful ankle wrap bootie with a zip closure...,Not Rated Women's Odessa Chelsea Boot,Not Rated,834684.0,B01COQNDJM,28.97,False,False,False,...,True,False,False,False,False,False,False,False,False,False
2,474110,Brass With Yellow Finish Police Officer Charm ...,Angelica Collection Yellow Brass Police Office...,Angelica Collection,5168500.0,B012BCSB2I,18.98,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,380012,"Denim Jacket, Denim Jacket, Machine Wash Cold ...",Women's Denim Jacket,Noble U,3329121.0,B00RNG80EC,85.00,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,647922,Haase Unlimited Women\'s T-shirts are great fo...,"HAASE UNLIMITED Women's Goodbye Tension, Hello...",HAASE UNLIMITED,7877148.0,B01F9JP27W,13.92,False,False,True,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,218569,This stylish city sandal features a laser cut ...,Carlos by Carlos Santana Women's Dynamic Dress...,Carlos by Carlos Santana,3411569.0,B00FEM6DCI,59.99,False,False,False,...,True,False,False,False,False,False,False,False,False,False
9996,88540,Made of the finest materials available. Manufa...,H&M Christian Jewelry 1 1/16 Inch Sterling Sil...,H&M,9020971.0,B005IGZP3I,48.40,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9997,282535,"Black and white never go out of style, and we ...",VIRGIN ONLY Women's Geometric Black Velvet Pri...,VIRGIN ONLY,3852502.0,B00JZFNSLO,14.99,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9998,630659,Every girl is on a quest for the perfect dress...,Extravagant Long Maxi Daywear Dress,Aakasha,10650824.0,B01E40OKR0,79.00,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [89]:
# Full title of the sampled product at position 9995; used as the query item below.
products_women_sample['title'].iloc[9995]

"Carlos by Carlos Santana Women's Dynamic Dress Sandal"

In [90]:
# Same TF-IDF settings as the backpack test, now fitted on the sampled
# women's descriptions.
vectorizer = TfidfVectorizer(
    lowercase=True,
    stop_words='english',
    min_df=5,
)

TF_IDF_matrix = vectorizer.fit_transform(products_women_sample['description'])

In [91]:
# (number of sampled descriptions, number of vocabulary terms kept by the vectorizer)
TF_IDF_matrix.shape

(10000, 7155)

In [92]:
# Cosine similarity between every pair of sampled descriptions, kept sparse.
similarities = cosine_similarity(X=TF_IDF_matrix, Y=None, dense_output=False)

In [93]:
# The sample's index was reset, so its labels double as row positions in `similarities`.
product_index = products_women_sample.index[
    products_women_sample['title'] == "Carlos by Carlos Santana Women's Dynamic Dress Sandal"
]

# One similarity score per sampled product, read from the query product's row.
sim_df = pd.DataFrame(
    {
        'product_name': products_women_sample['title'],
        'similarity': similarities[product_index, :].toarray().squeeze(),
    }
)

In [94]:
# Ten highest-scoring sampled products, best match first; the query product itself scores 1.0.
sim_df.sort_values(by='similarity', ascending=False).head(10)

Unnamed: 0,product_name,similarity
9995,Carlos by Carlos Santana Women's Dynamic Dress...,1.0
71,Carlos by Carlos Santana Women's Melinda,0.975294
3587,Carlos by Carlos Santana Women's Kaliopi Ankle...,0.970268
5838,Carlos by Carlos Santana Women's Maiko Wedge S...,0.9538
7089,Carlos by Carlos Santana Women's Kiara Gladiat...,0.890211
3957,Carlos by Carlos Santana Womens Zadie Almond T...,0.522622
7884,Santana Women's Brenna Knee-High Boot,0.247555
9005,Endangered Natural Sapphire w/ruby eyes frog b...,0.197982
9078,"FLY London Women's Ydel Perf Wedge Pump,Beige/...",0.1106
123,Full Zip Fleece Jacket with Pink Ribbon Embroi...,0.099564


In [None]:
# NOTE(review): this repeats the backpack title lookup from the earlier section and
# label 407721 does not appear in the women's results above — likely a leftover cell; confirm.
products_df['title'].loc[407721]

'New Fashion Gold /Silver School Travel Gym Shoulder Bag Backpack'

### Creating a General Function

In [96]:
def content_recommender_cosine(title, products, category, top_n=10, max_products=10000, min_df=30):
    """Recommend the products whose descriptions are most similar to a given one.

    The products flagged for `category` are vectorized with TF-IDF on their
    'description' text, and every product is scored by cosine similarity
    against the product called `title`.

    Parameters
    ----------
    title : str
        Exact title of the query product. If several rows share the title,
        the first one is used.
    products : pandas.DataFrame
        Product table with 'title' and 'description' columns and a boolean
        flag column named `category`. Descriptions must not be missing.
    category : str
        Name of the flag column used to select candidate products.
    top_n : int, default 10
        Number of rows to return.
    max_products : int, default 10000
        Categories with more rows than this are randomly down-sampled
        (fixed seed 42) to keep the TF-IDF matrix manageable.
    min_df : int or float, default 30
        Passed to `TfidfVectorizer`; terms found in fewer documents are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns 'product_name' and 'similarity', sorted by descending
        similarity and limited to `top_n` rows. The query product itself is
        included. The index holds the original row labels, except when the
        category was down-sampled, in which case rows are renumbered from 0.

    Raises
    ------
    ValueError
        If no product in `category` has the requested `title`.
    """
    candidates = products[products[category] == True]

    # Fail early with a clear message instead of a length-mismatch error later on.
    title_matches = (candidates['title'] == title).to_numpy()
    if not title_matches.any():
        raise ValueError(
            f"No product titled {title!r} was found in category {category!r}."
        )

    if candidates.shape[0] > max_products:
        sampled = candidates.sample(n=max_products, random_state=42)
        if not (sampled['title'] == title).any():
            # The random sample missed the query product; add it back so it
            # can still be scored against the sampled candidates.
            sampled = pd.concat([candidates[title_matches].iloc[:1], sampled])
        # drop=True renumbers the rows without adding an 'index' column, which
        # would fail if `products` already had a column with that name.
        candidates = sampled.reset_index(drop=True)

    vectorizer = TfidfVectorizer(stop_words='english', min_df=min_df, lowercase=True)
    tfidf_matrix = vectorizer.fit_transform(candidates['description'])

    # The matrix rows are addressed by POSITION, so look the query up by
    # position rather than by index label (labels are not 0..n-1 unless reset).
    query_position = int(np.flatnonzero((candidates['title'] == title).to_numpy())[0])

    # Only the query product's row is needed, so avoid building the full
    # n x n similarity matrix. Slicing keeps the query two-dimensional.
    scores = cosine_similarity(
        tfidf_matrix[query_position:query_position + 1], tfidf_matrix
    ).ravel()

    sim_df = pd.DataFrame(
        {
            'product_name': candidates['title'],
            'similarity': scores,
        }
    )

    return sim_df.sort_values(by='similarity', ascending=False).head(top_n)

In [98]:
# Example: recommendations for the first backpack in the catalogue.
content_recommender_cosine(
    title='Japan Anello Backpack Unisex PINK BEIGE LARGE PU LEATHER Rucksack School Bag Campus',
    products=products_df,
    category='maincat_Backpacks',
)

Unnamed: 0,product_name,similarity
0,Japan Anello Backpack Unisex PINK BEIGE LARGE ...,1.0
1,Japan Anello Backpack Unisex BLACK LARGE PU LE...,1.0
4,Japan Anello LARGE CAMO 2 Way Unisex Shoulder ...,0.678002
630122,Women Girls Ladies Backpack Fashion Shoulder B...,0.458001
208215,SHENGXILU Women's/Lady's PU Leather Backpack G...,0.43709
254817,Brixton Men's Carson Backpack,0.392878
474496,Soft PU Backpack School Bag Travel Bag Frozen ...,0.382243
405784,Soft PU Backpack Children's School Bag Travel ...,0.382243
433616,Remeehi Girl's Sliver Holographic Laser School...,0.355488
293572,Le Donne Leather Distressed Leather Multi Pock...,0.335467
