In [1]:
import numpy as np
import pandas as pd
import nltk

In [2]:
amazon_data = pd.read_csv('amazon_product.csv')
amazon_data.head()

Unnamed: 0,id,Title,Description,Category
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,8,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [3]:
amazon_data.drop('id', axis=1, inplace=True)

In [4]:
amazon_data.head()

Unnamed: 0,Title,Description,Category
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [5]:
amazon_data.isnull().sum()

Title          0
Description    0
Category       0
dtype: int64

In [6]:
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer('english')
def tokenize_stem(text):
    tokens = nltk.word_tokenize(text.lower())
    stemmed = [stemmer.stem(w) for w in tokens]
    return " ".join(stemmed)

In [7]:
amazon_data['stemmed_tokens'] = amazon_data.apply(lambda row:tokenize_stem(row['Title']+" "+row['Description']),axis=1)

In [8]:
amazon_data.head()

Unnamed: 0,Title,Description,Category,stemmed_tokens
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,swissmar capstor select storag rack for 18-pac...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,gemini200 delta cv-880 gold crown liveri aircr...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...",superior thread 10501-2172 magnifico cream puf...
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,fashion angel color rox hair chox kit experi w...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,union creativ giant kill figur 05 : daisuk tsu...


In [36]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
tfidfv = TfidfVectorizer(tokenizer=tokenize_stem)
def cosine_sim(txt1, txt2):
    matrix = tfidfv.fit_transform([txt1, txt2])
    return cosine_similarity(matrix)[0][1]

In [37]:
def search_product(query):
    stemmed_query = tokenize_stem(query)
    amazon_data['similarity'] = amazon_data['stemmed_tokens'].apply(lambda x:cosine_sim(stemmed_query, x))
    result = amazon_data.sort_values(by=['similarity'],ascending=False).head(10)[['Title', 'Description', 'Category']]
    return result

In [39]:
amazon_data['Title'][10]

' PURELL ES8 Professional HEALTHY SOAP Foam Refill, Fresh Scent Fragrance, 1200 mL Soap Refill for PURELL ES8 Touch-Free Dispenser (Pack of 2) - 7777-02 '

In [40]:
search_product(' PURELL ES8 Professional HEALTHY SOAP Foam Refill, Fresh Scent Fragrance, 1200 mL Soap Refill for PURELL ES8 Touch-Free Dispenser (Pack of 2) - 7777-02 ')



Unnamed: 0,Title,Description,Category
10,PURELL ES8 Professional HEALTHY SOAP Foam Ref...,1200 ml refill for Purell ES8 touch-free soap ...,Industrial & Scientific › Janitorial & Sanita...
541,Remington SP290 for F4790 Shaver (2-Pack),Technical Features for Remington SP290-2 The R...,Beauty & Personal Care › Shave & Hair Removal...
176,Angel Soft Professional Series Premium 2-Ply ...,Angel Soft Professional Series offers a qualit...,Health & Household Household Supplies Paper &...
160,"Dixie 8.5""Medium-Weight Paper Plates by GP PR...",The WiseSize product offering provides a packa...,Health & Household › Household Supplies › Pap...
206,Filofax 2018 Personal/Compact Academic Week t...,"Over the years, the Filofax organizer has evol...",Office Products › Office & School Supplies › ...
220,Serta iComfort Premium Infant Sleeper Replace...,Replacement cover for the Serta’s icomfort Pre...,Baby Products › Nursery › Bedding › Baby Bedd...
492,"Seventh Generation Baby Overnight Diapers, Fr...",Everyone could use a little sleep. Seventh Gen...,Baby Products Diapering Disposable Diapers
209,ALEX Toys Artist Studio Ultimate Easel Access...,ALEX Toys Artist Studio Ultimate Easel Accesso...,Toys & Games Arts & Crafts
523,"School Smart 2-Pocket Folders, Green, Pack of...",Compile and file with ease with School Smart's...,Office Products Office & School Supplies Fili...
390,"Elmer's Foam Board, 20 x 28 Inches, 3/16 Inch...",Elmer's Foam Board makes it easier to create e...,Office Products Office & School Supplies Pres...
