In [15]:
import os

import nltk
import numpy as np
import pandas as pd
import streamlit as st
from nltk.stem.snowball import SnowballStemmer
from PIL import Image
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download punkt (only once, needed for word_tokenize)
# NOTE(review): newer NLTK releases reportedly look up 'punkt_tab' instead of
# 'punkt' — TODO confirm against the installed NLTK version.
nltk.download('punkt')

# Load dataset.
# The CSV location can be overridden with the AMAZON_PRODUCT_CSV environment
# variable so the notebook is runnable on other machines; when the variable is
# unset or empty, the original local path is used.
DATA_PATH = (
    os.environ.get('AMAZON_PRODUCT_CSV')
    or 'C:\\Users\\hp\\Downloads\\amazon_product.csv'
)
data = pd.read_csv(DATA_PATH)

# Remove unnecessary column if exists
if 'id' in data.columns:
    data = data.drop(columns=['id'])

# Tokenizer + stemmer used as the TF-IDF analyzer
stemmer = SnowballStemmer('english')
def tokenize_and_stem(text):
    """Lower-case ``text``, tokenize it with NLTK and return the stemmed words.

    ``text`` is passed through ``str()`` first, so any input is accepted.
    Tokens that are not purely alphabetic (``str.isalpha()`` is false) are
    dropped before stemming.

    Returns a list of stem strings in token order.
    """
    lowered = str(text).lower()
    stems = []
    for token in nltk.word_tokenize(lowered):
        if not token.isalpha():
            continue  # skip punctuation, numbers and mixed tokens
        stems.append(stemmer.stem(token))
    return stems

# Create a text column combining Title + Description.
# Missing values become empty strings; astype(str) keeps the concatenation
# from raising if either column contains non-string cells.
data['text'] = (
    data['Title'].fillna('').astype(str)
    + ' '
    + data['Description'].fillna('').astype(str)
)

# TF-IDF vectorizer (fit once on whole dataset).
# token_pattern=None: the default regex is never used when a custom tokenizer
# is supplied, and leaving it set makes scikit-learn emit a UserWarning.
tfidf_vectorizer = TfidfVectorizer(tokenizer=tokenize_and_stem, token_pattern=None)
tfidf_matrix = tfidf_vectorizer.fit_transform(data['text'])

# Search function
def search_products(query, top_n=10):
    """Return the ``top_n`` products most similar to ``query``.

    The query is vectorized with the already-fitted ``tfidf_vectorizer`` and
    compared against every row of ``tfidf_matrix`` using cosine similarity.

    Parameters
    ----------
    query : str
        Free-text search string.
    top_n : int, default 10
        Number of rows to keep, passed to ``DataFrame.head``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Title``, ``Description``, ``Category`` and ``similarity``,
        ordered by ``similarity`` from highest to lowest. Row labels are those
        of the module-level ``data`` frame.
    """
    query_tfidf = tfidf_vectorizer.transform([query])
    cosine_similarities = cosine_similarity(query_tfidf, tfidf_matrix).flatten()
    # Attach the scores to a per-call copy of the displayed columns rather than
    # writing a 'similarity' column into the shared `data` frame, so a search
    # never leaves stale scores behind in global state.
    scored = data[['Title', 'Description', 'Category']].assign(
        similarity=cosine_similarities
    )
    results = scored.sort_values(by='similarity', ascending=False).head(top_n)
    return results

# # Example: show one product
# print(data['Title'].iloc[1])

# # Example: search
# query = "PURELL ES8 Professional HEALTHY SOAP Foam Refill, Fresh Scent Fragrance, 1200 mL Soap Refill for PURELL ES8 Touch-Free Dispenser (Pack of 2) - 7777-02"
# results = search_products(query)
# print(results)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# Example: print the title of the product at row position 10
print(data['Title'].iat[10])


In [9]:
# Example: run a search using a full product title as the query text
query = "PURELL ES8 Professional HEALTHY SOAP Foam Refill, Fresh Scent Fragrance, 1200 mL Soap Refill for PURELL ES8 Touch-Free Dispenser (Pack of 2) - 7777-02"
results = search_products(query=query)
print(results)

                                                 Title  \
10    PURELL ES8 Professional HEALTHY SOAP Foam Ref...   
517   Mrs. Meyer Liquid Hand Soap Parsley & Rhubarb...   
322   EO PRODUCTS SOAP,EVERYONE,CITRUS&MINT, 32 FZ ...   
84    Body Fantasies Fresh White Musk 3 Piece Gift ...   
324   Topiclear Number One Soap 3 Ounce Boxed (88ml...   
628   Allied Brass 2032-VB Continental Collection W...   
13    BRUT After Shave Classic Fragrance 5 oz (Pack...   
544   GUM Proxabrush Go-Betweens Interdental Brush ...   
291   Febreze Air Freshener, Noticeables Air Freshe...   
606   Aquaphor Healing Skin Ointment Advanced Thera...   

                                           Description  \
10   1200 ml refill for Purell ES8 touch-free soap ...   
517  Mrs. Meyer's Clean Day Liquid Hand Soap Variet...   
322  EO Products Everyone Soap Citrus and Mint is f...   
84   Blends the clean fresh natural scents of brigh...   
324  Develops A Remarkable Bactericidal And Fungici...   
628  Check ou