In [1]:
import streamlit as st
import pandas as pd
import nltk
from nltk.stem.snowball import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the Punkt tokenizer data that nltk.word_tokenize relies on
nltk.download('punkt')

# Read the product catalogue into a DataFrame
amazon = pd.read_csv(
    filepath_or_buffer=r'C:\Users\Mohammed Arif\Downloads\amazon_product.csv',
)

# English Snowball stemmer shared by tokenize_stem
stemmer = SnowballStemmer(language="english")

# Tokenize and stem function
def tokenize_stem(text):
    """Lower-case *text*, tokenize it with NLTK and stem every token.

    Returns the stemmed tokens joined back into one space-separated string.
    """
    lowered = text.lower()
    stemmed_words = map(stemmer.stem, nltk.word_tokenize(lowered))
    return " ".join(stemmed_words)

# Build the 'stemmed_tokens' column unless the CSV already provides it
if 'stemmed_tokens' not in amazon.columns:
    # Missing cells load as NaN (a float). Replace them with '' so the
    # concatenation and tokenize_stem always receive strings instead of
    # raising TypeError on the first incomplete row.
    combined_text = (
        amazon['Title'].fillna('').astype(str)
        + ' '
        + amazon['Description'].fillna('').astype(str)
    )
    amazon['stemmed_tokens'] = combined_text.apply(tokenize_stem)

# Create TF-IDF vectorizer
def _stem_tokenizer(text):
    """Return *text* as a list of stemmed tokens for TfidfVectorizer."""
    # TfidfVectorizer iterates over whatever its tokenizer returns.
    # tokenize_stem returns one joined string, which would be consumed
    # character by character, so split it back into word tokens.
    return tokenize_stem(text).split()


tfidf_vectorizer = TfidfVectorizer(tokenizer=_stem_tokenizer)

# Cosine similarity function
def cosine_sim(txt1, txt2):
    """Return the cosine similarity between the TF-IDF vectors of two texts.

    The shared ``tfidf_vectorizer`` is refitted on just this pair of texts
    on every call.
    """
    pair_vectors = tfidf_vectorizer.fit_transform([txt1, txt2])
    similarity_matrix = cosine_similarity(pair_vectors)
    # Row 0 is txt1, column 1 is txt2
    return similarity_matrix[0][1]

# Streamlit app
def main():
    """Render the search page: a title, a query box and a 'Search' button.

    Clicking the button echoes the query, runs ``search_product`` on it and
    shows the returned rows in a table. A blank query produces a warning
    instead of a search.
    """
    st.title('Amazon Product Search')

    # Query box (rendered in the main page body, not the sidebar)
    query = st.text_input('Enter search query:')
    if st.button('Search'):
        if not query.strip():
            # A blank query gives search_product nothing to match on
            st.warning('Please enter a search query.')
            return
        st.write(f'Searching for: {query}')
        results = search_product(query)
        st.dataframe(results)

# Search product function
def search_product(query, top_n=10):
    """Return the products whose stemmed text is most similar to *query*.

    Args:
        query: Free-text search string; it is stemmed with ``tokenize_stem``
            before being compared.
        top_n: Maximum number of rows to return. Defaults to 10.

    Returns:
        A DataFrame with the 'Title', 'Description' and 'Category' columns of
        the highest-scoring rows, best match first.

    Side effects:
        (Re)writes the 'similarity' column on the module-level ``amazon``
        DataFrame.
    """
    stemmed_query = tokenize_stem(query)
    # Score every product's stemmed text against the stemmed query
    amazon['similarity'] = amazon['stemmed_tokens'].apply(
        lambda product_text: cosine_sim(stemmed_query, product_text)
    )
    ranked = amazon.sort_values(by=['similarity'], ascending=False)
    return ranked.head(top_n)[['Title', 'Description', 'Category']]

# Entry point: build the Streamlit page when this file is executed directly
# rather than imported.
if __name__ == '__main__':
    main()


[nltk_data] Downloading package punkt to C:\Users\Mohammed
[nltk_data]     Arif\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
2024-04-23 10:01:14.420 
  command:

    streamlit run C:\Users\Mohammed Arif\anaconda3\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score


# Load the product CSV again as the labelled evaluation set; the scoring code
# further down reads its 'Correct' column.
labeled_data = pd.read_csv(
    filepath_or_buffer=r'C:\Users\Mohammed Arif\Downloads\amazon_product.csv',
)


def get_recommendations(data):
    """Return the stemmed 'Title' + 'Description' text for one product row.

    Args:
        data: A single row (e.g. the Series handed over by
            ``DataFrame.apply(..., axis=1)``) with 'Title' and 'Description'
            entries.

    Returns:
        The space-joined stemmed tokens produced by ``tokenize_stem``.

    Note:
        The earlier version ignored ``data``, recomputed and overwrote the
        whole ``amazon['stemmed_tokens']`` column on every call, and returned
        that entire Series. A row-wise ``apply`` could therefore not yield
        one value per row.
        NOTE(review): whether stemmed text is the right thing to compare
        against the 'Correct' labels cannot be determined from this file.
    """
    return tokenize_stem(data['Title'] + ' ' + data['Description'])


# Run get_recommendations on every labelled row and store the outcome
labeled_data['Predicted'] = labeled_data.apply(get_recommendations, axis=1)


# Compare the stored predictions with the 'Correct' column
accuracy = accuracy_score(
    y_true=labeled_data['Correct'],
    y_pred=labeled_data['Predicted'],
)

print("Accuracy:", accuracy)