In [None]:
import pandas as pd
# Location of the review dump; it is read as line-delimited JSON
# (lines=True, i.e. one JSON record per line).
file_path= 'Electronics_5.json'
data= pd.read_json(file_path, lines=True)

# Per-column count of null entries. Kept for inspection only: nothing in the
# visible code reads it afterwards.
missing_values=data.isnull().sum()
# Working frame: just the review text and the rating column.
reviews= data[['reviewText', 'overall']]
print(reviews.columns)

# Discard rows that lack either the text or the rating, so the text-processing
# steps below only ever receive real values.
reviews= reviews.dropna(subset=['reviewText', 'overall'])

from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF representation of the raw review text, ignoring English stop words
# and limiting the vocabulary to 5000 terms.
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix=tfidf.fit_transform(reviews['reviewText'])

from sklearn.metrics.pairwise import cosine_similarity
# Cosine similarity of every review against every other review; row i holds
# the similarity of review i to all reviews (including itself).
# NOTE(review): the result has one row and one column per review, so memory
# grows quadratically with the number of reviews -- presumably the input file
# is small enough for this to fit; confirm before running on a full dump.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


import nltk
# Download the NLTK resources requested by name here. They back the stop-word
# list, tokenizer and lemmatizer imported just below.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# English stop words as a set, so the membership test in preprocess_text()
# is a constant-time lookup.
stop_words = set(stopwords.words('english'))
# Shared lemmatizer instance used by preprocess_text().
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalise a review into a space-separated string of lemmas.

    The text is lower-cased and tokenised. Tokens that are not purely
    alphabetic, or that appear in ``stop_words``, are discarded; every
    remaining token is lemmatised.

    Args:
        text: The review text to clean.

    Returns:
        The surviving lemmas joined by single spaces (an empty string when
        no token survives).
    """
    lemmas = []
    for token in word_tokenize(text.lower()):
        # Skip punctuation, numbers, mixed tokens and stop words.
        if not token.isalpha() or token in stop_words:
            continue
        lemmas.append(lemmatizer.lemmatize(token))
    return ' '.join(lemmas)
# Store the normalised form of every review in a new column.
reviews['cleaned_reviewText']=reviews['reviewText'].apply(preprocess_text)

import nltk
# Download the lexicon resource requested by name for the VADER analyser
# created below.
nltk.download('vader_lexicon')

from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Single analyser instance shared by analyze_sentiment().
sia= SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Return the ``compound`` polarity score the shared analyser gives ``text``.

    Args:
        text: The text to score.

    Returns:
        The value stored under the ``'compound'`` key of the analyser's
        polarity scores.
    """
    return sia.polarity_scores(text)['compound']

# Score the ORIGINAL review text, not the cleaned column. The cleaning step
# lower-cases the text, drops every non-alphabetic token (punctuation,
# "n't", ...) and removes NLTK stop words, which include negations such as
# "not" and "no". VADER relies on exactly those cues, so scoring the cleaned
# text would rate "not good" the same as "good".
reviews['sentiment_score']= reviews['reviewText'].apply(analyze_sentiment)

def get_filtered_reviews(product_name):
    """Return the reviews that mention ``product_name``.

    The match is a case-insensitive, literal substring search; the query is
    not interpreted as a regular expression. The ``summary`` column is
    searched when the module-level ``reviews`` frame has one. Otherwise the
    search falls back to ``reviewText``, because ``reviews`` is built from
    only a subset of the loaded columns and may not carry ``summary``.

    Args:
        product_name: Text to look for.

    Returns:
        The matching rows of ``reviews`` (an empty frame when nothing
        matches).
    """
    search_column = 'summary' if 'summary' in reviews.columns else 'reviewText'
    matches = reviews[search_column].str.contains(
        product_name, case=False, na=False, regex=False
    )
    return reviews[matches]

def get_recommendations(index, cosine_sim=cosine_sim, top_n=5):
    """Return the reviews most similar to the review at position ``index``.

    Args:
        index: Row position of the query review in ``cosine_sim`` (and, via
            ``iloc``, in ``reviews``).
        cosine_sim: Square similarity matrix; defaults to the module-level
            matrix that existed when this function was defined.
        top_n: How many neighbours to return. The default of 5 matches the
            previously hard-coded value.

    Returns:
        A frame with the ``reviewText`` and ``sentiment_score`` of the
        ``top_n`` most similar reviews, ordered by sentiment score, highest
        first.
    """
    import heapq

    similarities = cosine_sim[index]
    # Normalise a negative position so the self-exclusion below still works.
    query_position = index if index >= 0 else len(similarities) + index

    # Exclude the query row explicitly. Dropping "whatever sorts first" is
    # only right when the query is the unique maximum; with duplicate reviews
    # (ties at 1.0) or an all-zero row it could return the query itself.
    candidates = (
        (position, score)
        for position, score in enumerate(similarities)
        if position != query_position
    )
    # nlargest keeps the same tie order as a stable descending sort, without
    # sorting every score just to keep a handful.
    nearest = heapq.nlargest(top_n, candidates, key=lambda pair: pair[1])
    review_indices = [position for position, _ in nearest]

    recommended_reviews = reviews.iloc[review_indices].sort_values(
        by='sentiment_score', ascending=False
    )
    return recommended_reviews[['reviewText', 'sentiment_score']]

# Attach each review's product identifier. The assignment aligns on the row
# index, so rows that were dropped from `reviews` are simply not copied over.
reviews['asin'] = data['asin']

def recommend_products(product_type):
    """Rank products whose reviews mention ``product_type``.

    Reviews are selected by a case-insensitive, literal substring match on
    ``reviewText``. They are grouped by ``asin``; each product gets its mean
    sentiment score, its mean rating and the first matching review text.
    Products are ordered by mean rating and then by mean sentiment score,
    both descending.

    Args:
        product_type: Text to look for in the review text.

    Returns:
        A frame with columns ``asin``, ``reviewText``, ``overall`` and
        ``sentiment_score`` for up to five products, or an empty frame
        (after printing a message) when no review matches.
    """
    # regex=False: the query is free-form user input, so characters such as
    # "(" or "+" must be matched literally. Parsed as a regular expression
    # they could raise or match the wrong text.
    mentions = reviews['reviewText'].str.contains(
        product_type, case=False, na=False, regex=False
    )
    filtered_reviews = reviews[mentions]

    if filtered_reviews.empty:
        print(f"No reviews found for the product type: {product_type}.")
        return pd.DataFrame()

    product_sentiment = filtered_reviews.groupby('asin').agg({
        'sentiment_score': 'mean',
        'overall': 'mean',
        'reviewText': 'first'
    }).reset_index()

    top_products = product_sentiment.sort_values(
        by=['overall', 'sentiment_score'], ascending=False
    ).head(5)

    return top_products[['asin', 'reviewText', 'overall', 'sentiment_score']]

def display_review_simple(review_text):
    """Print a short preview of a review and offer to print all of it.

    Reviews longer than 50 characters are previewed as their first 50
    characters followed by ``...``; shorter reviews are printed whole. The
    user is then asked whether to show the full text.

    Args:
        review_text: The review to display.
    """
    preview_length = 50
    if len(review_text) > preview_length:
        short_review = review_text[:preview_length] + '...'
    else:
        short_review = review_text
    print(short_review)

    # Normalise the reply so that " Yes " or "y" is not silently treated as
    # a refusal.
    read_more = input("Do you want to read the full review? (yes/no): ").strip().lower()

    if read_more in ('yes', 'y'):
        print("Full Review:\n", review_text)
    else:
        print("Skipped full review.\n")


# Ask what to search for. Surrounding whitespace is stripped so that it does
# not become part of the substring that reviews must contain.
product_type = input("Enter the type of electronic device you're looking for (e.g., 'laptop', 'hairdryer'): ").strip()
top_recommendations = recommend_products(product_type)

if not top_recommendations.empty:
    # One summary line per recommended product, followed by its sample review.
    for _, row in top_recommendations.iterrows():
        print(f"Product ASIN: {row['asin']}, Rating: {row['overall']}, Sentiment score: {row['sentiment_score']}")
        display_review_simple(row['reviewText'])
        print("\n")
else:
    print("No recommendations to display.")
    



Index(['reviewText', 'overall'], dtype='object')


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/prakritisubedi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/prakritisubedi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/prakritisubedi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/prakritisubedi/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
