In [109]:
import pandas as pd
import numpy as np
import spacy
from textblob import TextBlob
import re
nlp = spacy.load('en_core_web_sm')
dataframe = pd.read_csv('amazon_product_reviews.csv', low_memory=False)

In [110]:
print(dataframe['reviews.text'].head())
print(dataframe['reviews.text'].shape)
clean_data = dataframe.dropna(subset=['reviews.text'])
reviews_data = clean_data['reviews.text']
print(reviews_data.shape)


0    This product so far has not disappointed. My c...
1    great for beginner or experienced person. Boug...
2    Inexpensive tablet for him to use and learn on...
3    I've had my Fire HD 8 two weeks now and I love...
4    I bought this for my grand daughter when she c...
Name: reviews.text, dtype: object
(34660,)
(34659,)


In [111]:
def preprocess_text(text):
    # Apply spaCy processing
    # Remove special characters
    pattern = r'[^\w\s]'
    text = str(re.sub(pattern, '', text))
    doc = nlp(text)
    # Remove stopwords, punctuation, and lemmatize tokens. Also ensure the token is a non-empty string.
    clean_tokens = [token.lemma_.lower() for token in doc if not token.is_stop and not token.is_punct and token.text.strip()]

    # Join tokens back into a single string
    clean_text = ' '.join(clean_tokens)
    
    return clean_text

# Assuming 'clean_data' is your DataFrame after dropping NA values.
# Apply preprocessing to the 'reviews.text' column
clean_reviews =  reviews_data.apply(preprocess_text)

# Print the first few cleaned reviews
print(clean_reviews.head())

0    product far disappoint child love use like abi...
1      great beginner experienced person buy gift love
2    inexpensive tablet use learn step nabi thrille...
3    ve fire hd 8 week love tablet great valuewe pr...
4    buy grand daughter come visit set user enter a...
Name: reviews.text, dtype: object


In [115]:
def sentiment_analysis(review):
    # Analyze the review's sentiment polarity with TextBlob
    textblob_review = TextBlob(review)
    polarity = textblob_review.polarity
    

    
    # Return both the polarity score 
    return polarity
# Apply sentiment analysis to each review and assign the result to a new column 'polarity'
clean_data.loc[:, 'polarity'] = clean_reviews.apply(sentiment_analysis)

# Print the DataFrame with the new 'polarity' column
print(clean_data[['reviews.text', 'polarity',]].head())

                                        reviews.text  polarity
0  This product so far has not disappointed. My c...       0.3
1  great for beginner or experienced person. Boug...       0.7
2  Inexpensive tablet for him to use and learn on...       0.6
3  I've had my Fire HD 8 two weeks now and I love...  0.432222
4  I bought this for my grand daughter when she c...  0.258929


In [102]:
# Load a larger model that includes word vectors
nlp = spacy.load('en_core_web_md')

index_a = int(input('Please enter the index of the first review you want to compare: '))
index_b = int(input('Please enter the index of the second review you want to compare: '))
num_reviews = len(reviews_data)

if index_a < 0 or index_a >= num_reviews or index_b < 0 or index_b >= num_reviews:
    print('\n',"Error: Invalid index. Please enter indices within the range 0 to", num_reviews - 1)
else:
    review_a = reviews_data.iloc[index_a]
    review_b = reviews_data.iloc[index_b]
    
    print("Review A:", review_a, '\n')
    print("Review B:", review_b, '\n')

    doc_a = nlp(review_a)
    doc_b = nlp(review_b)
    
    similarity_score = doc_a.similarity(doc_b)
    print(f'Similarity score of the two reviews: {round(similarity_score,3)}')
    

Please enter the index of the first review you want to compare:  4
Please enter the index of the second review you want to compare:  55


Review A: I bought this for my grand daughter when she comes over to visit. I set it up with her as the user, entered her age and name and now Amazon makes sure that she only accesses sites and content that are appropriate to her age. Simple to do and she loves the capabilities. I also bought and installed a 64gig SD card which gives this little tablet plenty of storage. For the price I think this tablet is best one out there. You can spend hundreds of dollars more for additional speed and capacity but when it comes to the basics this tablets does everything that most people will ever need at a fraction of the cost. 

Review B: Sleek packaging easy set up and great for anyone who wants a e reader with more to offer. 

Similarity score of the two reviews: 0.873


In [117]:
selection_num = int(input("enter the number of reviews you want to test"))
# Select a few reviews from dataset
sample_reviews = clean_reviews.head(selection_num)  

# Apply the sentiment analysis function to each review
for review in sample_reviews:
    sentiment_score = sentiment_analysis(review)
    print("Review:", review)
    print("Sentiment Score:", sentiment_score)
    print()  

enter the number of reviews you want to test 350


Review: product far disappoint child love use like ability monitor control content ease
Sentiment Score: 0.3

Review: great beginner experienced person buy gift love
Sentiment Score: 0.7000000000000001

Review: inexpensive tablet use learn step nabi thrilled learn skype
Sentiment Score: 0.6

Review: ve fire hd 8 week love tablet great valuewe prime members tablet shines love able easily access prime content movie download watch laterthis 1280800 screen nice look nice crisp bright infact bright ipad pro cost 900 base model build fire insanely awesome run 77 mm thick smooth glossy feel amazing hold like futuristic tab ur hand
Sentiment Score: 0.4322222222222223

Review: buy grand daughter come visit set user enter age amazon make sure access site content appropriate age simple love capability buy instal 64gig sd card give little tablet plenty storage price think tablet well spend hundred dollar additional speed capacity come basic tablet people need fraction cost
Sentiment Score: 0.25892