In [None]:
# This is a model that uses the nlp model to perform sentiment analysis on a dataset of consumer reviews of Amazon Products

# Import all necessary library to load csv file and perform nlp
import pandas as pd
import spacy
from textblob import TextBlob
from colorama import Fore, Style

# This will load SpaCy english model 
nlp = spacy.load("en_core_web_lg")

# Reading the csv file into a dataframe
df = pd.read_csv("C:\\Users\\Teju\\Downloads\\amazon_product_reviews.csv")

# Covert all texts in the reviews.text column into lower strings
reviews_data = df['reviews.text'].str.lower()

# Removing all missing values from the column
clean_data = reviews_data.dropna()

# Remove non-alphanumeric characters from the beginning and end of each review
striped_data = clean_data.str.replace(r"^\W+|\W+$", "")

# Defining a function that will take a product review and predict its sentiment
def predict_review_sentiment(review):
    # Process the review using SpaCy
    doc = nlp(review)

    # Removing stopwords and joining back the cleaned tokens
    stopwords_removed = [token.text for token in doc if not token.is_stop]
    cleaned_review = ' '.join(stopwords_removed)

    # Perform sentiment analysis using TextBlob
    blob = TextBlob(cleaned_review)
    polarity = blob.sentiment.polarity

    # Determine sentiment label
    if polarity > 0:
        return 'positive'
    elif polarity < 0:
        return 'negative'
    else:
        return 'neutral'

# Test the model on sample product review by using index
amazon_review_of_choice = striped_data.iloc[0]  # change the index to see the sentiment
sentiment_label = predict_review_sentiment(my_review_of_choice)

formatted_select_review = f"{Fore.BLUE}{Style.BRIGHT}Select Review: {amazon_review_of_choice}{Style.RESET_ALL}"

print(formatted_select_review)
print(f'Select Review: {amazon_review_of_choice}')
print(" ")
print(f"Predicted Sentiment: {sentiment_label}")


In [26]:
# Define a function that will calculate the similarity score between two reviews
def calculate_similarity(review1, review2):
    first_review = nlp(review1)
    second_review = nlp(review2)
    similarity_score = first_review.similarity(second_review)
    return similarity_score

# Choose the two reviews you want to calculate their similarity score by changing the index
sample_review1 = striped_data.iloc[0]
sample_review2 = striped_data.iloc[1]

similarity_score = calculate_similarity(sample_review1, sample_review2)

print(f"1st Review: {sample_review1}")
print(" ")
print(f"2nd Review: {sample_review2}")
print(" ")
print(f"Similarity Score: {similarity_score:.3f}")

1st Review: i thought it would be as big as small paper but turn out to be just like my palm. i think it is too small to read on it... not very comfortable as regular kindle. would definitely recommend a paperwhite instead.
 
2nd Review: this kindle is light and easy to use especially at the beach!!!
 
Similarity Score: 0.779
