In [106]:
# Import Pandas for data manipulation
import pandas as pd
# Import spacy for natural language processing tasks
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

In [107]:
# Load spaCy's small English pipeline once; the helper functions defined
# later in this notebook call this shared `nlp` object.
nlp = spacy.load(name="en_core_web_sm")

In [108]:
def preprocess_text(text):
    """Normalise a piece of text for downstream analysis.

    The text is run through the module-level spaCy pipeline ``nlp``; stop
    words, punctuation and whitespace-only tokens are discarded, and the
    remaining tokens are lower-cased and joined with single spaces.

    Parameters
    ----------
    text : str
        Raw text to clean. Non-string values (e.g. ``None`` or a pandas
        ``NaN``) and blank strings are treated as having no content.

    Returns
    -------
    str
        The cleaned, space-separated tokens, or ``""`` when there is nothing
        to process.
    """
    # Missing values have no tokens, and a blank string would only yield
    # whitespace tokens, so skip the pipeline entirely in both cases.
    if not isinstance(text, str) or not text.strip():
        return ""

    doc = nlp(text)
    # Whitespace tokens (runs of spaces, newlines) are neither stop words nor
    # punctuation, so they must be excluded explicitly or they end up in the
    # joined output.
    tokens = [
        token.text.lower()
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return " ".join(tokens)

In [109]:
from textblob import TextBlob

def analyze_sentiment(text):
    """Label the sentiment of ``text`` using TextBlob polarity.

    Parameters
    ----------
    text : str
        The review text to classify. Non-string values (e.g. ``None`` or a
        pandas ``NaN``) are handled the same way as blank text.

    Returns
    -------
    str
        ``"Empty"`` when there is no text to analyse, otherwise
        ``"Positive"``, ``"Negative"`` or ``"Neutral"`` depending on whether
        the polarity is above, below or equal to zero.
    """
    # Missing values have no `.strip()`; treat them like blank text instead of
    # raising AttributeError.
    if not isinstance(text, str) or not text.strip():
        return "Empty"

    doc = nlp(text)

    # Register the custom `sentiment` attribute only if it is not registered
    # yet; the getter computes TextBlob polarity from the document text.
    if not doc.has_extension("sentiment"):
        doc.set_extension(
            "sentiment",
            getter=lambda d: TextBlob(d.text).sentiment.polarity,
        )

    # Calculate sentiment polarity
    polarity = doc._.sentiment

    # Map the signed polarity score to a label
    if polarity > 0:
        return "Positive"
    if polarity < 0:
        return "Negative"
    return "Neutral"

In [110]:
import os

# Get the current working directory
current_directory = os.getcwd()

# Prefer a copy of the dataset sitting next to the notebook so the notebook
# runs on any machine; fall back to the original absolute location otherwise.
local_file_path = os.path.join(current_directory, "amazon_product_reviews.csv")
fallback_file_path = "/Users/talhahmalik/Dropbox/Bootcamp - TA23100010081/amazon_product_reviews.csv"
file_path = local_file_path if os.path.isfile(local_file_path) else fallback_file_path

# Load the dataset
data = pd.read_csv(file_path)

In [111]:
# Keep only the rows whose 'reviews.text' value is present (not NaN/None)
has_review_text = data['reviews.text'].notna()
clean_data = data.loc[has_review_text]


In [112]:
# Hand-written reviews used to exercise the sentiment analysis function
sample_reviews = [
    # favourable wording
    "This product exceeded my expectations. I highly recommend it!",
    # unfavourable wording
    "The quality of this product is terrible. I would not buy it again.",
]

In [113]:
# Report the predicted label for each sample review, one line per review
print("Sentiment analysis results:")
for review, sentiment in zip(sample_reviews, map(analyze_sentiment, sample_reviews)):
    print(f"Review: {review} - Sentiment: {sentiment}")

Sentiment analysis results:
Review: This product exceeded my expectations. I highly recommend it! - Sentiment: Positive
Review: The quality of this product is terrible. I would not buy it again. - Sentiment: Negative
