In [1]:
import numpy as np
import pandas as pd
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob

nlp = spacy.load('en_core_web_sm')

# Add SpacyTextBlob to the pipeline
nlp.add_pipe('spacytextblob')
print(nlp.pipe_names)



['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner', 'spacytextblob']


In [2]:
# Text Preprocessing
def preprocess_text(text):
    # Create a spaCy document
    doc = nlp(text)
    
    # Remove stopwords and punctuation
    filtered_tokens = [token for token in doc if not token.is_stop and not token.is_punct]
    
    # Lemmatize the tokens
    lemmatized_tokens = [token.lemma_ for token in filtered_tokens]
    
    # Join the lemmatized tokens back into a string
    preprocessed_text = ' '.join(lemmatized_tokens)
    
    return preprocessed_text

In [3]:
# Sentiment Analysis
def analyze_sentiment(text):
    # Create a spaCy document
    doc = nlp(text)
    
    # Get the sentiment polarity and subjectivity
    polarity = doc._.blob.polarity
    subjectivity = doc._.blob.subjectivity
    
    # Determine the sentiment label
    if polarity > 0:
        sentiment = 'Positive'
    elif polarity < 0:
        sentiment = 'Negative'
    else:
        sentiment = 'Neutral'
    
    return sentiment, polarity, subjectivity


In [4]:
# Load in the dataset
df = pd.read_csv('amazon_product_review.csv')

clean_data = df.dropna(subset=['reviews.text'])

# # Preprocess the text column
clean_data['preprocessed_text'] = clean_data['reviews.text'].apply(preprocess_text)

# # # Analyze the sentiment of the preprocessed text
clean_data['sentiment'], clean_data['polarity'], clean_data['subjectivity'] = zip(*clean_data['preprocessed_text'].apply(analyze_sentiment))



In [5]:
clean_data.iloc[4, :]

# id                                                   AVqkIhwDv8e3D1O-lebb
# name                    All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...
# asins                                                          B01AHB9CN2
# brand                                                              Amazon
# categories              Electronics,iPad & Tablets,All Tablets,Fire Ta...
# keys                    841667104676,amazon/53004484,amazon/b01ahb9cn2...
# manufacturer                                                       Amazon
# reviews.date                                     2017-01-12T00:00:00.000Z
# reviews.dateAdded                                    2017-07-03T23:33:15Z
# reviews.dateSeen        2017-06-07T09:04:00.000Z,2017-04-30T00:45:00.000Z
# reviews.didPurchase                                                   NaN
# reviews.doRecommend                                                  True
# reviews.id                                                            NaN
# reviews.numHelpful                                                      0
# reviews.rating                                                          5
# reviews.sourceURLs      http://reviews.bestbuy.com/3545/5620406/review...
# reviews.text            I bought this for my grand daughter when she c...
# reviews.title                                   Fantastic Tablet for kids
# reviews.userCity                                                      NaN
# reviews.userProvince                                                  NaN
# reviews.username                                                explore42
# preprocessed_text       buy grand daughter come visit set user enter a...
# sentiment                                                        Positive
# polarity                                                         0.258929
# subjectivity                                                      0.56729
# Name: 4, dtype: object


id                                                   AVqkIhwDv8e3D1O-lebb
name                    All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...
asins                                                          B01AHB9CN2
brand                                                              Amazon
categories              Electronics,iPad & Tablets,All Tablets,Fire Ta...
keys                    841667104676,amazon/53004484,amazon/b01ahb9cn2...
manufacturer                                                       Amazon
reviews.date                                     2017-01-12T00:00:00.000Z
reviews.dateAdded                                    2017-07-03T23:33:15Z
reviews.dateSeen        2017-06-07T09:04:00.000Z,2017-04-30T00:45:00.000Z
reviews.didPurchase                                                   NaN
reviews.doRecommend                                                  True
reviews.id                                                            NaN
reviews.numHelpful                    

In [6]:
print('Original text: ------------------------>\n', clean_data.iloc[4]['reviews.text'])
print('Preprocessed: ------------------------->\n', clean_data.iloc[4]['preprocessed_text'])


Original text: ------------------------>
 I bought this for my grand daughter when she comes over to visit. I set it up with her as the user, entered her age and name and now Amazon makes sure that she only accesses sites and content that are appropriate to her age. Simple to do and she loves the capabilities. I also bought and installed a 64gig SD card which gives this little tablet plenty of storage. For the price I think this tablet is best one out there. You can spend hundreds of dollars more for additional speed and capacity but when it comes to the basics this tablets does everything that most people will ever need at a fraction of the cost.
Preprocessed: ------------------------->
 buy grand daughter come visit set user enter age Amazon make sure access site content appropriate age simple love capability buy instal 64gig sd card give little tablet plenty storage price think tablet well spend hundred dollar additional speed capacity come basic tablet people need fraction cost


In [7]:
clean_data.iloc[4,-4:]

preprocessed_text    buy grand daughter come visit set user enter a...
sentiment                                                     Positive
polarity                                                      0.258929
subjectivity                                                   0.56729
Name: 4, dtype: object

In [8]:
clean_data.columns

Index(['id', 'name', 'asins', 'brand', 'categories', 'keys', 'manufacturer',
       'reviews.date', 'reviews.dateAdded', 'reviews.dateSeen',
       'reviews.didPurchase', 'reviews.doRecommend', 'reviews.id',
       'reviews.numHelpful', 'reviews.rating', 'reviews.sourceURLs',
       'reviews.text', 'reviews.title', 'reviews.userCity',
       'reviews.userProvince', 'reviews.username', 'preprocessed_text',
       'sentiment', 'polarity', 'subjectivity'],
      dtype='object')

In [11]:
# Show text of top 5 most negative reviews along with their polarity and preprocessed text
for index, row in clean_data.sort_values('polarity').head().iterrows():
    print(f"Review: {row['reviews.text']}")
    print(f"Preprocessed Text: {row['preprocessed_text']}")
    print(f"Polarity: {row['polarity']}")
    print()

Review: the only downside of it is that you can't have google playstore on this tablet but you can still install it manually since they both based on android os
Preprocessed Text: downside google playstore tablet install manually base android os
Polarity: -0.8

Review: For the price, this tablet is not bad. I found a couple of things that is a bit annoying. Every time you turn on the device, ads will appear in the lock screen. Also the picture gallery, it takes several minutes to load. The wait is very annoying.
Preprocessed Text: price tablet bad find couple thing bit annoying time turn device ad appear lock screen picture gallery take minute load wait annoying
Polarity: -0.7666666666666666

Review: I'm disappointed that it doesn't have a mirror display mode.
Preprocessed Text: disappointed mirror display mode
Polarity: -0.75

Review: I had to return my device due to WIFI signal dropping, maybe I just had a bad unit.
Preprocessed Text: return device WIFI signal dropping maybe bad unit

In [12]:
# Show the top 5 most positive reviews along with their polarity and preprocessed text
for index, row in clean_data.sort_values('polarity', ascending=False).head().iterrows():
    print(f"Review: {row['reviews.text']}")
    print(f"Preprocessed Text: {row['preprocessed_text']}")
    print(f"Polarity: {row['polarity']}")
    print()

Review: This was a perfect gift for the holidays! I saw the reviews on this and immediately had to get it!
Preprocessed Text: perfect gift holiday see review immediately
Polarity: 1.0

Review: Perfect for my wife and college daughter. Very easyTo learn reader.
Preprocessed Text: Perfect wife college daughter easyTo learn reader
Polarity: 1.0

Review: I travel a lot and this is perfect for watching movies or reading on the flights
Preprocessed Text: travel lot perfect watch movie read flight
Polarity: 1.0

Review: If you are looking for an entry level tablet, this is it.The Kindle fire HD is perfect for kids!
Preprocessed Text: look entry level tablet Kindle fire HD perfect kid
Polarity: 1.0

Review: Perfect for our needs and an on the go mini computer and ereader.
Preprocessed Text: perfect need mini computer ereader
Polarity: 1.0



In [27]:
# compare results of sentiment
sentiment_counts = clean_data['sentiment'].value_counts()
sentiment_counts

sentiment
Positive    2074
Neutral      139
Negative      87
Name: count, dtype: int64

In [None]:
# From the above we can see that there is a majority positive sentiment on the amazon products 
# in comparison to the Neutral and negative sentinments.
# So therefore an overall positive status on the product reviews at amazon based on our data