In [1]:
import spacy
import pandas as pd
import time
from spacytextblob.spacytextblob import SpacyTextBlob

**Implement a sentiment analysis model using spaCy**


In [2]:
##Loading the SM model from Spacy
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('spacytextblob')

<spacytextblob.spacytextblob.SpacyTextBlob at 0x1794d16a250>

In [3]:
##Loading all the review data from an example file (downloaded from Kaggle amazon reviews)
df_review_text_raw = pd.read_csv(r'C:\Swathi\SP23110010980\Data Science (Fundamentals)\T21 - Capstone Project - NLP Applications\ReviewFiles\1429_1.csv',low_memory=False, usecols=['reviews.text'])

In [4]:
df_review_text_raw.describe()

Unnamed: 0,reviews.text
count,34659
unique,34659
top,This product so far has not disappointed. My c...
freq,1


**Display Options to see all text within a dataframe**

In [5]:
##Using polartity score for sentiment
def sentiment_analysis(review):
    doc = nlp(review)
    polarity_score = doc._.blob.polarity
    #sentiment = doc._.blob.sentiment
    return polarity_score

In [6]:
def similarity_analysis(review1, review2):
    doc1 = nlp(review1)
    doc2 = nlp(review2)
    return doc1.similarity(doc2)

In [7]:
##By default dataframe display is truncated in column size and row size. This is to display it fully

pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', 2000)  # Adjust width for better readability
pd.set_option('display.colheader_justify', 'center')  # Center-align column headers
pd.set_option('display.precision', 3)  # Set decimal precision
pd.set_option('display.max_colwidth', None)

In [8]:
##dropping null rows, if any
df_review = df_review_text_raw.dropna()

In [9]:
#df_review.loc[:,'senti_without_processing'] = df_review.apply(lambda x: sentiment_analysis(x['reviews.text']), axis=1)

**Convert to Lower Case**

In [10]:
##Converting to lower case
df_review.loc[:,'review_post_processing'] = df_review['reviews.text'].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_review.loc[:,'review_post_processing'] = df_review['reviews.text'].str.lower()


**Remove Punctuation**

In [11]:
##removing punctuation
df_review.loc[:,'review_post_processing'] = df_review['review_post_processing'].str.replace('[^\w\s]','')

**Remove stop words**

In [12]:
## List of stop words from Spacy
stop = spacy.lang.en.stop_words.STOP_WORDS

In [13]:
## NOT is an important word. This is part of the STOP words list to my surprise. So removing this word from the list. IF I Do not remove this, there are reviewes classified wrongly. 
stop.discard("not")

In [14]:
#stop = spacy.lang.en.stop_words.STOP_WORDS
# Removing all Stop words (except NOT)
df_review.loc[:,'review_post_processing'] = df_review['review_post_processing'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))


In [15]:
##Updating the sentiment for all the rows takes time, so only taking a subset of the rows for testing as below
df_review_partial = df_review.head(2000)

In [16]:
df_review_partial.loc[:,'polarity'] = df_review_partial.apply(lambda x: sentiment_analysis(x['review_post_processing']), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_review_partial.loc[:,'polarity'] = df_review_partial.apply(lambda x: sentiment_analysis(x['review_post_processing']), axis=1)


In [17]:
mask = df_review_partial['polarity'] < 0
df_review_partial.loc[mask,'sentiment_label'] = 'Negative'

mask = df_review_partial['polarity'] == 0
df_review_partial.loc[mask,'sentiment_label'] = 'Neutral'

mask = df_review_partial['polarity'] > 0
df_review_partial.loc[mask,'sentiment_label'] = 'Positive'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_review_partial.loc[mask,'sentiment_label'] = 'Negative'


**Similarity of different reviews**

In [18]:
df_review_partial['review_post_processing'][0]

'product far not disappointed. children love use like ability monitor control content ease.'

In [19]:
df_review_partial['review_post_processing'][1]

'great beginner experienced person. bought gift loves'

In [20]:
similarity_analysis(df_review_partial['review_post_processing'][0],df_review_partial['review_post_processing'][1] )

  return doc1.similarity(doc2)


0.5647624415313598

In [21]:
df_review_partial.head(20)

Unnamed: 0,reviews.text,review_post_processing,polarity,sentiment_label
0,This product so far has not disappointed. My children love to use it and I like the ability to monitor control what content they see with ease.,product far not disappointed. children love use like ability monitor control content ease.,0.325,Positive
1,great for beginner or experienced person. Bought as a gift and she loves it,great beginner experienced person. bought gift loves,0.8,Positive
2,"Inexpensive tablet for him to use and learn on, step up from the NABI. He was thrilled with it, learn how to Skype on it already...","inexpensive tablet use learn on, step nabi. thrilled it, learn skype already...",0.6,Positive
3,I've had my Fire HD 8 two weeks now and I love it. This tablet is a great value.We are Prime Members and that is where this tablet SHINES. I love being able to easily access all of the Prime content as well as movies you can download and watch laterThis has a 1280/800 screen which has some really nice look to it its nice and crisp and very bright infact it is brighter then the ipad pro costing $900 base model. The build on this fire is INSANELY AWESOME running at only 7.7mm thick and the smooth glossy feel on the back it is really amazing to hold its like the futuristic tab in ur hands.,i've fire hd 8 weeks love it. tablet great value.we prime members tablet shines. love able easily access prime content movies download watch laterthis 1280/800 screen nice look nice crisp bright infact brighter ipad pro costing $900 base model. build fire insanely awesome running 7.7mm thick smooth glossy feel amazing hold like futuristic tab ur hands.,0.413,Positive
4,"I bought this for my grand daughter when she comes over to visit. I set it up with her as the user, entered her age and name and now Amazon makes sure that she only accesses sites and content that are appropriate to her age. Simple to do and she loves the capabilities. I also bought and installed a 64gig SD card which gives this little tablet plenty of storage. For the price I think this tablet is best one out there. You can spend hundreds of dollars more for additional speed and capacity but when it comes to the basics this tablets does everything that most people will ever need at a fraction of the cost.","bought grand daughter comes visit. set user, entered age amazon makes sure accesses sites content appropriate age. simple loves capabilities. bought installed 64gig sd card gives little tablet plenty storage. price think tablet best there. spend hundreds dollars additional speed capacity comes basics tablets people need fraction cost.",0.385,Positive
5,This amazon fire 8 inch tablet is the perfect size. I purchased it for my husband so that he has a bigger screen than just his phone. He had gotten me one a few years ago so I knew it would be a good purchase.,amazon fire 8 inch tablet perfect size. purchased husband bigger screen phone. gotten years ago knew good purchase.,0.567,Positive
6,"Great for e-reading on the go, nice and light weight, and for the price point given, definitely worth the purchase.","great e-reading go, nice light weight, price point given, definitely worth purchase.",0.525,Positive
7,"I gave this as a Christmas gift to my inlaws, husband and uncle. They loved it and how easy they are to use with fantastic features!","gave christmas gift inlaws, husband uncle. loved easy use fantastic features!",0.544,Positive
8,"Great as a device to read books. I like that it links with my borrowed library e-books. Switched from another popular tablet brand and I am happy with the choice I made. It took some time to get books from my previous non-Kindle reader, but finally figured out a way!","great device read books. like links borrowed library e-books. switched popular tablet brand happy choice made. took time books previous non-kindle reader, finally figured way!",0.407,Positive
9,I love ordering books and reading them with the reader.,love ordering books reading reader.,0.5,Positive


**Testing the output**

In [22]:
#Actually Positive, mentioned as Neutral
df_review_partial.iloc[13]

reviews.text              Simply does everything I need. Thank youAnd silk works wonders
review_post_processing                      simply need. thank youand silk works wonders
polarity                                                                             0.0
sentiment_label                                                                  Neutral
Name: 13, dtype: object

In [23]:
#Actually Positive, mentioned as Negative
df_review_partial.iloc[17]

reviews.text              I really like this tablet. I would have given 5 stars but sometimes you have to push start several times after you unlock the screen and it is a little annoying.
review_post_processing                                                                                           like tablet. given 5 stars push start times unlock screen little annoying.
polarity                                                                                                                                                                             -0.494
sentiment_label                                                                                                                                                                    Negative
Name: 17, dtype: object

In [24]:
#Actually positive; mentioned as positive (after removing the stop word NOT from the Stop word list and ensure NOT is not removed from the review text).
df_review_partial.iloc[0]

reviews.text              This product so far has not disappointed. My children love to use it and I like the ability to monitor control what content they see with ease.
review_post_processing                                                         product far not disappointed. children love use like ability monitor control content ease.
polarity                                                                                                                                                            0.325
sentiment_label                                                                                                                                                  Positive
Name: 0, dtype: object

***Examples***

**Example 1**

**Input** : I really like this tablet. I would have given 5 stars but sometimes you have to push start several times after you unlock the screen and it is a little annoying.

**Sentiment**: Negative
   
**Verdict** : This is unexpected. 

**Reason for wrong verdict** : This potentially happens due to words like *annoying* moving the polarity towards negative


In [25]:
#without the word annoying  - What is the score?
sentiment_analysis("I really like this tablet. I would have given 5 stars but sometimes you have to push start several times after you unlock the screen")

# Reason above is correct

0.1

**Example 2**

**Input** : Simply does everything I need. Thank youAnd silk works wonders

**Sentiment** : Negative

**Verdict** : This is unexpected.

**Reason for wrong verdict** : Due a typo in the review text *you* and *And* are combined together. There are no strong positive words. The word *Wonders* is considered as neutral. So the meaning is not correctly inferred 

In [26]:
sentiment_analysis("wonders")

0.0

**Example 3**

**Input** : This product so far has not disappointed. My children love to use it and I like the ability to monitor control what content they see with ease.

**Sentiment** : Positive

**Verdict** : This is as expected.



**Example 4**

**Input** : Great video quality lots of fun apps fun for the whole family

**Sentiment** : Positive

**Verdict** : This is as expected.