In [1]:
import pandas as pd

## Sentiment Analysis using SpacyTextBlob

In [2]:
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
nlp = spacy.load('en_core_web_sm') #py -m spacy download en
nlp.add_pipe('spacytextblob')

<spacytextblob.spacytextblob.SpacyTextBlob at 0x18d72af83d0>

In [3]:
text = "I love school so much! It's my favorite thing ever! I get to see all my friends and learn about cool things!"
doc = nlp(text)
print(doc._.polarity)   
print(doc._.subjectivity) 
print(doc._.assessments, '\n')  

text = "I hate school so much! It's the worst thing ever!"
doc = nlp(text)
print(doc._.polarity)   
print(doc._.subjectivity) 
print(doc._.assessments)  

0.5208333333333334
0.75
[(['love'], 0.5, 0.6, None), (['much', '!', 'favorite', '!'], 0.625, 1.0, None), (['cool', '!'], 0.4375, 0.65, None)] 

-0.5166666666666667
0.7000000000000001
[(['hate'], -0.8, 0.9, None), (['much', '!'], 0.25, 0.2, None), (['worst', '!'], -1.0, 1.0, None)]


In [4]:

# Polarity is a float within the range [-1.0, 1.0].  Negative polarity indicates negative sentiment, positive polarity indicates positive sentiment. 
# Higher magnitude indicates more extreme sentiment.

# Subjectivity is a float within the range [0.0, 1.0] where 0.0 is very objective and 1.0 is very subjective, 
# Assessments is a list of polarity and subjectivity scores for the assessed tokens.

def get_polarity_and_subjectivity(text):
    doc = nlp(text)
    return (doc._.polarity, doc._.subjectivity,doc._.assessments)

## Sentiment Analysis using [Amazon Comprehend](https://aws.amazon.com/comprehend/)
### [Install boto3 and configure AWS](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#install-boto3)
### [Authenticating AWS Credentials](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html#id_users_create_console)

In [5]:
# !pip install boto3
import boto3
import json

comprehend = boto3.client(service_name='comprehend', region_name='us-east-1')
                
text = "It is raining today in Seattle"

print('Calling DetectSentiment')
response = comprehend.detect_sentiment(Text=text, LanguageCode='en')
print(response['Sentiment'])
print(json.dumps(response['SentimentScore'],indent=4))

print('End of DetectSentiment\n')

Calling DetectSentiment
NEUTRAL
{
    "Positive": 0.03478698432445526,
    "Negative": 0.30989840626716614,
    "Neutral": 0.6552183032035828,
    "Mixed": 9.628861880628392e-05
}
End of DetectSentiment



In [6]:
def get_aws_sentiment(tweet_text):
    response = comprehend.detect_sentiment(Text=text, LanguageCode='en')
    return(response['Sentiment'])

In [7]:
#note: maybe should clean text to be only English but need to test if AWS can handle other languages

df_soccer = pd.read_csv('data/Soccer-tweets.csv', encoding='utf8')
japanese_text = df_soccer["Text"][0]
get_aws_sentiment(japanese_text) 

'NEUTRAL'

### Read in data

In [8]:
df_election = pd.read_csv('data/USA Election 2020-tweets.csv')
tweet_text = df_election["Text"]
for tweet in tweet_text[:5]: #print first 5 tweets
    print(tweet)


RT @mosaitricks: #Biden election fraud
11/2020 = 33 month
USA 7/4 was in 33 cycle

Republicans make gains
11/2021 = 16/7 month
USA in 16/7…
@joncoopertweets I would feel better if USA government did something about the past and present spreading false con… https://t.co/ukBp4fEhuX
A day to remember and to celebrate.
2020 Election Results | USA TODAY https://t.co/iHMDxFzN6I via @usatoday
If you're a patriot you support the laws of the USA. 
The Republicans violated law after law after law during the T… https://t.co/vzMnwkNwm9
RT @RealPatriot56: Democrats cheated in New Jersey knowing they could and would get away with it, because everyone got away with stealing t…


In [17]:
aws_sentiment = []
spacy_sentiment = []
for tweet in tweet_text[:5]:
    aws_sentiment.append(get_aws_sentiment(tweet))
    spacy_sentiment.append(get_polarity_and_subjectivity(tweet)[0])

In [20]:
for i,j in zip(aws_sentiment,spacy_sentiment):
    print(i,j)

NEGATIVE 0.0
NEGATIVE -0.03750000000000002
NEGATIVE 0.0
NEGATIVE 0.0
NEGATIVE 0.13636363636363635


In [25]:
#Display sentiment discrepancies between two methods

df_election_sentiment = pd.DataFrame({"Text": tweet_text[:5],"AWS Sentiment":aws_sentiment, "Spacy Sentiment":spacy_sentiment})
df_election_sentiment

Unnamed: 0,Text,AWS Sentiment,Spacy Sentiment
0,RT @mosaitricks: #Biden election fraud\n11/202...,NEGATIVE,0.0
1,@joncoopertweets I would feel better if USA go...,NEGATIVE,-0.0375
2,A day to remember and to celebrate.\n2020 Elec...,NEGATIVE,0.0
3,If you're a patriot you support the laws of th...,NEGATIVE,0.0
4,RT @RealPatriot56: Democrats cheated in New Je...,NEGATIVE,0.136364
