# Using AWS Comprehend
AWS Comprehend is Amazon's text analysis service. In this file, you'll learn how to call Comprehend from within SageMaker Studio. We will also look at how we can integrate Comprehend for data that we already have saved in S3.

In [None]:
#import needed libraries
import pandas as pd
import boto3

### Get the Comprehend client

In [None]:
# Create a boto3 client for the Comprehend service in the us-east-1 region;
# the cells below call its detect_* methods through this object.
comprehend = boto3.client('comprehend', region_name='us-east-1')

### Call the Comprehend methods

In [None]:
# Run one sample text through three Comprehend analyses and print each result.
sample_tweet="Be of good cheer. Take life one step at a time and do the best you can each day. Life passes so swiftly. --President Nelson"

# Key phrases
phrases = comprehend.detect_key_phrases(Text=sample_tweet, LanguageCode='en')

# Entities
entities = comprehend.detect_entities(Text=sample_tweet, LanguageCode='en')

# Sentiments
sentiments = comprehend.detect_sentiment(Text=sample_tweet, LanguageCode='en')


# Print the phrases:
print('------- phrases ---------')
for phrase in phrases['KeyPhrases']:
    print(phrase['Text'])


# Print the entities with entity type:
print('------- entity : entity type ---------')
for entity in entities['Entities']:
    print(f"{entity['Text']} : {entity['Type']}")

# Print the sentiment:
print('------- sentiment ---------')
print(sentiments['Sentiment'])

### Import CSV from web + Comprehend

In [None]:
import pandas as pd
import boto3 

# Client for the Comprehend service in us-east-1
comprehend = boto3.client('comprehend', region_name='us-east-1')

# Load the sample tweets CSV from the web and keep only the first 10 rows
tweets_csv_url = "https://www.ishelp.info/data/tweets_aws.csv"
df = pd.read_csv(tweets_csv_url).head(10)

df

In [None]:
# Column holding the text to analyse, and the prefix for the result columns
InputFieldName = "text"
OutputFieldName_prefix = "Sentiment"

# Call Comprehend once per row and store the result back in the dataframe
for index, text in df[InputFieldName].items():
    res = comprehend.detect_sentiment(Text=text, LanguageCode='en')
    scores = res["SentimentScore"]

    # Overall label first, then one column per score, e.g. Sentiment_Positive
    df.loc[index, f'{OutputFieldName_prefix}_Overall'] = res['Sentiment']
    for label in ("Positive", "Negative", "Neutral", "Mixed"):
        df.loc[index, f'{OutputFieldName_prefix}_{label}'] = scores[label]

In [None]:
df

### Use the Twitter API + Comprehend

In [None]:
import pandas as pd
import boto3 

# Create a boto3 client for the Comprehend service in the us-east-1 region
comprehend = boto3.client('comprehend', region_name='us-east-1')

In [None]:
# Read the Twitter API bearer token from the environment instead of hard-coding
# it: a token committed in a notebook is readable by anyone who can see the file.
# Set TWITTER_BEARER_TOKEN to your own bearer token before running this cell.
# NOTE(review): the token that was previously committed here should be treated
# as compromised and revoked.
import os

bearer_token = os.environ.get('TWITTER_BEARER_TOKEN', '')
if not bearer_token:
    print('TWITTER_BEARER_TOKEN is not set; set it to your Twitter API bearer token and re-run this cell.')

In [None]:
# Install tweepy, which the following cell imports to query the Twitter API
%pip install tweepy

In [None]:
import tweepy

# Build a tweepy client that authenticates with the bearer token set earlier
client = tweepy.Client(bearer_token=bearer_token)

# Replace with your own search query
query = 'from:byu -is:retweet'

# Fetch up to 100 recent matching tweets, including each tweet's created_at field
tweets = client.search_recent_tweets(query=query, tweet_fields=['created_at'], max_results=100)

# tweets.data is None (not an empty list) when the search matches nothing,
# so fall back to an empty list instead of raising TypeError.
for tweet in tweets.data or []:
    print(tweet.text)


In [None]:
# Make a dataframe out of the search results, one row per tweet keyed by tweet
# ID. The sentiment columns start as empty strings to be filled in afterwards.
df = pd.DataFrame(columns=['ID', 'Text', 'Sentiment', 'Positive_Score', 'Negative_Score', 'Neutral_Score', 'Mixed_Score'], dtype='object')

# tweets.data is None when the search returned no tweets; treat that as "no rows"
for tweet in tweets.data or []:
    df.loc[tweet.id] = [tweet.id, tweet.text, '', '', '', '', '']
df

In [None]:
# Try Comprehend on a single hard-coded sentence and show the overall label
# followed by the four per-class scores.
res = comprehend.detect_sentiment(Text="BYU is super fun!", LanguageCode='en')
scores = res.get('SentimentScore')

s = res.get('Sentiment')
p, neg, neu, mix = (scores[label] for label in ('Positive', 'Negative', 'Neutral', 'Mixed'))

# One value per line: sentiment, positive, negative, neutral, mixed
for value in (s, p, neg, neu, mix):
    print(value)

In [None]:
# Iterate over the data frame and call Comprehend to determine the sentiment
# of each tweet.
# Results are written through df.loc rather than into `row`: iterrows() may
# yield a copy of each row, so assigning to `row` is not guaranteed to change df.
for index, row in df.iterrows():
    result = comprehend.detect_sentiment(Text=row['Text'], LanguageCode='en')
    scores = result['SentimentScore']
    df.loc[index, 'Sentiment'] = result['Sentiment']
    df.loc[index, 'Positive_Score'] = scores['Positive']
    df.loc[index, 'Negative_Score'] = scores['Negative']
    df.loc[index, 'Neutral_Score'] = scores['Neutral']
    df.loc[index, 'Mixed_Score'] = scores['Mixed']
df

In [None]:
# Alternatively, you can pass the data in batches instead of one call per row.
# batch_detect_sentiment accepts at most 25 documents per request, so the text
# list is sent in chunks of that size.
BATCH_LIMIT = 25
texts = df['Text'].tolist()

# One slot per row. A slot stays None if Comprehend could not analyse that
# document (such documents are reported in ErrorList, not ResultList).
results = [None] * len(texts)
for start in range(0, len(texts), BATCH_LIMIT):
    res = comprehend.batch_detect_sentiment(TextList=texts[start:start + BATCH_LIMIT], LanguageCode='en')
    for result in res['ResultList']:
        # 'Index' is the document's position within this chunk
        results[start + result['Index']] = result

# And then you can merge it back into the data frame column by column all at once
sentiment = [result['Sentiment'] if result else None for result in results]
positive_score = [result['SentimentScore']['Positive'] if result else None for result in results]
negative_score = [result['SentimentScore']['Negative'] if result else None for result in results]
neutral_score = [result['SentimentScore']['Neutral'] if result else None for result in results]
mixed_score = [result['SentimentScore']['Mixed'] if result else None for result in results]

# Now save each column into the existing dataframe
df['Sentiment']=sentiment
df['Positive_Score']=positive_score
df['Negative_Score']=negative_score
df['Neutral_Score']=neutral_score
df['Mixed_Score']=mixed_score

In [None]:
#print out the dataframe to see all the fields that were added
df