# Performing Sentiment Analysis on Tweets
- Following tutorial: [video tutorial (YouTube)](https://youtu.be/uPKnSq6TaAk)
- See also: [cardiffnlp/twitter-roberta-base-sentiment model card](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment?text=I+like+you.+I+love+you)

# Sentiment Analysis

In [122]:
import numpy as np
import torch
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import TFAutoModelForSequenceClassification

In [123]:
# Example tweet that the following cells analyze step by step
tweet = "Shit! I'm pissed off!"

In [124]:
# Preprocess Tweet
# Mask user mentions and links with the generic '@user' / 'http' placeholders
# (same preprocessing as shown on the checkpoint's model card), so individual
# handles and URLs do not influence the prediction.
words = [
    '@user' if (w.startswith('@') and len(w) > 1)
    else 'http' if w.startswith('http')
    else w
    for w in tweet.split(' ')
]
tweet_preprocessed = " ".join(words)
print(tweet_preprocessed)

Shit! I'm pissed off!


In [125]:
# Initialize Model
# Class names, indexed in parallel with the scores the model produces
labels = ['Negative', 'Neutral', 'Positive']

# Identifier of the pretrained checkpoint; tokenizer and model must come
# from the same checkpoint
roberta_url = 'cardiffnlp/twitter-roberta-base-sentiment'
tokenizer = AutoTokenizer.from_pretrained(roberta_url)
model = AutoModelForSequenceClassification.from_pretrained(roberta_url)

In [126]:
# Run Model(Tweet)
# Encode the preprocessed tweet as PyTorch tensors
tweet_encoded = tokenizer(tweet_preprocessed, return_tensors='pt')
# Inference only: skip autograd bookkeeping for the forward pass
with torch.no_grad():
    output = model(**tweet_encoded)

In [127]:
# Output
# Take the raw scores of the first (and only) sequence in the batch and
# normalize them with softmax so they sum to 1
scores = softmax(output[0][0].detach().numpy())

for i, score in enumerate(scores):
    print(f'{labels[i]}: {score}')

Negative: 0.9690775871276855
Neutral: 0.024700108915567398
Positive: 0.006222342606633902


In [128]:
# Loaded (tokenizer, model) pairs keyed by checkpoint name, so repeated calls
# to sentiment_analyze() do not reload the weights every time.
_SENTIMENT_MODEL_CACHE = {}


def _preprocess_tweet(tweet):
    """Mask user mentions and links with the '@user' / 'http' placeholders."""
    words = []
    for w in tweet.split(' '):
        if w.startswith('@') and len(w) > 1:
            w = '@user'
        elif w.startswith('http'):
            w = 'http'
        words.append(w)
    return " ".join(words)


def _load_sentiment_model(roberta_url):
    """Return the (tokenizer, model) pair for a checkpoint, loading it only once."""
    if roberta_url not in _SENTIMENT_MODEL_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(roberta_url)
        model = AutoModelForSequenceClassification.from_pretrained(roberta_url)
        _SENTIMENT_MODEL_CACHE[roberta_url] = (tokenizer, model)
    return _SENTIMENT_MODEL_CACHE[roberta_url]


def sentiment_analyze(tweet):
    """Classify the sentiment of a single tweet and print the class scores.

    The tweet is preprocessed (mentions -> '@user', links -> 'http', as in
    the preprocessing shown on the checkpoint's model card), run through the
    'cardiffnlp/twitter-roberta-base-sentiment' checkpoint, and the raw
    scores are turned into probabilities with softmax.

    Parameters
    ----------
    tweet : str
        Raw tweet text.

    Returns
    -------
    tuple
        ``(labels, scores)`` where ``labels`` is
        ``['Negative', 'Neutral', 'Positive']`` and ``scores`` is a NumPy
        array of softmax probabilities in the same order.
    """
    # Preprocess Tweet
    tweet_preprocessed = _preprocess_tweet(tweet)
    print(f'Tweet (preprocessed): {tweet_preprocessed}')

    # Initialize Model (cached after the first call)
    roberta_url = 'cardiffnlp/twitter-roberta-base-sentiment'
    tokenizer, model = _load_sentiment_model(roberta_url)
    # A fresh list per call, so callers may mutate the returned labels safely
    labels = ['Negative', 'Neutral', 'Positive']

    # Run Model(Tweet)
    tweet_encoded = tokenizer(tweet_preprocessed, return_tensors='pt')
    # Inference only: no gradients are needed, so skip autograd bookkeeping
    with torch.no_grad():
        output = model(**tweet_encoded)

    # Output: scores of the first (only) sequence in the batch -> probabilities
    scores = softmax(output[0][0].numpy())

    for label, score in zip(labels, scores):
        print(f'{label}: {score}')

    return labels, scores

# Examples (try some out yourself!)

In [129]:
# Example: one-word profane exclamation
t = "Fuck!"
sentiment_analyze(tweet=t)

Tweet (preprocessed): Fuck!
Negative: 0.931067705154419
Neutral: 0.056312479078769684
Positive: 0.012619812041521072


(['Negative', 'Neutral', 'Positive'],
 array([0.9310677 , 0.05631248, 0.01261981], dtype=float32))

In [130]:
# Example: one-word enthusiastic exclamation
t = "Awesome!"
sentiment_analyze(tweet=t)

Tweet (preprocessed): Awesome!
Negative: 0.0042463126592338085
Neutral: 0.03754191845655441
Positive: 0.9582117199897766


(['Negative', 'Neutral', 'Positive'],
 array([0.00424631, 0.03754192, 0.9582117 ], dtype=float32))

In [131]:
# Example: tweet mixing a negative and a positive statement
t = "I hate being bipolar. It is awesome!"
sentiment_analyze(tweet=t)

Tweet (preprocessed): I hate being bipolar. It is awesome!
Negative: 0.21771182119846344
Neutral: 0.240257129073143
Positive: 0.5420310497283936


(['Negative', 'Neutral', 'Positive'],
 array([0.21771182, 0.24025713, 0.54203105], dtype=float32))

In [None]:
# Example: plain factual statement without emotive words
t = "There is a ball on the table."
sentiment_analyze(tweet=t)

Tweet (preprocessed): There is a ball on the table.
