-
Notifications
You must be signed in to change notification settings - Fork 1
/
sentiment_twitter.py
37 lines (32 loc) · 1.2 KB
/
sentiment_twitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import re

import numpy as np
import pandas as pd
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Sample input for the script: one tweet containing both a user mention and
# a link, i.e. the two token kinds that preprocessing replaces.
tweet = (
    "@rektulungrek today the weather is not good for traveling "
    "https://upperstairs.wordpress.com"
)
# Preprocess text (username and link placeholders)
# A token counts only when it begins at the start of the text or right after
# whitespace, so e-mail addresses ("a@b.com") and words that merely contain
# "http" are left alone.
_MENTION_RE = re.compile(r"(?<!\S)@\S+")
_LINK_RE = re.compile(r"(?<!\S)http\S*")


def preprocess(text: str) -> str:
    """Replace user mentions and links in *text* with fixed placeholders.

    Every whitespace-delimited token that starts with ``@`` and has at least
    one further character becomes ``@user``; every token that starts with
    ``http`` becomes ``http``. A lone ``@`` is kept as is.

    Tokens are recognised after any whitespace (space, tab, newline), not
    just a literal space, so mentions and links on the second line of a
    multi-line tweet are masked as well. All whitespace in the input is
    preserved unchanged.

    Args:
        text: Raw tweet text.

    Returns:
        The text with mentions and links replaced by their placeholders.
    """
    # Mentions first: the "@user" placeholder can never match the link
    # pattern, so the two substitutions do not interfere.
    masked = _MENTION_RE.sub("@user", text)
    return _LINK_RE.sub("http", masked)
# Load model and tokenizer from the same checkpoint name so the vocabulary
# matches the weights.
roberta = "cardiffnlp/twitter-roberta-base-sentiment"
model = AutoModelForSequenceClassification.from_pretrained(roberta)
tokenizer = AutoTokenizer.from_pretrained(roberta)

# Display names paired positionally with the model's output scores.
# NOTE(review): assumes the checkpoint orders its classes as
# negative / neutral / positive — confirm against the model card.
labels = ['Negative', 'Neutral', 'Positive']

# Sentiment analysis: tokenize the preprocessed tweet and run the model.
encoded_tweet = tokenizer(preprocess(tweet), return_tensors='pt')
# Unpack the encoding by keyword so each tensor reaches the parameter of the
# same name instead of relying on the forward() positional order.
output = model(**encoded_tweet)
# First element of the output, first (only) item in the batch; detach before
# converting to NumPy.
scores = output[0][0].detach().numpy()
# Turn the raw scores into a distribution that sums to 1.
scores = softmax(scores)

# Report every label with its score.
for label, score in zip(labels, scores):
    print(label, score)

# Report the top label; argmax returns the first index on ties, the same
# result as max() followed by list.index().
index = int(np.argmax(scores))
max_value = scores[index]
print(labels[index], max_value)