### 1. Trying out sentiment analysis models

In [1]:
from transformers import TFRobertaForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax
import logging
from transformers import logging as transformers_logging

transformers_logging.set_verbosity_error()
logging.getLogger("transformers").setLevel(logging.ERROR)

labels = {
    0 : 'negative',
    1 : 'neutral',
    2 : 'positive'
}
def test_sentiment_score(text, tokenizer, model):
    encoded_input = tokenizer(text, return_tensors='tf')
    output = model(encoded_input)
    scores = output[0][0].numpy()
    scores = softmax(scores)
    ranking = np.argsort(scores)
    ranking = ranking[::-1]
    ranked_labels = [labels[i] for i in ranking]
    return (ranked_labels[0])

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
import re
 
# Sample sentence used to smoke-test every checkpoint.
text = "I hate children"

# Hugging Face Hub identifiers of the three checkpoints being compared.
model_roberta_latest = "cardiffnlp/twitter-roberta-base-sentiment-latest"
model_roberta_older = "cardiffnlp/twitter-roberta-base-sentiment"
model_name_bertweet = "finiteautomata/bertweet-base-sentiment-analysis"

# Load one tokenizer/model pair per checkpoint. These names are reused by
# later cells, so they are kept as notebook-level variables.
tokenizer_latest = AutoTokenizer.from_pretrained(model_roberta_latest)
model_latest = TFRobertaForSequenceClassification.from_pretrained(model_roberta_latest)

tokenizer_older = AutoTokenizer.from_pretrained(model_roberta_older)
model_older = TFRobertaForSequenceClassification.from_pretrained(model_roberta_older)

tokenizer_bertweet = AutoTokenizer.from_pretrained(model_name_bertweet)
model_bertweet = TFAutoModelForSequenceClassification.from_pretrained(model_name_bertweet)

# Classify the sample sentence with each model.
sentiment_score_new = test_sentiment_score(text, tokenizer_latest, model_latest)
sentiment_score_old = test_sentiment_score(text, tokenizer_older, model_older)
sentiment_score_bertweet = test_sentiment_score(text, tokenizer_bertweet, model_bertweet)

# Report each prediction next to the checkpoint's short name
# (everything after the last '/' in its identifier).
for checkpoint, predicted in (
    (model_roberta_latest, sentiment_score_new),
    (model_roberta_older, sentiment_score_old),
    (model_name_bertweet, sentiment_score_bertweet),
):
    short_name = re.search(r'[^/]+$', checkpoint).group(0)
    print(f"Sentiment score ({short_name}): {predicted}")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development





To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Sentiment score (twitter-roberta-base-sentiment-latest): positive
Sentiment score (twitter-roberta-base-sentiment): negative
Sentiment score (bertweet-base-sentiment-analysis): negative


### 2. Setting up dataframe for accuracy calculation

In [3]:
import pandas as pd

# Load the labelled validation tweets from a CSV in the working directory.
# Later cells read its 'sentiment' and 'tweet' columns.
df_test = pd.read_csv('twitter_validation.csv')
# Preview the first rows.
df_test.head()

Unnamed: 0,tweet_id,entity,sentiment,tweet
0,3364,Facebook,Irrelevant,I mentioned on Facebook that I was struggling ...
1,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...
2,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...
3,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,..."
4,4433,Google,Neutral,Now the President is slapping Americans in the...


In [4]:

# Keep only tweets whose human label is not 'Irrelevant', since the models
# only predict negative/neutral/positive.
is_relevant = df_test['sentiment'] != 'Irrelevant'
# Restrict to the first 1000 remaining tweets to save time.
df_test = df_test[is_relevant].head(1000)

In [5]:
# Show full tweet text instead of truncating wide columns.
pd.set_option('display.max_colwidth', None)

# Add one prediction column per model. Every tweet is classified by a separate
# call to test_sentiment_score for each model, so this cell is slow.
for column, tok, mdl in (
    ('roberta_old', tokenizer_older, model_older),
    ('roberta_new', tokenizer_latest, model_latest),
    ('bertweet', tokenizer_bertweet, model_bertweet),
):
    df_test[column] = df_test['tweet'].apply(test_sentiment_score, args=(tok, mdl))

df_test

KeyboardInterrupt: 

In [None]:
# Lower-case the three human labels so they compare equal to the
# 'negative'/'neutral'/'positive' strings the models return.
for raw_label in ('Neutral', 'Positive', 'Negative'):
    df_test.loc[df_test['sentiment'] == raw_label, 'sentiment'] = raw_label.lower()

In [None]:
# Preview the first rows of df_test.
df_test.head()

### 3. Checking the accuracy by comparing model labels with human labels (on the first 1000 tweets)

In [None]:
# Row-wise agreement between the human label and the roberta_old prediction.
matches = df_test['sentiment'].eq(df_test['roberta_old'])
# The mean of the boolean mask is the share of agreeing rows; scale to percent.
percentage_matches = 100 * matches.mean()
print(f"Accuracy of roberta_old model: {percentage_matches:.2f}%")

In [None]:
# Row-wise agreement between the human label and the roberta_new prediction.
matches = df_test['sentiment'].eq(df_test['roberta_new'])
# The mean of the boolean mask is the share of agreeing rows; scale to percent.
percentage_matches = 100 * matches.mean()
print(f"Accuracy of roberta_new model: {percentage_matches:.2f}%")

In [None]:
# Row-wise agreement between the human label and the bertweet prediction.
matches = df_test['sentiment'].eq(df_test['bertweet'])
# The mean of the boolean mask is the share of agreeing rows; scale to percent.
percentage_matches = 100 * matches.mean()
print(f"Accuracy of bertweet model: {percentage_matches:.2f}%")