In [1]:
import pandas as pd

# Load the labelled headlines; later cells read the "text" and "sentiment" columns.
results = pd.read_csv("test.csv")

In [2]:
# Preview the frame that was just loaded.
results

Unnamed: 0,text,site,sentiment
0,Struggle to evacuate Afghans from Kabul airport,bbc.co.uk,-1
1,Decision on who gets third vaccine dose due im...,bbc.co.uk,1
2,Banksy: How much do we really know about him?,bbc.co.uk,1
3,Raab rejects calls to quit over Afghan interpr...,bbc.co.uk,-1
4,Transfer latest and Premier League build-up,bbc.co.uk,1
...,...,...,...
112,Age is just a number: Rankin portraits celebra...,positive.news,1
113,Renowned photographer Rankin has teamed up wit...,positive.news,1
114,Catch up on what's been going right in the world,positive.news,1
115,Get Positive News stories in your inbox each week,positive.news,1


In [3]:
from afinn import Afinn
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline

def to_polarity(score):
    """Collapse a signed sentiment score into the -1 / 1 labels used in ``results``.

    Args:
        score: Numeric score where negative values mean negative sentiment.

    Returns:
        ``-1`` for a strictly negative score, otherwise ``1``. A score of
        exactly zero is therefore counted as positive.
    """
    return 1 if score >= 0 else -1


# Both lexicon scorers only need the "text" column, so map over that Series
# instead of building a full row object per record with apply(axis=1).
afinn = Afinn()
results["afinn"] = results["text"].map(lambda text: to_polarity(afinn.score(text)))

vader = SentimentIntensityAnalyzer()
# VADER returns several scores; "compound" is the single signed value used here.
results["vader"] = results["text"].map(
    lambda text: to_polarity(vader.polarity_scores(text)["compound"])
)

In [4]:
# Show the frame with the new "afinn" and "vader" columns.
results

Unnamed: 0,text,site,sentiment,afinn,vader
0,Struggle to evacuate Afghans from Kabul airport,bbc.co.uk,-1,-1,-1
1,Decision on who gets third vaccine dose due im...,bbc.co.uk,1,1,1
2,Banksy: How much do we really know about him?,bbc.co.uk,1,1,1
3,Raab rejects calls to quit over Afghan interpr...,bbc.co.uk,-1,-1,-1
4,Transfer latest and Premier League build-up,bbc.co.uk,1,1,1
...,...,...,...,...,...
112,Age is just a number: Rankin portraits celebra...,positive.news,1,1,1
113,Renowned photographer Rankin has teamed up wit...,positive.news,1,1,1
114,Catch up on what's been going right in the world,positive.news,1,1,1
115,Get Positive News stories in your inbox each week,positive.news,1,1,1


In [5]:
from transformers import pipeline
# Name the checkpoint explicitly rather than relying on whatever default the
# installed transformers version picks for the task, so re-runs score with the
# same model.
# NOTE(review): this is the checkpoint documented as the "sentiment-analysis"
# default; confirm it matches the one the original run used.
BERT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
classifier = pipeline("sentiment-analysis", model=BERT_MODEL)

# The pipeline returns a list with one {"label", "score"} dict per input.
# Anything not labelled "NEGATIVE" is treated as positive.
results["bert"] = results["text"].map(
    lambda text: -1 if classifier(text)[0]["label"] == "NEGATIVE" else 1
)

In [6]:
# Show the frame with the new "bert" column.
results

Unnamed: 0,text,site,sentiment,afinn,vader,bert
0,Struggle to evacuate Afghans from Kabul airport,bbc.co.uk,-1,-1,-1,-1
1,Decision on who gets third vaccine dose due im...,bbc.co.uk,1,1,1,-1
2,Banksy: How much do we really know about him?,bbc.co.uk,1,1,1,-1
3,Raab rejects calls to quit over Afghan interpr...,bbc.co.uk,-1,-1,-1,1
4,Transfer latest and Premier League build-up,bbc.co.uk,1,1,1,1
...,...,...,...,...,...,...
112,Age is just a number: Rankin portraits celebra...,positive.news,1,1,1,1
113,Renowned photographer Rankin has teamed up wit...,positive.news,1,1,1,1
114,Catch up on what's been going right in the world,positive.news,1,1,1,1
115,Get Positive News stories in your inbox each week,positive.news,1,1,1,1


In [4]:
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer

from pathlib import Path

# Hub identifier of the checkpoint, and the local directory that mirrors it.
MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment"
MODEL = f"./{MODEL_ID}"

if Path(MODEL).is_dir():
    # A saved copy already exists: load tokenizer and weights from disk.
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
else:
    # First run on this machine: fetch by Hub id, then save a local copy so
    # later runs take the branch above. This replaces the manual
    # comment/uncomment toggle the cell previously relied on.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    tokenizer.save_pretrained(MODEL)
    model.save_pretrained(MODEL)

In [8]:
import numpy as np
from scipy.special import softmax

def roberta(text, max_length=512):
    """Classify ``text`` as negative (-1) or positive (1) with the loaded RoBERTa model.

    Uses the notebook-level ``tokenizer`` and ``model``. Only the scores at
    index 0 and index 2 are compared; the score at index 1 is ignored, so
    every text is forced into one of the two labels.

    Args:
        text: The string to classify.
        max_length: Maximum number of tokens sent to the model. Longer inputs
            are truncated to this length instead of being passed through
            at full size.

    Returns:
        ``-1`` if the score at index 0 is strictly greater than the score at
        index 2, otherwise ``1`` (a tie counts as positive).
    """
    encoded_input = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    output = model(**encoded_input)

    # First element of the model output, first (only) item in the batch;
    # detach from autograd before converting to a NumPy array.
    logits = output[0][0].detach().numpy()
    scores = softmax(logits)

    return -1 if scores[0] > scores[2] else 1
    
# roberta() only needs the text, so map it over that column directly rather
# than materialising every row with apply(axis=1).
results["roberta"] = results["text"].map(roberta)

In [9]:
# Inspect rows 10-19 to compare the predictor columns side by side.
results[10:20]

Unnamed: 0,text,site,sentiment,afinn,vader,bert,roberta
10,Gunman argued with mum before mass shooting,bbc.co.uk,-1,1,-1,-1,-1
11,Arrest in manhunt for murdered nurse's husband,bbc.co.uk,-1,-1,-1,-1,-1
12,Women's Open: Sagstrom leads with Korda chasin...,bbc.co.uk,1,1,1,1,1
13,Man Utd great Law diagnosed with dementia,bbc.co.uk,-1,1,1,-1,-1
14,Mother in court charged with murder of her son,bbc.co.uk,-1,-1,-1,-1,-1
15,NI health service facing 'a very difficult win...,bbc.co.uk,-1,-1,-1,-1,-1
16,Newquay has highest Covid rates in England,bbc.co.uk,-1,1,1,1,-1
17,'We just can't take any risks',bbc.co.uk,-1,-1,1,-1,-1
18,Man takes legal action over quarantine hotel stay,bbc.co.uk,-1,1,1,-1,-1
19,Hampshire & Isle of Wight,bbc.co.uk,1,1,1,1,1


In [10]:
# Save the scored frame to disk; index=False leaves the row index out of the file.
results.to_csv("results.csv", index=False)

In [11]:
import pandas as pd
# Reload the saved predictions from results.csv for the evaluation cells below.
results = pd.read_csv("results.csv")

In [7]:
# Show the reloaded frame, including all four predictor columns.
results

Unnamed: 0,text,site,sentiment,afinn,vader,bert,roberta
0,Struggle to evacuate Afghans from Kabul airport,bbc.co.uk,-1,-1,-1,-1,1
1,Decision on who gets third vaccine dose due im...,bbc.co.uk,1,1,1,-1,1
2,Banksy: How much do we really know about him?,bbc.co.uk,1,1,1,-1,1
3,Raab rejects calls to quit over Afghan interpr...,bbc.co.uk,-1,-1,-1,1,-1
4,Transfer latest and Premier League build-up,bbc.co.uk,1,1,1,1,1
...,...,...,...,...,...,...,...
112,Age is just a number: Rankin portraits celebra...,positive.news,1,1,1,1,1
113,Renowned photographer Rankin has teamed up wit...,positive.news,1,1,1,1,1
114,Catch up on what's been going right in the world,positive.news,1,1,1,1,1
115,Get Positive News stories in your inbox each week,positive.news,1,1,1,1,1


In [10]:
from sklearn.metrics import accuracy_score

# Compare each predictor column against the reference "sentiment" column.
# The loop variable is deliberately not called `model`: that name is bound to
# the loaded RoBERTa classifier, and rebinding it to a string here would break
# any later cell that calls model(**encoded_input).
for column in ["afinn", "vader", "bert", "roberta"]:
    print(column, accuracy_score(results["sentiment"], results[column]))

afinn 0.7521367521367521
vader 0.717948717948718
bert 0.6581196581196581
roberta 0.7948717948717948


In [11]:
# Rows where the RoBERTa prediction disagrees with the reference label.
results.loc[results["sentiment"].ne(results["roberta"])]

Unnamed: 0,text,site,sentiment,afinn,vader,bert,roberta
1,Decision on who gets third vaccine dose due im...,bbc.co.uk,1,1,1,-1,-1
2,Banksy: How much do we really know about him?,bbc.co.uk,1,1,1,-1,-1
6,'The Taliban have moved into the house next do...,bbc.co.uk,1,1,1,-1,-1
9,No planes leaving Afghanistan empty - UK minister,bbc.co.uk,1,-1,-1,-1,-1
21,Quiz: Do you know these animated firsts?,bbc.co.uk,1,1,1,-1,-1
27,Who is Charlie Kane and why is he trending?,bbc.co.uk,1,1,1,-1,-1
29,Changing rooms is back - what did critics think?,bbc.co.uk,1,-1,-1,-1,-1
35,The man who could have bought Man Utd for Â£10...,bbc.co.uk,1,1,1,-1,-1
47,'Game-changer' plan for 1.2bn disabled people,bbc.co.uk,1,1,1,-1,-1
56,The BBC around the UK,bbc.co.uk,1,1,1,1,-1


In [5]:
# Re-run one headline from the mismatch table by hand to look at its raw class scores.
text = "Banksy: How much do we really know about him?"
# return_tensors='pt' asks the tokenizer for PyTorch tensors.
encoded_input = tokenizer(text, return_tensors='pt')

In [7]:
import numpy as np
from scipy.special import softmax

# Forward pass on the single encoded headline.
output = model(**encoded_input)

# First element of the model output, first (only) item in the batch;
# detach from autograd, convert to NumPy, then normalise with softmax.
scores = output[0][0].detach().numpy()
scores = softmax(scores)

In [8]:
# Display the three class scores. For this headline index 1 dominates, which
# the roberta() rule ignores because it only compares index 0 with index 2.
scores

array([0.10682145, 0.86794084, 0.02523762], dtype=float32)