## English Sentiment comparison


In [None]:
%load_ext autoreload
%autoreload 2

import random
import tweetnlp
import stanza
from tqdm.auto import tqdm
from pysentimiento import create_analyzer
from textblob import TextBlob
from datasets import load_dataset, ClassLabel
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# SentEval-CR test split: expose the text column as "sentence" (the column name
# every analyzer reads) and attach readable names to the two label ids.
sent_eval = (
    load_dataset("SetFit/SentEval-CR")["test"]
    .rename_column("text", "sentence")
    .cast_column("label", ClassLabel(2, names=["negative", "positive"]))
)

# Sentiment140 test split.
# NOTE: trust_remote_code=True lets `datasets` run the loading script shipped
# with the dataset repository; keep it only for sources you trust.
sent140 = load_dataset("stanfordnlp/sentiment140", trust_remote_code=True, split="test")
# Halve the raw sentiment ids so they line up with the three class ids below
# (presumably the raw values are 0/2/4 -- verify against the dataset card).
# Floor division keeps the ids as integers; true division would turn the
# column into floats that only become class ids again through the cast.
sent140 = sent140.map(lambda x: {"sentiment": x["sentiment"] // 2})
sent140 = sent140.rename_column("text", "sentence")
sent140 = sent140.rename_column("sentiment", "label")

sent140 = sent140.cast_column("label", ClassLabel(3, names=["negative", "neutral", "positive"]))

# English subset of the multilingual tweet sentiment set, aligned to the
# shared "sentence" column and given named three-way labels.
mteb = (
    load_dataset("mteb/tweet_sentiment_multilingual", "english", split="test")
    .rename_column("text", "sentence")
    .cast_column("label", ClassLabel(3, names=["negative", "neutral", "positive"]))
)

# English Amazon reviews; labels are collapsed to three classes further down.
amazon = load_dataset("SetFit/amazon_reviews_multi_en", split="test")

def convert_label(ex):
    """Collapse the raw ``label`` of one example into a three-way class id.

    Args:
        ex: Mapping with a numeric ``"label"`` entry.

    Returns:
        0 when the label is at most 1, 2 when it is at least 3, and 1 for
        anything in between. The ids are later named negative / neutral /
        positive when the column is cast to a ``ClassLabel``.
    """
    raw = ex["label"]
    if raw >= 3:
        return 2
    if raw <= 1:
        return 0
    return 1

# Replace the raw labels with three-way ids, name the classes, and expose the
# review text under the shared "sentence" column.
amazon = (
    amazon.map(lambda row: {"label": convert_label(row)})
    .cast_column("label", ClassLabel(3, names=["negative", "neutral", "positive"]))
    .rename_column("text", "sentence")
)


# Benchmark suite as (display name, dataset) pairs. The evaluation loop reads
# each dataset's "label" feature names, and every analyzer reads its
# "sentence" column.
# NOTE(review): sst2 and financial_phrasebank are used as loaded, so they are
# assumed to already expose a "sentence" column and a named "label" feature --
# confirm against their dataset cards.
benchmark_datasets = [
    ("sst2", load_dataset("stanfordnlp/sst2")["validation"]),
    ("financial_phrasebank", load_dataset("takala/financial_phrasebank", "sentences_66agree")["train"]),
    ("SentEval-CR", sent_eval),
    ("sentiment140", sent140),
    ("mteb", mteb),
    ("amazon", amazon)
]

# Shared model instances for tweetnlp, stanza and VADER, loaded once and
# reused by the analyzer functions below. (The pysentimiento analyzer is not
# loaded here; it is created inside pysentimiento_analyzer.)

model = tweetnlp.load_model('sentiment')  # Or `model = tweetnlp.Sentiment()`
# tokenize_no_ssplit=True: presumably stops stanza from splitting an input
# into several sentences; stanza_analyzer relies on getting one sentence (and
# so one sentiment) per input text -- confirm against the stanza docs.
nlp = stanza.Pipeline(lang='en', processors='tokenize,sentiment', tokenize_no_ssplit=True)
vader = SentimentIntensityAnalyzer()

from functools import lru_cache


@lru_cache(maxsize=1)
def _get_pysentimiento_analyzer():
    """Build the English pysentimiento sentiment analyzer once and reuse it."""
    return create_analyzer("sentiment", lang="en")


def pysentimiento_analyzer(dataset):
    """Predict a sentiment label name for every sentence with pysentimiento.

    Args:
        dataset: Dataset with a ``"sentence"`` column and a ``"label"``
            feature exposing ``names``.

    Returns:
        One label name per sentence. For two-class datasets the choice is
        between "negative" and "positive", based on which of the NEG / POS
        probabilities is larger (ties go to "positive"). Otherwise the
        model's own NEG / NEU / POS output is translated to
        "negative" / "neutral" / "positive".
    """
    # The analyzer is cached so the model is not rebuilt for every dataset
    # in the benchmark.
    analyzer = _get_pysentimiento_analyzer()
    id2label = dataset.features["label"].names

    outs = analyzer.predict(dataset["sentence"])

    if len(id2label) == 2:
        # Only positive/negative: ignore the neutral probability entirely.
        return ["negative" if x.probas["NEG"] > x.probas["POS"] else "positive" for x in outs]

    translation = {"NEU": "neutral", "POS": "positive", "NEG": "negative"}
    return [translation[x.output] for x in outs]

def stanza_analyzer(dataset):
    """Predict a sentiment label name for every sentence with stanza.

    Args:
        dataset: Dataset with a ``"sentence"`` column and a ``"label"``
            feature exposing ``names``.

    Returns:
        One label name per sentence of the processed document. Sentiment
        code 0 maps to "negative" and 2 to "positive". Any other code
        becomes "neutral", except on two-class datasets where it is
        replaced by a random positive/negative guess.
    """
    is_binary = len(dataset.features["label"].names) == 2
    document = nlp(dataset["sentence"])
    polar_names = {0: "negative", 2: "positive"}

    def _label_for(sentence):
        name = polar_names.get(sentence.sentiment)
        if name is not None:
            return name
        if not is_binary:
            return "neutral"
        # Flip a coin: a two-class dataset has no neutral label to predict.
        return "positive" if random.random() > 0.5 else "negative"

    return [_label_for(sentence) for sentence in document.sentences]

def tweetnlp_analyzer(dataset):
    """Predict a sentiment label name for every sentence with tweetnlp.

    Args:
        dataset: Dataset with a ``"sentence"`` column and a ``"label"``
            feature exposing ``names``.

    Returns:
        One label name per prediction. "positive" and "negative" are passed
        through. Any other predicted label becomes "neutral", except on
        two-class datasets where it is replaced by a random
        positive/negative guess.
    """
    is_binary = len(dataset.features["label"].names) == 2
    predictions = model.predict(dataset["sentence"])

    def _label_for(prediction):
        if prediction["label"] in {"positive", "negative"}:
            return prediction["label"]
        if not is_binary:
            return "neutral"
        # Flip a coin: a two-class dataset has no neutral label to predict.
        return "positive" if random.random() > 0.5 else "negative"

    return [_label_for(prediction) for prediction in predictions]

def textblob_analyzer(dataset, threshold=0.1):
    """Predict a sentiment label name for every sentence from TextBlob polarity.

    Args:
        dataset: Dataset with a ``"sentence"`` column and a ``"label"``
            feature exposing ``names``.
        threshold: Half-width of the neutral band around zero polarity; only
            used when the dataset is not two-class.

    Returns:
        One label name per sentence. On two-class datasets any polarity
        above zero is "positive" and everything else is "negative".
        Otherwise polarity above ``threshold`` is "positive", below
        ``-threshold`` is "negative", and the rest is "neutral".
    """
    is_binary = len(dataset.features["label"].names) == 2
    polarities = [TextBlob(text).sentiment.polarity for text in dataset["sentence"]]

    if is_binary:
        return ["positive" if score > 0 else "negative" for score in polarities]

    labels = []
    for score in polarities:
        if score > threshold:
            labels.append("positive")
        elif score < -threshold:
            labels.append("negative")
        else:
            labels.append("neutral")
    return labels

def vader_analyzer(dataset):
    """Predict a sentiment label name for every sentence with VADER.

    Args:
        dataset: Dataset with a ``"sentence"`` column and a ``"label"``
            feature exposing ``names``.

    Returns:
        One label name per sentence. On two-class datasets the result is
        "positive" when the "pos" score exceeds the "neg" score and
        "negative" otherwise. On other datasets it is the name of the
        largest of the "neg" / "neu" / "pos" scores, with ties resolved in
        that order.
    """
    id2label = dataset.features["label"].names
    outs = [vader.polarity_scores(x) for x in dataset["sentence"]]

    if len(id2label) == 2:
        return ["positive" if x["pos"] > x["neg"] else "negative" for x in outs]

    # Map VADER's score keys straight to label names instead of indexing the
    # dataset's label list, so the result does not depend on the dataset
    # declaring its classes in negative/neutral/positive order.
    vader2label = {"neg": "negative", "neu": "neutral", "pos": "positive"}

    # max() returns the first maximal key in insertion order (neg, neu, pos).
    return [vader2label[max(vader2label, key=lambda k: x[k])] for x in outs]

# Registry of the systems under comparison: display name -> function that
# takes a dataset and returns one predicted label name per sentence. The
# evaluation loop calls every entry with the dataset as its only argument.
analyzers = {
    "pysentimiento": pysentimiento_analyzer,
    "tweetnlp": tweetnlp_analyzer,
    "stanza": stanza_analyzer,
    "textblob": textblob_analyzer,
    "vader": vader_analyzer
}


In [None]:
from sklearn.metrics import classification_report

# One row per (dataset, model) pair holding the macro-averaged metrics.
results = []

for ds_name, dataset in tqdm(benchmark_datasets):
    # Run every analyzer over the whole dataset before scoring any of them.
    preds = {
        k: analyzer(dataset) for k, analyzer in analyzers.items()
    }
    id2label = dataset.features["label"].names
    label2id = {v: k for k, v in enumerate(id2label)}
    # Pin the label ids explicitly. Without `labels`, classification_report
    # infers them from the data and raises when a class is missing from both
    # the gold and predicted labels, because `target_names` no longer matches.
    label_ids = list(range(len(id2label)))
    # The gold labels are the same for every model, so read them once.
    true_labels = dataset["label"]

    for name, pred in tqdm(preds.items()):
        print(name)
        # Analyzers return label names; the report needs class ids.
        pred_labels = [label2id[x] for x in pred]

        ret = classification_report(
            true_labels,
            pred_labels,
            labels=label_ids,
            target_names=id2label,
            output_dict=True,
        )

        res = {
            "Model": name,
            "Dataset": ds_name,
            "Macro F1": ret["macro avg"]["f1-score"],
            "Macro Precision": ret["macro avg"]["precision"],
            "Macro Recall": ret["macro avg"]["recall"],
        }

        results.append(res)


In [None]:
import pandas as pd

# Collect the per-(dataset, model) rows into a table.
df = pd.DataFrame(results)

# Show the metrics as percentages rounded to two decimals, indexed by dataset
# and model, in precision / recall / F1 column order.
df.set_index(["Dataset", "Model"]).mul(100).round(2).loc[:, ["Macro Precision", "Macro Recall", "Macro F1"]]