In [1]:
%load_ext autoreload
%autoreload 2

from pysentimiento import create_analyzer
from textblob import TextBlob
from datasets import load_dataset
import random
import tweetnlp
import stanza
from tqdm.auto import tqdm
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

vader = SentimentIntensityAnalyzer()






benchmark_datasets = {
    "sentiment": ["stanfordnlp/sst2", "takala/financial_phrasebank"]
}

#pysentimient + tweetnlp + stanza
analyzer = create_analyzer("sentiment", lang="en")
model = tweetnlp.load_model('sentiment')  # Or `model = tweetnlp.Sentiment()`
nlp = stanza.Pipeline(lang='en', processors='tokenize,sentiment', tokenize_no_ssplit=True)

def pysentimiento_analyzer(dataset):
    id2label = dataset.features["label"].names

    outs = analyzer.predict(dataset["sentence"])

    if len(id2label) == 2:
        # Only positive/negative
        return ["negative" if x.probas["NEG"] > x.probas["POS"] else "positive" for x in outs]
    else:
        translation = {"NEU": "neutral", "POS": "positive", "NEG": "negative"}
        return [translation[x.output] for x in outs]

def stanza_analyzer(dataset):
    id2label = dataset.features["label"].names
    outs = nlp(dataset["sentence"])

    def _get_sentiment(x):
        if x.sentiment == 0:
            return "negative"
        elif x.sentiment == 2:
            return "positive"
        elif len(id2label) == 2:
            # Flip a coin
            if random.random() > 0.5:
                return "positive"
            else:
                return "negative"
        else:
            return "neutral"

    return [_get_sentiment(x) for x in outs.sentences]

def tweetnlp_analyzer(dataset):
    id2label = dataset.features["label"].names
    outs = model.predict(dataset["sentence"])
    def get_tweetnlp_sentiment(x):
        if x["label"] in {"positive", "negative"}:
            return x["label"]
        elif len(id2label) == 2:
            # Flip a coin
            if random.random() > 0.5:
                return "positive"
            else:
                return "negative"
        else:
            return "neutral"

    return [get_tweetnlp_sentiment(x) for x in outs]

def textblob_analyzer(dataset, threshold=0.1):
    id2label = dataset.features["label"].names
    outs = [TextBlob(x).sentiment.polarity for x in dataset["sentence"]]

    def get_textblob_sentiment(x):
        if len(id2label) == 2:
            if x > 0:
                return "positive"
            else:
                return "negative"
        else:
            if x > threshold:
                return "positive"
            elif x < -threshold:
                return "negative"
            else:
                return "neutral"

    return [get_textblob_sentiment(x) for x in outs]

def vader_analyzer(dataset):
    id2label = dataset.features["label"].names
    outs = [vader.polarity_scores(x) for x in dataset["sentence"]]

    def get_vader_sentiment(x):
        if len(id2label) == 2:
            if x["pos"] > x["neg"]:
                return "positive"
            else:
                return "negative"
        else:
            labels = ["neg", "neu", "pos"]

            # get argmax
            max_sent = max(range(len(labels)), key=lambda i: x[labels[i]])

            return id2label[max_sent]
    return [get_vader_sentiment(x) for x in outs]

analyzers = {
    "pysentimiento": pysentimiento_analyzer,
    "tweetnlp": tweetnlp_analyzer,
    "stanza": stanza_analyzer,
    "textblob": textblob_analyzer,
    "vader": vader_analyzer
}

ds = load_dataset(benchmark_datasets["sentiment"][0])

ds

2024-06-15 14:43:17,617	INFO util.py:154 -- Outdated packages:
  ipywidgets==7.8.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-06-15 14:43:17,767	INFO util.py:154 -- Outdated packages:
  ipywidgets==7.8.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
loading configuration file https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest/resolve/main/config.json from cache at /users/jmperez/.cache/huggingface/transformers/c26252806565e705085b65f69d7d544c05112fee06744845d6c067efcb278fff.31fdd4298ba667598119e493f82afb18fcd41f96366700ec7d6460c17c421feb
Model config RobertaConfig {
  "_name_or_path": "cardiffnlp/twitter-roberta-base-sentiment-latest",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gr

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.8.0.json:   0%|   …

2024-06-15 14:43:29 INFO: Downloaded file to /users/jmperez/stanza_resources/resources.json
INFO:stanza:Downloaded file to /users/jmperez/stanza_resources/resources.json
2024-06-15 14:43:29 INFO: Loading these models for language: en (English):
| Processor | Package        |
------------------------------
| tokenize  | combined       |
| mwt       | combined       |
| sentiment | sstplus_charlm |

INFO:stanza:Loading these models for language: en (English):
| Processor | Package        |
------------------------------
| tokenize  | combined       |
| mwt       | combined       |
| sentiment | sstplus_charlm |

2024-06-15 14:43:29 INFO: Using device: cuda
INFO:stanza:Using device: cuda
2024-06-15 14:43:29 INFO: Loading: tokenize
INFO:stanza:Loading: tokenize
2024-06-15 14:43:29 INFO: Loading: mwt
INFO:stanza:Loading: mwt
2024-06-15 14:43:29 INFO: Loading: sentiment
INFO:stanza:Loading: sentiment
2024-06-15 14:43:30 INFO: Done loading processors!
INFO:stanza:Done loading processors!


DatasetDict({
    train: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 67349
    })
    validation: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 872
    })
    test: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 1821
    })
})

In [2]:
import random
preds = {
    k: analyzer(ds["validation"]) for k, analyzer in analyzers.items()
}


Map:   0%|          | 0/872 [00:00<?, ? examples/s]

The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 872
  Batch size = 32


In [3]:
from sklearn.metrics import classification_report
import pandas as pd

id2label = ["negative", "positive"]
label2id = {label: i for i, label in enumerate(id2label)}


results = {}
for name, pred in preds.items():
    print(name)
    true_labels = ds["validation"]["label"]
    pred_labels = [label2id[x] for x in pred]

    ret = classification_report(true_labels, pred_labels, target_names=id2label, output_dict=True)

    res = {
        "Negative F1": ret["negative"]["f1-score"],
        "Positive F1": ret["positive"]["f1-score"],
        "Macro F1": ret["macro avg"]["f1-score"],
        "Macro Precision": ret["macro avg"]["precision"],
        "Macro Recall": ret["macro avg"]["recall"],
    }
    results[name] = res


df = pd.DataFrame(results).T

# Sort by "Macro F1"
df = df.sort_values("Macro F1", ascending=False)

df * 100

pysentimiento
tweetnlp
stanza
textblob
vader


Unnamed: 0,Negative F1,Positive F1,Macro F1,Macro Precision,Macro Recall
pysentimiento,87.573964,88.320356,87.94716,87.990882,87.931506
stanza,83.274021,84.350721,83.812371,83.864649,83.797466
tweetnlp,81.573034,80.796253,81.184643,81.358885,81.257893
vader,64.179104,69.361702,66.770403,67.171414,66.851899
textblob,62.6109,69.109948,65.860424,66.500154,66.017302



## Financial Phrasebank

In [4]:


ds = load_dataset("takala/financial_phrasebank", "sentences_66agree")
print(ds)
preds = {
    k: analyzer(ds["train"]) for k, analyzer in analyzers.items()
}


DatasetDict({
    train: Dataset({
        features: ['sentence', 'label'],
        num_rows: 4217
    })
})


Map:   0%|          | 0/4217 [00:00<?, ? examples/s]

The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 4217
  Batch size = 32


In [8]:
from sklearn.metrics import classification_report


results = {}
id2label = ds["train"].features["label"].names

label2id = {label: i for i, label in enumerate(id2label)}
for name, pred in preds.items():
    print(name)
    true_labels = ds["train"]["label"]
    pred_labels = [label2id[x] for x in pred]

    ret = classification_report(true_labels, pred_labels, target_names=id2label, output_dict=True)

    res = {
        "Negative F1": ret["negative"]["f1-score"],
        "Positive F1": ret["positive"]["f1-score"],
        "Macro F1": ret["macro avg"]["f1-score"],
        "Macro Precision": ret["macro avg"]["precision"],
        "Macro Recall": ret["macro avg"]["recall"],
    }
    results[name] = res


df = pd.DataFrame(results).T

df.sort_values("Macro F1", ascending=False) * 100

df

pysentimiento
tweetnlp
stanza
textblob
vader


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Negative F1,Positive F1,Macro F1,Macro Precision,Macro Recall
pysentimiento,0.637441,0.609615,0.682352,0.750389,0.645194
tweetnlp,0.513834,0.466513,0.588266,0.73207,0.548265
stanza,0.313076,0.324654,0.441454,0.463273,0.444095
textblob,0.289238,0.377609,0.448512,0.466919,0.439714
vader,0.0,0.00341,0.251322,0.333713,0.33351


In [None]:
results