In [1]:
!pip install transformers

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 23.2.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import numpy as np
import torch
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def preprocess(text):
    """Mask user mentions and links in a space-separated string.

    The input is split on single space characters. Each token is then
    rewritten as follows:

    * a token starting with '@' that is longer than one character becomes
      the placeholder '@user';
    * a token starting with 'http' becomes the placeholder 'http';
    * every other token (including a lone '@' and empty tokens produced by
      consecutive spaces) is kept as-is.

    The tokens are joined back together with single spaces and returned.
    """
    def _mask(token):
        # Mention check comes first; '@user' never starts with 'http', so the
        # two rules cannot both fire on the same token.
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))

In [3]:
# Checkpoint identifier passed to every from_pretrained() call in this notebook.
# (Plain string: the previous f-string prefix had no placeholders to format.)
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Tokenizer and config are both loaded from the same checkpoint identifier so
# that the token ids and the id2label mapping agree with the model loaded later.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [4]:
# Load the sequence-classification model for the checkpoint named by MODEL.
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Save a local copy under model/<checkpoint id>. The path is derived from MODEL
# so the saved directory cannot drift from the checkpoint that was loaded.
model.save_pretrained(f"model/{MODEL}")

  return self.fget.__get__(instance, owner)()
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:
def analyze_multiple_texts(texts, tokenizer, model, config, delimiter="."):
    """Score each delimiter-separated segment of ``texts`` and format a report.

    ``texts`` is split on ``delimiter``; every segment is stripped of
    surrounding whitespace and passed through ``preprocess``. Segments that are
    empty after that are skipped. Each remaining segment is tokenized, run
    through ``model``, and the first row of the model's first output is turned
    into probabilities with ``softmax``.

    Note: the split is a plain ``str.split``, so with the default ``"."``
    delimiter a period inside a number or URL also ends a segment.

    Args:
        texts: A single string containing one or more segments.
        tokenizer: Callable invoked as ``tokenizer(text, return_tensors='pt')``;
            its result is unpacked as keyword arguments to ``model``.
        model: Callable whose result is indexed as ``output[0][0]``
            (assumed to be the logits for the single input -- TODO confirm for
            models other than the one loaded in this notebook).
        config: Object exposing ``id2label``, indexed by class id.
        delimiter: Separator used to split ``texts``. Defaults to ``"."``.

    Returns:
        A string with one block per non-empty segment, blocks separated by a
        blank line. Each block is ``Text: <segment>`` followed by one line per
        class, ordered from highest to lowest score, formatted as
        ``<rank>) <label> <score rounded to 4 decimals>``. Returns an empty
        string when no segment is non-empty.
    """
    reports = []

    for segment in texts.split(delimiter):
        text = preprocess(segment.strip())
        if not text:
            continue  # nothing left to score (e.g. text after a trailing delimiter)

        encoded_input = tokenizer(text, return_tensors='pt')
        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            output = model(**encoded_input)
        scores = softmax(output[0][0].detach().numpy())
        ranking = np.argsort(scores)[::-1]  # class ids, highest score first

        lines = [f"Text: {text}"]
        for rank, class_id in enumerate(ranking, start=1):
            label = config.id2label[class_id]
            score = scores[class_id]
            lines.append(f"{rank}) {label} {np.round(float(score), 4)}")
        reports.append("\n".join(lines))

    # Collect blocks in a list and join once instead of growing a string with +=.
    return "\n\n".join(reports).strip()

In [8]:
# Demo: a single sentence. The text after the final "." is only whitespace,
# which analyze_multiple_texts strips to an empty segment and skips.
texts = "Covid cases are increasing fast!. "
result = analyze_multiple_texts(texts, tokenizer, model, config)
print(result)

Text: Covid cases are increasing fast!
1) negative 0.7236
2) neutral 0.2287
3) positive 0.0477


In [9]:
# Demo: three sentences separated by ".\n". The split happens on "." and the
# leftover newline at the start of each segment is removed by the strip step.
texts = "The new vaccine is showing promising results.\nClimate change is accelerating.\nAI technology is advancing rapidly."
result = analyze_multiple_texts(texts, tokenizer, model, config)
print(result)

Text: The new vaccine is showing promising results
1) positive 0.9772
2) neutral 0.0207
3) negative 0.0021

Text: Climate change is accelerating
1) negative 0.5289
2) neutral 0.4075
3) positive 0.0636

Text: AI technology is advancing rapidly
1) positive 0.9467
2) neutral 0.0474
3) negative 0.0059


In [10]:
# Demo: three sentences separated by ". " on a single line.
texts = "The weather today is mild. The meeting is scheduled for 3 PM. The book is on the table."
result = analyze_multiple_texts(texts, tokenizer, model, config)
print(result)

Text: The weather today is mild
1) positive 0.5173
2) neutral 0.4593
3) negative 0.0234

Text: The meeting is scheduled for 3 PM
1) neutral 0.951
2) positive 0.036
3) negative 0.013

Text: The book is on the table
1) neutral 0.8828
2) positive 0.0915
3) negative 0.0258
