In [10]:
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax

In [5]:
class sentimentAnalysis:
    """Predict a sentiment label for a text with a pretrained classifier.

    On construction the sequence-classification model, the tokenizer and the
    config registered under ``model_name`` are loaded through the
    ``transformers`` ``Auto*`` loaders; :meth:`get_sentiment` then maps a raw
    string to one of the labels listed in ``config.id2label``.
    """

    def __init__(self, model_name):
        """Load the model, tokenizer and config for ``model_name``.

        Args:
            model_name: Identifier handed unchanged to each
                ``from_pretrained`` loader (e.g. a Hugging Face Hub id).
        """
        self.model = self.__get_model(model_name)
        self.tokenizer = self.__get_tokenizer(model_name)
        self.config = self.__get_config(model_name)
        self.model_name = model_name

    def __get_model(self, model_name):
        """Return the sequence-classification model for ``model_name``."""
        return AutoModelForSequenceClassification.from_pretrained(model_name)

    def __get_tokenizer(self, model_name):
        """Return the tokenizer for ``model_name``."""
        return AutoTokenizer.from_pretrained(model_name)

    def __get_config(self, model_name):
        """Return the config (source of ``id2label``) for ``model_name``."""
        return AutoConfig.from_pretrained(model_name)

    def preprocess(self, text):
        """Mask user mentions and links in ``text``.

        The text is split on single spaces. Every token that starts with
        ``'@'`` and is longer than one character becomes ``'@user'``; every
        token that starts with ``'http'`` becomes ``'http'``. All other
        tokens (including the empty tokens produced by repeated spaces) are
        kept, so the spacing of the input is preserved.

        Args:
            text: Raw input string.

        Returns:
            The masked string.
        """
        def mask(token):
            if token.startswith('@') and len(token) > 1:
                return '@user'
            if token.startswith('http'):
                return 'http'
            return token

        return " ".join(mask(token) for token in text.split(" "))

    def get_sentiment(self, text, max_length=None):
        """Return the highest-scoring label for ``text``.

        Args:
            text: Raw input string; it is passed through :meth:`preprocess`
                before tokenization.
            max_length: Optional token limit forwarded to the tokenizer.
                With ``None`` the tokenizer falls back to its own declared
                maximum length.
                NOTE(review): if the tokenizer does not declare a maximum
                length, truncation may be a no-op -- pass ``max_length``
                explicitly for long inputs; confirm for the checkpoint used.

        Returns:
            The entry of ``self.config.id2label`` for the top-scoring class.
        """
        ptext = self.preprocess(text)
        # truncation=True keeps over-long inputs within the length limit
        # instead of handing the model more tokens than it can take.
        encoded_input = self.tokenizer(
            ptext,
            return_tensors='pt',
            truncation=True,
            max_length=max_length,
        )
        output = self.model(**encoded_input)
        # First output field, first (only) row of the batch -- presumably the
        # per-class logits for the single input sequence.
        scores = output[0][0].detach().numpy()
        # Softmax is monotonic, so the arg-max of the raw scores is also the
        # arg-max of the probabilities; no normalisation or full sort needed.
        best_index = int(np.argmax(scores))
        return self.config.id2label[best_index]


In [6]:
# Identifier of the pretrained checkpoint to load. A plain string literal is
# enough here: there are no placeholders to interpolate.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Constructing the classifier loads the model, tokenizer and config once;
# later cells reuse this instance.
sentiment_classifier = sentimentAnalysis(MODEL)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [8]:
# Sample sentences used to smoke-test the classifier.
text_arr = [
    "i like meat",
    "i hate meat",
    "i love meat",
    "math is hard",
    "life is hard",
]

# Map each sentence to its predicted label, in input order.
result = {
    sentence: sentiment_classifier.get_sentiment(sentence)
    for sentence in text_arr
}
print(result)

{'i like meat': 'positive', 'i hate meat': 'negative', 'i love meat': 'positive', 'math is hard': 'negative', 'life is hard': 'negative'}
