In [3]:
from scipy.special import softmax
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from tqdm.notebook import tqdm
import torch
import numpy as np

### Model Selected Roberta

In [2]:
##Reference https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest

MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

Downloading (…)lve/main/config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

### This is just an example

In [4]:
text = "Covid cases are increasing fast!"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)


In [5]:
# Run for Roberta Model
encoded_text = tokenizer(text, return_tensors='pt')
output = model(**encoded_text)
scores = output[0][0].detach().numpy()
scores = softmax(scores)
scores_dict = {
    'roberta_neg' : scores[0],
    'roberta_neu' : scores[1],
    'roberta_pos' : scores[2]
}
print(scores_dict)

{'roberta_neg': 0.20352869, 'roberta_neu': 0.58220696, 'roberta_pos': 0.21426432}


### Here is the real application

In [1]:
import pandas as pd

df = pd.read_excel('https://raw.githubusercontent.com/kmiloR2/Maestria/main/feedback-examples.xlsx')
df.head()

Unnamed: 0,Feedback
0,I want to talk to you about your work on this ...
1,I want to talk to you about your priorities. I...
2,Reaching your goal of [name the goal] is a big...
3,One of your most impactful moments was how you...
4,It’s clear you’re excited about the project. B...


In [13]:
def polarity_scores_roberta(text):
    encoded_input = tokenizer(text, return_tensors='pt')
    output = model(**encoded_input)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    return {
        'roberta_neg' : scores[0],
        'roberta_neu' : scores[1],
        'roberta_pos' : scores[2]
    }

df['roberta_scores'] = df['Feedback'].apply(polarity_scores_roberta)

df[['roberta_neg', 'roberta_neu', 'roberta_pos']] = df['roberta_scores'].apply(pd.Series)

df.drop('roberta_scores', axis=1, inplace=True)

In [14]:
#The score determines the polarity of the comment
df['compound_score'] = df['roberta_pos'] - df['roberta_neg'] * 0.5 + 0.5 * df['roberta_neu']

In [15]:
import pandas as pd

def categorize_sentiment(compound_score):
    if compound_score > 0:
        return "positive"
    else:
        return "negative"

df['sentiment'] = df['compound_score'].apply(categorize_sentiment)

df


Unnamed: 0,Feedback,roberta_neg,roberta_neu,roberta_pos,compound_score,sentiment
0,I want to talk to you about your work on this ...,0.144024,0.444991,0.410985,0.561468,positive
1,I want to talk to you about your priorities. I...,0.010806,0.181307,0.807887,0.893138,positive
2,Reaching your goal of [name the goal] is a big...,0.001244,0.020576,0.978181,0.987846,positive
3,One of your most impactful moments was how you...,0.001208,0.012038,0.986754,0.992169,positive
4,It’s clear you’re excited about the project. B...,0.009261,0.181684,0.809055,0.895266,positive
5,"I want to congratulate you, not only for your ...",0.001117,0.011363,0.987521,0.992644,positive
6,I really appreciated how you kept me up to dat...,0.000972,0.006549,0.992478,0.995267,positive
7,I’ve noticed that you seem less engaged lately...,0.232038,0.628211,0.139751,0.337838,positive
8,“I’m curious about where we are with Y project...,0.106754,0.833012,0.060234,0.423363,positive
9,know that [name the project] is really import...,0.063524,0.557782,0.378694,0.625823,positive
