In [1]:
pip install -q transformers

Note: you may need to restart the kernel to use updated packages.


In [2]:
from transformers import pipeline
import pandas as pd

sentiment_pipeline = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")

emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0


### Read database

In [3]:
df = pd.read_excel('https://raw.githubusercontent.com/kmiloR2/Maestria/main/feedback-examples.xlsx')
df.head()

Unnamed: 0,Feedback,Label
0,I want to talk to you about your work on this ...,Negative
1,I want to talk to you about your priorities. I...,Positive
2,Reaching your goal of [name the goal] is a big...,Positive
3,One of your most impactful moments was how you...,Positive
4,It’s clear you’re excited about the project. B...,Negative


### Modelo BERT

In [4]:
df["Sentiment_BERT"] = df["Feedback"].apply(lambda feedback: sentiment_pipeline(feedback))

# Extract the sentiment labels and scores from the model output
df["Sentiment_Label_BERT"] = df["Sentiment_BERT"].apply(lambda sentiment: sentiment[0]['label'])

# Crear un diccionario de mapeo de etiquetas
label_mapping = {
    "POS": "Positive",
    "NEG": "Negative",
    "NEU": "Neutral"
}

# Aplicar la función de mapeo y agregar la nueva columna "Label_BERT"
df["Label_BERT"] = df["Sentiment_Label_BERT"].apply(lambda label: label_mapping.get(label, label))

# Print the DataFrame with sentiment analysis results
df.head()

Unnamed: 0,Feedback,Label,Sentiment_BERT,Sentiment_Label_BERT,Label_BERT
0,I want to talk to you about your work on this ...,Negative,"[{'label': 'NEU', 'score': 0.7832766175270081}]",NEU,Neutral
1,I want to talk to you about your priorities. I...,Positive,"[{'label': 'POS', 'score': 0.8706808686256409}]",POS,Positive
2,Reaching your goal of [name the goal] is a big...,Positive,"[{'label': 'POS', 'score': 0.9895287156105042}]",POS,Positive
3,One of your most impactful moments was how you...,Positive,"[{'label': 'POS', 'score': 0.990766167640686}]",POS,Positive
4,It’s clear you’re excited about the project. B...,Negative,"[{'label': 'POS', 'score': 0.5218806266784668}]",POS,Positive


### Modelo Roberta

In [5]:
sentiment_pipeline = pipeline(model="cardiffnlp/twitter-roberta-base-sentiment")

In [6]:
df["Sentiment_ROBERTA"] = df["Feedback"].apply(lambda feedback: sentiment_pipeline(feedback))
df["Sentiment_Label_ROBERTA"] = df["Sentiment_ROBERTA"].apply(lambda sentiment: sentiment[0]['label'])
# Crear un diccionario de mapeo de etiquetas
label_mapping = {
    "LABEL_2": "Positive",
    "LABEL_0": "Negative",
    "LABEL_1": "Neutral"
}

# Aplicar la función de mapeo y agregar la nueva columna "Label_BERT"
df["Label_ROBERTA"] = df["Sentiment_Label_ROBERTA"].apply(lambda label: label_mapping.get(label, label))

# Print the DataFrame with sentiment analysis results
df.head()

Unnamed: 0,Feedback,Label,Sentiment_BERT,Sentiment_Label_BERT,Label_BERT,Sentiment_ROBERTA,Sentiment_Label_ROBERTA,Label_ROBERTA
0,I want to talk to you about your work on this ...,Negative,"[{'label': 'NEU', 'score': 0.7832766175270081}]",NEU,Neutral,"[{'label': 'LABEL_1', 'score': 0.4449909329414...",LABEL_1,Neutral
1,I want to talk to you about your priorities. I...,Positive,"[{'label': 'POS', 'score': 0.8706808686256409}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.8078871369361...",LABEL_2,Positive
2,Reaching your goal of [name the goal] is a big...,Positive,"[{'label': 'POS', 'score': 0.9895287156105042}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.9781804084777...",LABEL_2,Positive
3,One of your most impactful moments was how you...,Positive,"[{'label': 'POS', 'score': 0.990766167640686}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.9867540001869...",LABEL_2,Positive
4,It’s clear you’re excited about the project. B...,Negative,"[{'label': 'POS', 'score': 0.5218806266784668}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.8090545535087...",LABEL_2,Positive


### Modelo AFINN

This model works different versus the previous 2, this model is based on a predefined lexicon (dictionary), which means that asigns the label according to predefined value(postive or negative) assigned to each word. This means that would be necessary apply a cleainig on the database removing stops words that could intefere with the final result.

In [7]:
import re
import nltk
from nltk.corpus import stopwords
from tqdm import tqdm

def preprocess_text(text_data):
   
    punctuation = re.compile(r'[^\w\s]')
    
    english_stopwords = set(stopwords.words('english'))
  
    preprocessed_text = []
    for sentence in tqdm(text_data):
        # Converting lowercase
        sentence = sentence.lower()
        
        # Removing punctuations
        sentence = punctuation.sub('', sentence)
  
        # Removing stopwords and tokenizing
        preprocessed_text.append(' '.join(token for token in nltk.word_tokenize(sentence) if token not in english_stopwords))
  
    return preprocessed_text

In [8]:
#fill na

# preprocess text /remove stop words, punctuation, lower case
preprocessed_review = preprocess_text(df['Feedback'].values)
df['Feedback_2'] = preprocessed_review
df

100%|████████████████████████████████████████████████████████████████████████████████| 53/53 [00:00<00:00, 1558.70it/s]


Unnamed: 0,Feedback,Label,Sentiment_BERT,Sentiment_Label_BERT,Label_BERT,Sentiment_ROBERTA,Sentiment_Label_ROBERTA,Label_ROBERTA,Feedback_2
0,I want to talk to you about your work on this ...,Negative,"[{'label': 'NEU', 'score': 0.7832766175270081}]",NEU,Neutral,"[{'label': 'LABEL_1', 'score': 0.4449909329414...",LABEL_1,Neutral,want talk work last project delay impacted tea...
1,I want to talk to you about your priorities. I...,Positive,"[{'label': 'POS', 'score': 0.8706808686256409}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.8078871369361...",LABEL_2,Positive,want talk priorities noticed well projects 2 3...
2,Reaching your goal of [name the goal] is a big...,Positive,"[{'label': 'POS', 'score': 0.9895287156105042}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.9781804084777...",LABEL_2,Positive,reaching goal name goal big accomplishment rem...
3,One of your most impactful moments was how you...,Positive,"[{'label': 'POS', 'score': 0.990766167640686}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.9867540001869...",LABEL_2,Positive,one impactful moments handled project x showed...
4,It’s clear you’re excited about the project. B...,Negative,"[{'label': 'POS', 'score': 0.5218806266784668}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.8090545535087...",LABEL_2,Positive,clear youre excited project sometimes get exci...
5,"I want to congratulate you, not only for your ...",Positive,"[{'label': 'POS', 'score': 0.992424726486206}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.9875206947326...",LABEL_2,Positive,want congratulate performance name specific ac...
6,I really appreciated how you kept me up to dat...,Positive,"[{'label': 'POS', 'score': 0.9926396608352661}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.9924782514572...",LABEL_2,Positive,really appreciated kept date x project week he...
7,I’ve noticed that you seem less engaged lately...,Negative,"[{'label': 'NEU', 'score': 0.7238913774490356}]",NEU,Neutral,"[{'label': 'LABEL_1', 'score': 0.6282107830047...",LABEL_1,Neutral,ive noticed seem less engaged lately important...
8,“I’m curious about where we are with Y project...,Positive,"[{'label': 'NEU', 'score': 0.894446849822998}]",NEU,Neutral,"[{'label': 'LABEL_1', 'score': 0.8330121636390...",LABEL_1,Neutral,im curious project issues come best know soon ...
9,know that [name the project] is really import...,Positive,"[{'label': 'POS', 'score': 0.643831729888916}]",POS,Positive,"[{'label': 'LABEL_1', 'score': 0.5577819943428...",LABEL_1,Neutral,know name project really important youre excit...


In [9]:
from afinn import Afinn
afinn = Afinn()

#apply afinn score and polarity
df['sentiment_score'] = df['Feedback_2'].apply(lambda x: afinn.score(x))

scores = df['sentiment_score']
sentiment = ['Positive' if score >= 0
                          else 'Negative' 
                                  for score in scores]
df['Label_AFINN'] = sentiment

In [10]:
df.head()

Unnamed: 0,Feedback,Label,Sentiment_BERT,Sentiment_Label_BERT,Label_BERT,Sentiment_ROBERTA,Sentiment_Label_ROBERTA,Label_ROBERTA,Feedback_2,sentiment_score,Label_AFINN
0,I want to talk to you about your work on this ...,Negative,"[{'label': 'NEU', 'score': 0.7832766175270081}]",NEU,Neutral,"[{'label': 'LABEL_1', 'score': 0.4449909329414...",LABEL_1,Neutral,want talk work last project delay impacted tea...,4.0,Positive
1,I want to talk to you about your priorities. I...,Positive,"[{'label': 'POS', 'score': 0.8706808686256409}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.8078871369361...",LABEL_2,Positive,want talk priorities noticed well projects 2 3...,8.0,Positive
2,Reaching your goal of [name the goal] is a big...,Positive,"[{'label': 'POS', 'score': 0.9895287156105042}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.9781804084777...",LABEL_2,Positive,reaching goal name goal big accomplishment rem...,7.0,Positive
3,One of your most impactful moments was how you...,Positive,"[{'label': 'POS', 'score': 0.990766167640686}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.9867540001869...",LABEL_2,Positive,one impactful moments handled project x showed...,7.0,Positive
4,It’s clear you’re excited about the project. B...,Negative,"[{'label': 'POS', 'score': 0.5218806266784668}]",POS,Positive,"[{'label': 'LABEL_2', 'score': 0.8090545535087...",LABEL_2,Positive,clear youre excited project sometimes get exci...,6.0,Positive


In [11]:
df.to_excel(r'C:\Users\USUARIO\Documents\Maestria\Prompt Engineering\Comparacion Modelos.xlsx', index=False)

### Determinación mejor modelo

In [12]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [13]:
print('AFINN', classification_report(df['Label'],df['Label_AFINN']))

AFINN               precision    recall  f1-score   support

    Negative       0.80      0.27      0.40        15
    Positive       0.77      0.97      0.86        38

    accuracy                           0.77        53
   macro avg       0.79      0.62      0.63        53
weighted avg       0.78      0.77      0.73        53



In [14]:
print("AFIN\n",confusion_matrix(df['Label'],df['Label_AFINN']))

AFIN
 [[ 4 11]
 [ 1 37]]


In [15]:
print('ROBERTA', classification_report(df['Label'],df['Label_ROBERTA']))

ROBERTA               precision    recall  f1-score   support

    Negative       1.00      0.27      0.42        15
     Neutral       0.00      0.00      0.00         0
    Positive       0.89      0.84      0.86        38

    accuracy                           0.68        53
   macro avg       0.63      0.37      0.43        53
weighted avg       0.92      0.68      0.74        53



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
print("ROBERTA\n",confusion_matrix(df['Label'],df['Label_ROBERTA']))

ROBERTA
 [[ 4  7  4]
 [ 0  0  0]
 [ 0  6 32]]


In [17]:
print('BERT', classification_report(df['Label'],df['Label_BERT']))

BERT               precision    recall  f1-score   support

    Negative       1.00      0.27      0.42        15
     Neutral       0.00      0.00      0.00         0
    Positive       0.85      0.89      0.87        38

    accuracy                           0.72        53
   macro avg       0.62      0.39      0.43        53
weighted avg       0.89      0.72      0.74        53



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [18]:
print("BERT\n",confusion_matrix(df['Label'],df['Label_BERT']))

BERT
 [[ 4  5  6]
 [ 0  0  0]
 [ 0  4 34]]
