# 6.2 Sentiment Analysis

## 6.2.3 Sentiment analysis with LLM’s API

### Listing 6.2

In [2]:
# List of reviews to analyze (prepared manually). We limit the analysis to the
# first 500 non-empty reviews to save you some time and money.
import pandas as pd

df = pd.read_csv('olist_order_reviews_dataset.csv')
# .copy() turns the 500-row sample into an independent DataFrame, so the columns
# added to it in later cells (e.g. df["GPT4"]) do not trigger pandas'
# SettingWithCopyWarning.
df = df.dropna(subset=['review_comment_message'])[0:500].copy()
reviews = list(df["review_comment_message"])

In [9]:
# Code snippet that utilizes the OpenAI API for GPT-4

import os

from openai import OpenAI

# The key is read from the OPENAI_API_KEY environment variable so that the real
# secret never has to be saved inside the notebook. If the variable is not set,
# the placeholder below is used: replace 'your-api-key' with your actual OpenAI API key.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", "your-api-key"),
)


def get_sentiment(review):
    """Ask the chat model for the sentiment of one review and map the reply to a label.

    The request is sent through the module-level ``client``.

    Args:
        review: Text of the review; it is inserted into the prompt as-is.

    Returns:
        "positive", "neutral" or "negative" - the first of these words (checked
        in that order) that occurs in the model's reply - or "unknown" when the
        reply contains none of them.
    """
    response = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"The sentiment of this review is: {review}",
            }
        ],
        model="gpt-4-0125-preview",
    )
    # The reply is free text: it may be capitalized ("Positive") and its content
    # may be missing (None). Normalize it before the keyword search so that such
    # replies are neither reported as "unknown" by mistake nor crash the loop.
    completion = (response.choices[0].message.content or "").lower()
    for label in ("positive", "neutral", "negative"):
        if label in completion:
            return label
    return "unknown"

# Classify the reviews one by one (one API call per review) and keep the labels
# both as a list and as a new DataFrame column (manually adapted)
sentiments = [get_sentiment(review) for review in reviews]
df["GPT4"] = sentiments

In [13]:
# Simple keywords analysis performed in section 5.5.4 (Listing 5.6) and run on the set of the first 500 reviews.
# Portuguese words and phrases treated as markers of a positive review; is_positive
# (defined below) looks for them as case-insensitive substrings of the review text.
keywords = [
    "excelente", "ótimo", "maravilhoso", "incrível", "fantástico",
    "perfeito", "bom", "eficiente", "durável", "confiável",
    "rápido", "custo-benefício", "recomendo", "satisfeito",
    "surpreendente", "confortável", "fácil de usar", "funcional",
    "melhor", "vale a pena"
]

# Second version of the keyword search function proposed by ChatGPT that copes with NaNs in the input.
def is_positive(review, keywords):
    """Tell whether a review mentions at least one of the given keywords.

    The comparison ignores case and is a plain substring search, so a keyword
    also matches inside a longer word.

    Args:
        review: Review text. Any non-string value (e.g. NaN) counts as not positive.
        keywords: Iterable of keyword strings to look for.

    Returns:
        True if any keyword occurs in the review, otherwise False.
    """
    if not isinstance(review, str):
        return False

    review_lowered = review.lower()
    return any(keyword.lower() in review_lowered for keyword in keywords)

# Flag every review of the test DataFrame that contains one of the positive keywords (adapted).
df['keyword_sentiment'] = df['review_comment_message'].apply(is_positive, args=(keywords,))

In [14]:
###
# Assessing quality of the sentiment analysis based on keywords and on the
# ChatGPT-4 language model. Both assessments follow the same steps, so the
# shared logic lives in the small helpers below.

def _confusion_sets(reviews_df, posrev_senti, posrev_score):
    """Split the reviews into true/false positives and negatives.

    Args:
        reviews_df: All reviews under assessment (needs a "review_id" column).
        posrev_senti: Rows of reviews_df classified as positive by the method.
        posrev_score: Rows of reviews_df that are positive by review score.

    Returns:
        Tuple (TP, FP, FN, TN) of DataFrames.
    """
    TP = pd.merge(posrev_senti, posrev_score)
    FP = posrev_senti[~posrev_senti["review_id"].isin(posrev_score["review_id"])]
    FN = posrev_score[~posrev_score["review_id"].isin(posrev_senti["review_id"])]
    TN = reviews_df[
        ~reviews_df["review_id"].isin(posrev_senti["review_id"])
        & ~reviews_df["review_id"].isin(posrev_score["review_id"])
    ]
    return TP, FP, FN, TN


def _rate(hits, misses):
    """Return hits / (hits + misses) rounded to 2 decimals, or NaN when both are 0."""
    total = hits + misses
    return round(hits / total, 2) if total else float("nan")


def _print_quality(title, TP, FP, FN, TN):
    """Print the title followed by sensitivity and specificity of one method."""
    print(title)
    print("Sensitivity: ", _rate(len(TP), len(FN)))
    print("Specificity: ", _rate(len(TN), len(FP)))


# Reviews with a score of 4 or 5 are the reference set of positive reviews.
posrev_score = df[(df['review_score']==5)|(df['review_score']==4)]

###
# Assessing quality of the sentiment analysis based on keywords.
posrev_senti = df[df['keyword_sentiment']==True]
TP, FP, FN, TN = _confusion_sets(df, posrev_senti, posrev_score)
_print_quality("Quality of the basic keyword search:", TP, FP, FN, TN)

###
# Assessing quality of the sentiment analysis based on ChatGPT-4 language model.
posrev_senti = df[df['GPT4']=='positive']
TP, FP, FN, TN = _confusion_sets(df, posrev_senti, posrev_score)
_print_quality("Quality of the GPT-4 direct sentiment analysis:", TP, FP, FN, TN)

Quality of the basic keyword search:
Sensitivity:  0.45
Specificity:  0.9
Quality of the GPT-4 direct sentiment analysis:
Sensitivity:  0.74
Specificity:  0.93


In [15]:
# Printing out the number of positive, negative and unknown/neutral annotations,
# one value_counts() table per annotation column
for heading, column in [
    ("\nReview score:", "review_score"),
    ("\nKeyword sentiment analysis:", "keyword_sentiment"),
    ("\nGPT4 sentiment analysis:", "GPT4"),
]:
    print(heading)
    print(df[column].value_counts())


Review score:
5    249
1    106
4     71
3     51
2     23
Name: review_score, dtype: int64

Keyword sentiment analysis:
False    337
True     163
Name: keyword_sentiment, dtype: int64

GPT4 sentiment analysis:
positive    250
unknown     165
negative     78
neutral       7
Name: GPT4, dtype: int64


## 6.2.5 Sentiment analysis with a suboptimal model
### Listing 6.2

The code proposed by ChatGPT works well. The correct label for positive reviews was added manually based on the FinBERT-PT-BR model documentation. 

In [10]:
import pandas as pd
from transformers import pipeline

# Assuming df is your DataFrame and it has a column named 'review_comment_message'

# Load the sentiment analysis pipeline with the FinBERT-PT-BR model.
# The resulting `classifier` is called by get_sentiment below, once per review.
classifier = pipeline("sentiment-analysis", model="lucas-leme/FinBERT-PT-BR")

def get_sentiment(review):
    """Classify one review with the module-level ``classifier`` pipeline.

    Note: this definition replaces the GPT-based get_sentiment defined earlier
    in the notebook and, unlike it, returns a pair instead of a single label.

    Args:
        review: Text handed to the classifier.

    Returns:
        Tuple (label, score) taken from the first prediction, or (None, None)
        if anything goes wrong; in that case the error is printed.
    """
    try:
        top_prediction = classifier(review)[0]
        label, confidence = top_prediction['label'], top_prediction['score']
    except Exception as e:
        print(f"Error processing review: {e}")
        return None, None
    return label, confidence

# Run the classifier on every review and unpack the (label, score) pairs into two columns
predicted_labels, predicted_scores = zip(*df['review_comment_message'].map(get_sentiment))
df['sentiment'] = predicted_labels
df['score'] = predicted_scores

# Keep only the rows the model labelled as positive
positive_reviews_df = df.loc[df['sentiment'] == 'POSITIVE']  # Adjust label as necessary based on model output

# positive_reviews_df now holds the positive reviews, including the two helper columns

# Drop the helper columns from df again (manually added)
df = df.drop(columns=['sentiment', 'score'])

In [11]:
###
# Assessing quality of the sentiment analysis based on FinBERT-PT-BR model.

# Positive reviews according to the model vs. according to the review score (4 or 5).
posrev_senti = positive_reviews_df
posrev_score = df[df['review_score'].isin([4, 5])]

# Split the reviews into true positives (TP), false positives (FP), false negatives (FN) and true negatives (TN).
TP = posrev_senti.merge(posrev_score)
FP = posrev_senti[~posrev_senti["review_id"].isin(posrev_score["review_id"])]
FN = posrev_score[~posrev_score["review_id"].isin(posrev_senti["review_id"])]
TN = df[~df["review_id"].isin(posrev_senti["review_id"]) & ~df["review_id"].isin(posrev_score["review_id"])]

# Sensitivity = TP / (TP + FN), specificity = TN / (TN + FP)
print("Quality of the FinBERT-PT-BR sentiment analysis:")
sensitivity = len(TP) / (len(TP) + len(FN))
print("Sensitivity: ", round(sensitivity, 2))
specificity = len(TN) / (len(TN) + len(FP))
print("Specificity: ", round(specificity, 2))

print("Nr of reviews classified as positive: ", len(positive_reviews_df))

Quality of the FinBERT-PT-BR sentiment analysis:
Sensitivity:  0.56
Specificity:  0.93
Nr of reviews classified as positive:  193


## 6.2.6 Sentiment analysis on translated inputs
### Listing 6.3
The code translates the Portuguese input to English using Meta's m2m100_418M model and then applies the distilbert-base-uncased-finetuned-sst-2-english model for sentiment analysis. The part that assesses the output quality was added manually.

In [3]:
import pandas as pd
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer, pipeline

# Assuming df is your DataFrame and it has a column named 'review_comment_message'

# Initialize the M2M100 tokenizer and model for translation.
# Both are read as module-level names by translate_review below.
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")

# Initialize the sentiment analysis pipeline (called by analyze_sentiment below,
# which is applied to the translated reviews)
sentiment_pipeline = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

def translate_review(review):
    """Translate one review from Portuguese to English.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        review: Portuguese text to translate.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    # Source language is set on the tokenizer; the target language is forced
    # through the first generated token.
    tokenizer.src_lang = "pt"
    model_inputs = tokenizer(review, return_tensors="pt")
    english_token_id = tokenizer.get_lang_id("en")
    output_ids = model.generate(**model_inputs, forced_bos_token_id=english_token_id)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

def analyze_sentiment(review):
    """Classify one text with the module-level ``sentiment_pipeline``.

    Args:
        review: Text to classify.

    Returns:
        Tuple (label, score) taken from the pipeline's first prediction.
    """
    best = sentiment_pipeline(review)[0]
    label = best['label']
    confidence = best['score']
    return label, confidence

# Translate reviews from Portuguese to English (the new column stays in df)
df['translated_review'] = df['review_comment_message'].apply(translate_review)

# Classify the translations and unpack the (label, score) pairs into two columns
predicted_labels, predicted_scores = zip(*df['translated_review'].apply(analyze_sentiment))
df['sentiment'] = predicted_labels
df['score'] = predicted_scores

# Keep only the reviews with positive sentiment
positive_reviews_df = df.loc[df['sentiment'] == 'POSITIVE']

# positive_reviews_df now contains only the positive reviews, together with their English translation

# Drop the helper columns from df again (manually added)
df = df.drop(columns=['sentiment', 'score'])

In [4]:
###
# Assessing quality of the sentiment analysis with a translated input and classifier for English.

# Positive reviews according to the model vs. according to the review score (4 or 5).
posrev_senti = positive_reviews_df
posrev_score = df[df['review_score'].isin([4, 5])]

# Split the reviews into true positives (TP), false positives (FP), false negatives (FN) and true negatives (TN).
TP = posrev_senti.merge(posrev_score)
FP = posrev_senti[~posrev_senti["review_id"].isin(posrev_score["review_id"])]
FN = posrev_score[~posrev_score["review_id"].isin(posrev_senti["review_id"])]
TN = df[~df["review_id"].isin(posrev_senti["review_id"]) & ~df["review_id"].isin(posrev_score["review_id"])]

# Sensitivity = TP / (TP + FN), specificity = TN / (TN + FP)
print("Quality of the sentiment analysis with English translations:")
sensitivity = len(TP) / (len(TP) + len(FN))
print("Sensitivity: ", round(sensitivity, 2))
specificity = len(TN) / (len(TN) + len(FP))
print("Specificity: ", round(specificity, 2))

print("Nr of reviews classified as positive: ", len(positive_reviews_df))

Quality of the sentiment analysis with English translations:
Sensitivity:  0.86
Specificity:  0.89
Nr of reviews classified as positive:  295


## 6.2.7 Sentiment analysis with multilingual models
### Listing 6.4
The code utilizes the multilingual model to assess sentiment of the reviews. The code required manual adaptation of the label used for positive reviews. The part for checking the quality of the output was added manually.

In [5]:
import pandas as pd
from transformers import pipeline

# Assuming df is your DataFrame and it has a column named 'review_comment_message'

# Initialize the sentiment analysis pipeline with the multilingual model.
# NOTE: this rebinds `sentiment_pipeline`, the name used earlier in the notebook for the
# distilbert English pipeline; from this cell on the name refers to the multilingual model.
sentiment_pipeline = pipeline('sentiment-analysis', model='cardiffnlp/twitter-xlm-roberta-base-sentiment')

def analyze_sentiment_multilingual(text):
    """Classify one text with the module-level ``sentiment_pipeline``.

    Args:
        text: Text to classify.

    Returns:
        Tuple (label, score) taken from the pipeline's first prediction.
    """
    best = sentiment_pipeline(text)[0]
    label = best['label']
    confidence = best['score']
    return label, confidence

# Classify every review and unpack the (label, score) pairs into two columns
predicted_labels, predicted_scores = zip(*df['review_comment_message'].apply(analyze_sentiment_multilingual))
df['sentiment'] = predicted_labels
df['score'] = predicted_scores

# Keep only the reviews classified as positive.
# Note: the generated code expected the labels 'LABEL_0' (negative), 'LABEL_1' (neutral) and
# 'LABEL_2' (positive); the comparison below was changed by hand to the label 'positive'.
positive_reviews_df = df.loc[df['sentiment'] == 'positive']  # Adapted manually

# positive_reviews_df now contains only the reviews classified as positive

# Drop the helper columns from df again (manually added)
df = df.drop(columns=['sentiment', 'score'])

In [9]:
###
# Assessing quality of the sentiment analysis with a multilingual model.

# Positive reviews according to the model vs. according to the review score (4 or 5).
posrev_senti = positive_reviews_df
posrev_score = df[df['review_score'].isin([4, 5])]

# Split the reviews into true positives (TP), false positives (FP), false negatives (FN) and true negatives (TN).
TP = posrev_senti.merge(posrev_score)
FP = posrev_senti[~posrev_senti["review_id"].isin(posrev_score["review_id"])]
FN = posrev_score[~posrev_score["review_id"].isin(posrev_senti["review_id"])]
TN = df[~df["review_id"].isin(posrev_senti["review_id"]) & ~df["review_id"].isin(posrev_score["review_id"])]

# Sensitivity = TP / (TP + FN), specificity = TN / (TN + FP)
print("Quality of the sentiment analysis with a multilingual model:")
sensitivity = len(TP) / (len(TP) + len(FN))
print("Sensitivity: ", round(sensitivity, 2))
specificity = len(TN) / (len(TN) + len(FP))
print("Specificity: ", round(specificity, 2))

print("Nr of reviews classified as positive: ", len(positive_reviews_df))

Quality of the sentiment analysis with a multilingual model:
Sensitivity:  0.79
Specificity:  0.93
Nr of reviews classified as positive:  265


## 6.2.8 Sentiment analysis with zero-shot learning models
### Listing 6.5
The code utilizes a zero-shot learning model proposed by Generative AI to assess sentiment of the reviews. The code required manual adaptation to our input data. The part for checking the quality of the output was added manually.

In [7]:
from transformers import pipeline
import pandas as pd

# Load the zero-shot classification pipeline.
# NOTE: this rebinds `classifier`, the name used earlier in the notebook for the
# FinBERT-PT-BR sentiment pipeline.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Specify the candidate labels; classify_review (below) returns the first label
# of the pipeline's result for these candidates
candidate_labels = ["positive", "negative"]

# Define a function to classify a single review
def classify_review(review):
    """Return the first label the zero-shot ``classifier`` reports for a review.

    The module-level ``candidate_labels`` are offered to the classifier with
    the hypothesis template "This review is {}." in single-label mode.

    Args:
        review: Text to classify.

    Returns:
        The first entry of the result's 'labels' list.
    """
    ranking = classifier(
        review,
        candidate_labels=candidate_labels,
        hypothesis_template="This review is {}.",
        multi_label=False,
    )
    top_label = ranking['labels'][0]
    return top_label

# Classify every review and store the winning label in a helper column
df['sentiment'] = df['review_comment_message'].map(classify_review)

# Keep only the reviews classified as positive
positive_reviews_df = df.loc[df['sentiment'] == 'positive']

# Drop the helper column from df again (manually added)
df = df.drop(columns=['sentiment'])

In [9]:
###
# Assessing quality of the sentiment analysis with a zero-shot learning model

# Positive reviews according to the model vs. according to the review score (4 or 5).
posrev_senti = positive_reviews_df
posrev_score = df[df['review_score'].isin([4, 5])]

# Split the reviews into true positives (TP), false positives (FP), false negatives (FN) and true negatives (TN).
TP = posrev_senti.merge(posrev_score)
FP = posrev_senti[~posrev_senti["review_id"].isin(posrev_score["review_id"])]
FN = posrev_score[~posrev_score["review_id"].isin(posrev_senti["review_id"])]
TN = df[~df["review_id"].isin(posrev_senti["review_id"]) & ~df["review_id"].isin(posrev_score["review_id"])]

# Sensitivity = TP / (TP + FN), specificity = TN / (TN + FP)
print("Quality of the sentiment analysis with a zero-shot learning model:")
sensitivity = len(TP) / (len(TP) + len(FN))
print("Sensitivity: ", round(sensitivity, 2))
specificity = len(TN) / (len(TN) + len(FP))
print("Specificity: ", round(specificity, 2))

print("Nr of reviews classified as positive: ", len(positive_reviews_df))

Quality of the sentiment analysis with a zero-shot learning model:
Sensitivity:  0.87
Specificity:  0.74
Nr of reviews classified as positive:  325


# 6.3 Text summarization
## 6.3.4 Summarizing text with dedicated libraries
### Listing 6.6

Python code utilizing the frequency-based approach to generate summaries of very short customer reviews in Portuguese. Proposed by ChatGPT. The input was adapted manually.

In [16]:
import pandas as pd
import nltk
import string
from collections import Counter

# Download the required NLTK resources.
# Recent NLTK releases (3.9 and later) load the 'punkt_tab' resource in
# nltk.word_tokenize, older ones load 'punkt'; requesting both keeps the cell
# working with either.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Sample data (manually adapted to remove empty records).
# df is re-created here from the CSV file: all reviews that have a comment.
df = pd.read_csv('olist_order_reviews_dataset.csv').dropna(subset=['review_comment_message'])

# Function to tokenize and remove stopwords
def preprocess(text):
    """Lower-case and tokenize a text, dropping punctuation and Portuguese stopwords.

    A token is treated as punctuation when it occurs inside
    ``string.punctuation`` (a substring test).

    Args:
        text: Review text.

    Returns:
        List of the remaining tokens, in their original order.
    """
    portuguese_stopwords = nltk.corpus.stopwords.words('portuguese')
    kept_tokens = []
    for token in nltk.word_tokenize(text.lower()):
        if token in string.punctuation:
            continue
        if token in portuguese_stopwords:
            continue
        kept_tokens.append(token)
    return kept_tokens

# Function to create word frequency distribution
def word_frequency(tokens):
    """Count how often each token occurs.

    Args:
        tokens: Iterable of tokens.

    Returns:
        collections.Counter mapping each token to its number of occurrences.
    """
    return Counter(tokens)

# Function to summarize short reviews
def summarize_reviews(text, num_keywords=3):
    """Summarize a short review by its most frequent words.

    The text is tokenized with ``preprocess`` and counted with
    ``word_frequency``.

    Args:
        text: Review text.
        num_keywords: Number of most common words to keep.

    Returns:
        The selected words joined by single spaces, most frequent first.
    """
    ranked = word_frequency(preprocess(text)).most_common(num_keywords)
    return ' '.join(word for word, _ in ranked)

# Summarize every review and store the result in a new column
df['summary'] = df['review_comment_message'].apply(summarize_reviews)

# Display the results (manually adapted to print the summary of the longest message,
# which is the row with index label 1316)
longest_review_row = df.loc[1316]
print("Longest review:", longest_review_row["review_comment_message"])
print("Summary:", longest_review_row["summary"])

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\marle\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\marle\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Longest review: NÃO RECEBI O PRODUTO, O PRODUTO CONSTA COMO ENVIADO PARA O CORREIO DE RIBEIRÃO PRETO. O CORREIO NÃO RECEBEU O PRODUTO. ENVIE VARIAS MENSAGEM PARA A targaryen E NÃO OBTIVE. ESTA targaryen ESTA SUJANDO SEU NOME
Summary: produto correio targaryen


## 6.3.5 Topic modeling
### Listing 6.7

Python code proposed by ChatGPT to perform topic modelling of negative customer reviews in Portuguese. The code was manually adapted to our input data. Only negative reviews with review_scores of 1 or 2 were used in the analysis. The output was manually adapted to display 7 words for each topic.

In [5]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import SnowballStemmer
import re

# Load data. Only negative reviews (non-empty comment, review score of 1 or 2)
# were chosen for the analysis (adapted manually).
df = (
    pd.read_csv('olist_order_reviews_dataset.csv')
    .dropna(subset=['review_comment_message'])
    .loc[lambda reviews_df: reviews_df["review_score"].isin([1, 2])]
)

# Preprocess the text
def preprocess_text(text, language='portuguese'):
    """Turn a review into a list of stemmed, stopword-free tokens.

    Args:
        text: Review text.
        language: Language name passed to the tokenizer, the stopword list
            and the Snowball stemmer.

    Returns:
        List of stemmed tokens in their original order.
    """
    # Lower-case, then remove every character that is neither a word character nor whitespace
    normalized = re.sub(r'[^\w\s]', '', text.lower())

    tokens = word_tokenize(normalized, language=language)

    # Filter out stopwords and stem what is left in a single pass
    stop_words = set(stopwords.words(language))
    stemmer = SnowballStemmer(language)
    return [stemmer.stem(token) for token in tokens if token not in stop_words]

# Tokenize, clean and stem every review
df['preprocessed_reviews'] = df['review_comment_message'].apply(preprocess_text)

# Loading the model.
from gensim.corpora import Dictionary
from gensim.models import LdaModel

# Create a dictionary and a bag-of-words corpus for LDA
dictionary = Dictionary(df['preprocessed_reviews'])
corpus = list(map(dictionary.doc2bow, df['preprocessed_reviews']))

# Train an LDA model (random_state is fixed so the run can be repeated)
num_topics = 5  # Adjust this value according to the desired number of topics
lda_model = LdaModel(
    corpus=corpus,
    num_topics=num_topics,
    id2word=dictionary,
    random_state=42,
)

# Displaying results (manually adapted to display 7 words).
def display_topics(model, num_topics, num_words=7):
    """Print the topics of a model, numbered from 1, each followed by a blank line.

    Args:
        model: Object with a print_topics(num_topics, num_words) method that
            yields (index, description) pairs.
        num_topics: Number of topics to request.
        num_words: Number of words to request per topic.
    """
    topic_rows = model.print_topics(num_topics, num_words)
    for topic_id, weighted_terms in topic_rows:
        print("Topic {}: {}\n".format(topic_id + 1, weighted_terms))

# Print the topics of the trained LDA model (num_topics topics, default number of words each)
display_topics(lda_model, num_topics)

Topic 1: 0.055*"compr" + 0.043*"receb" + 0.032*"produt" + 0.020*"2" + 0.019*"entreg" + 0.017*"apen" + 0.017*"ped"

Topic 2: 0.043*"entreg" + 0.035*"produt" + 0.035*"compr" + 0.028*"receb" + 0.021*"agor" + 0.020*"praz" + 0.013*"falt"

Topic 3: 0.095*"produt" + 0.046*"receb" + 0.021*"entreg" + 0.017*"compr" + 0.016*"aind" + 0.016*"vei" + 0.015*"quer"

Topic 4: 0.070*"produt" + 0.066*"entreg" + 0.040*"cheg" + 0.024*"aind" + 0.024*"dia" + 0.023*"praz" + 0.019*"receb"

Topic 5: 0.052*"produt" + 0.035*"receb" + 0.035*"compr" + 0.032*"vei" + 0.017*"nao" + 0.012*"cheg" + 0.011*"entreg"

