# Sentiment Analysis

In [None]:
import torch
import pandas as pd
import pickle

In [None]:
# Prefer the first CUDA GPU when one is visible to torch; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

In [None]:
# Load the pickled train/test frames (re-indexed from 0), the restaurant table and the LDA model.
# NOTE: unpickling can execute arbitrary code -- only load these files from a trusted source.
train_df = pd.read_pickle('../data/Train_by_postoal_code_without_review_pointwise_v3_3.pkl').reset_index(drop=True)
test_df = pd.read_pickle('../data/Test_by_postoal_code_without_review_pointwise_v3_3.pkl').reset_index(drop=True)
# NOTE(review): this path uses '../Data/' while the other three use '../data/'. On a
# case-sensitive filesystem those are different directories -- confirm which one exists.
all_df = pd.read_pickle('../Data/restaurant_only_filtered_with_embedding.pkl')
# Context manager so the file handle is closed as soon as the model has been unpickled.
with open('../data/lda_model.pkl', 'rb') as lda_file:
    lda_model = pickle.load(lda_file)

In [None]:
import spacy
import pandas as pd
# Load the `en_core_web_md` spaCy pipeline; `nlp` is used further down to parse review fragments.
nlp = spacy.load("en_core_web_md")
from collections import Counter

In [None]:
# Build `aspect_list` with at most one keyword per LDA topic: for every topic, take the
# highest-ranked of its top `num_words` words that has not already been chosen.
num_words = 10  # how many top words to request per topic
topics = lda_model.show_topics(num_topics=-1, num_words=num_words, formatted=False)
aspect_list = []
for _topic_id, ranked_terms in topics:
    # First term of this topic that is still unused (None when all of them are taken).
    unused_term = next(
        (term for term, _weight in ranked_terms if term not in aspect_list),
        None,
    )
    if unused_term is not None:
        aspect_list.append(unused_term)



In [None]:
# Show the keywords picked from the LDA topics.
aspect_list

In [None]:
# Hand-written list of restaurant aspect names (15 entries).
# NOTE(review): no cell visible in this notebook reads this list -- confirm it is still needed.
restaurant_aspects = [
    "Food Quality", "Service", "Ambiance", "Pricing", "Cleanliness",
    "Menu Variety", "Location", "Reputation", "Customer Reviews", "Specialties",
    "Beverage Selection", "Dietary Options", "Innovation", "Reservation System",
    "Overall Experience",
]


## spaCy and TextBlob
* [spaCy and TextBlob](https://towardsdatascience.com/aspect-based-sentiment-analysis-using-spacy-textblob-4c8de3e0d2b9)

In [None]:
# Counter(train_df.name)
# Joe Boccardi's Ristorante

In [None]:
# Single hand-written example sentence; a later cell overwrites `sentences` with real review texts.
sentences = ['Mushrooms n calamari was not bad']

In [None]:
# Keep only the training rows whose `name` column matches this one restaurant.
tmp = train_df.loc[train_df["name"].eq("Joe Boccardi's Ristorante")]

In [None]:
# Unique review texts of the selected rows; dict keys keep the first-seen order.
sentences = list(dict.fromkeys(tmp["text"]))

In [None]:
# Display the full de-duplicated list of review texts (can be long; slice it for a preview).
sentences

In [None]:
# Rule-based aspect extraction: for every '.'-separated fragment of every review, record
# the noun subject (the "aspect") and the adjective describing it, with its adverb modifiers.
aspects = []

for sentence in sentences:
    # Naive sentence segmentation: split the review text on '.'.
    for sent in sentence.split('.'):
        # Splitting on '.' leaves blank fragments (after a trailing period, between "..").
        # Skip them so they are neither parsed nor recorded as empty aspect entries.
        if not sent.strip():
            continue
        doc = nlp(sent)
        print(doc)
        descriptive_term = ''
        target = ''
        for token in doc:
            # Noun acting as the grammatical subject; the last one in the fragment wins.
            if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
                target = token.text
            # Adjective, prefixed with its adverb children ("not", "very", ...);
            # the last adjective in the fragment wins.
            if token.pos_ == 'ADJ':
                prepend = ''.join(
                    child.text + ' ' for child in token.children if child.pos_ == 'ADV'
                )
                descriptive_term = prepend + token.text
        # One record per non-blank fragment; either field may be '' when nothing matched.
        aspects.append({'aspect': target,'description': descriptive_term})

print(aspects)



In [None]:
from textblob import TextBlob
# Attach a TextBlob polarity score to every extracted record (mutates the dicts in place).
for record in aspects:
    description_blob = TextBlob(record['description'])
    record['sentiment'] = description_blob.sentiment.polarity
print(aspects)


## aspect-based-sentiment-analysis 2.0.3
* Only runs on Google Colab
* [python package](https://pypi.org/project/aspect-based-sentiment-analysis/)
* [example from youtube](https://www.youtube.com/watch?v=q8sTicXK4Fg)

## VaderSentiment.vaderSentiment
* [Kaggle notebook](https://www.kaggle.com/code/phiitm/aspect-based-sentiment-analysis)

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
#note: depending on how you installed (e.g., using source code download versus pip install), you may need to import like this:
#from vaderSentiment import SentimentIntensityAnalyzer

# --- VADER demo sentences, each annotated with the behaviour it illustrates -------
sentences = ["VADER is smart, handsome, and funny.",  # positive sentence example
             "VADER is smart, handsome, and funny!",  # punctuation emphasis handled correctly (sentiment intensity adjusted)
             "VADER is very smart, handsome, and funny.", # booster words handled correctly (sentiment intensity adjusted)
             "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
             "VADER is VERY SMART, handsome, and FUNNY!!!", # combination of signals - VADER appropriately adjusts intensity
             "VADER is VERY SMART, uber handsome, and FRIGGIN FUNNY!!!", # booster words & punctuation make this close to ceiling for score
             "VADER is not smart, handsome, nor funny.",  # negation sentence example
             "The book was good.",  # positive sentence
             "At least it isn't a horrible book.",  # negated negative sentence with contraction
             "The book was only kind of good.", # qualified positive sentence is handled correctly (intensity adjusted)
             "The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence
             "Today SUX!",  # negative slang with capitalization emphasis
             "Today only kinda sux! But I'll get by, lol", # mixed sentiment example with slang and contrastive conjunction "but"
             "Make sure you :) or :D today!",  # emoticons handled
             "Catch utf-8 emoji such as such as 💘 and 💋 and 😁",  # emojis handled
             "Not bad at all"  # Capitalized negation
             ]

# Score each sentence and print it, padded with '-' to 65 columns, next to its score dict.
analyzer = SentimentIntensityAnalyzer()
for text in sentences:
    print("{:-<65} {}".format(text, str(analyzer.polarity_scores(text))))

## HuggingFace

In [None]:
pip install transformers[sentencepiece]

In [None]:
pip install -U transformers

In [None]:
from transformers import DebertaV2Tokenizer


In [15]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification,DebertaV2Tokenizer

# Aspect-based sentiment checkpoint from the Hugging Face Hub
# ("yangheng/deberta-v3-large-absa-v1.1" is the larger alternative).
absa_checkpoint = "yangheng/deberta-v3-base-absa-v1.1"
tokenizer = AutoTokenizer.from_pretrained(absa_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(absa_checkpoint)

# Review text and aspect ("food") are packed into one string with explicit [CLS]/[SEP] markers.
absa_prompt = "[CLS]The pizza tastes nice . The pizza tastes awful. [SEP]food[SEP]"
inputs = tokenizer(absa_prompt, return_tensors="pt")
# Run outside torch.no_grad(), so the returned logits carry a grad_fn.
outputs = model(**inputs)

In [16]:
# Inspect the raw classifier output (one logit per class).
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[-0.8710, -1.0564,  2.0131]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [17]:
import torch
print()
# Element-wise sigmoid of the three logits, rescaled so the printed values sum to 1.
# NOTE(review): this is sigmoid-then-normalise, which is not the same as a softmax.
class_scores = torch.sigmoid(outputs.logits)[0]
score_total = class_scores.sum()
for score in class_scores:
    print(score / score_total)


tensor(0.2056, grad_fn=<DivBackward0>)
tensor(0.1798, grad_fn=<DivBackward0>)
tensor(0.6147, grad_fn=<DivBackward0>)


In [None]:
import torch
from transformers import AutoTokenizer, DebertaV2ForSequenceClassification

# Minimal single-sentence classification demo with DeBERTa-v2.
# NOTE(review): "microsoft/deberta-v2-xlarge" is a base pretrained checkpoint, so the
# classification head is presumably newly initialised -- the predicted label is not
# meaningful until the model has been fine-tuned for sentiment.
sentiment_labels = ["Positive", "Neutral","Negative"]
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v2-xlarge")
# Size the head to the label list so every label index can actually be produced.
model = DebertaV2ForSequenceClassification.from_pretrained(
    "microsoft/deberta-v2-xlarge", num_labels=len(sentiment_labels)
)
model.eval()

input_text = "Hello, my dog is ugly"
inputs = tokenizer(input_text, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# Single-label decision: take the highest-scoring class. The previous sigmoid > 0.5 mask
# could select zero or several classes, and indexing the Python list with that tensor
# raises TypeError unless exactly one class passes the threshold.
predicted_class_id = int(logits.argmax(dim=-1).item())
print(logits)

# Interpret the sentiment result
sentiment = sentiment_labels[predicted_class_id]

print("Input Text: ", input_text)
print("Predicted Sentiment: ", sentiment)

In [None]:
# Keep only the training rows whose `name` column matches this one restaurant.
tmp = train_df.loc[train_df["name"].eq("Cafe Patachou")]

In [None]:
# Unique review texts of the selected rows; dict keys keep the first-seen order.
sentences = list(dict.fromkeys(tmp["text"]))

In [None]:
# Number of training rows per restaurant name.
Counter(train_df["name"])

In [None]:
import torch
from transformers import DebertaV2ForSequenceClassification, DebertaV2Tokenizer

# Load the pretrained tokenizer and a DeBERTa-v2 model with a sequence-classification head.
# NOTE(review): this is a base pretrained checkpoint, so the classification head is
# presumably newly initialised -- predictions are not meaningful until it is fine-tuned
# or replaced by a sentiment checkpoint.
model_name = "microsoft/deberta-v2-xlarge"  # Pretrained DeBERTa-v2 model
tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
# prediction() maps the argmax of the logits onto three labels, so the head needs three
# outputs; without num_labels the config default applies and the third label is unreachable.
model = DebertaV2ForSequenceClassification.from_pretrained(model_name, num_labels=3)

# Set the device for inference
model.to(device)
model.eval()

# Example input text and aspect
input_text = sentences[0]
aspect = "Food"

def prediction(input_text, aspect):
    """Predict and print the sentiment of ``input_text`` with respect to ``aspect``.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        input_text: Review text to classify.
        aspect: Aspect term; it is appended to the text after an "[ASP]" marker.

    Returns:
        The predicted label, one of "Positive", "Neutral" or "Negative". The text,
        aspect and label are also printed.
    """
    sentiment_labels = ["Positive", "Neutral", "Negative"]

    # Combine the input text and aspect.
    # NOTE(review): "[ASP]" is not registered as a special token anywhere in this
    # notebook, so the tokenizer presumably splits it like ordinary text.
    input_text_with_aspect = f"{input_text} [ASP] {aspect}"

    # Call the tokenizer directly; encode_plus is deprecated in favour of __call__.
    encoded_input = tokenizer(input_text_with_aspect, padding=True, truncation=True, return_tensors="pt")
    input_ids = encoded_input["input_ids"].to(device)
    attention_mask = encoded_input["attention_mask"].to(device)

    # Perform aspect sentiment analysis inference without tracking gradients.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits

    # Highest-scoring class index -> label name.
    predicted_sentiment = torch.argmax(logits, dim=1).item()
    sentiment = sentiment_labels[predicted_sentiment]

    print("Input Text: ", input_text)
    print("Aspect: ", aspect)
    print("Predicted Sentiment: ", sentiment)

    # Returned as well as printed so callers can collect the results.
    return sentiment

In [None]:
# Run the aspect-sentiment prediction for the 'food' aspect on every collected review.
for review_text in sentences:
    prediction(review_text, 'food')