In [1]:
!pip install -r ../requirements.txt

Collecting argparse
  Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse
Successfully installed argparse-1.4.0



[notice] A new release of pip available: 22.2.2 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


### Read Data

In [2]:
import nltk
import re
import csv

import codecs
from bs4 import BeautifulSoup
# download the necessary nltk packages (only needs to be done once)
nltk.download('punkt')
nltk.download('stopwords')

def read_reviews_from_csv(file_path):
    ratings = []
    reviews = []
    with open(file_path, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            # Assuming rating is the first item in each row, and review is the second item
            rating, review = row[0], row[1]
            ratings.append(int(rating)) # Convert rating to integer if needed
            reviews.append(review)
    return ratings, reviews


# function to clean a review text
def clean_review(review):
    # convert to lowercase
    review = review.lower()
    # remove non-alphanumeric characters
    review = re.sub(r'[^a-zA-Z0-9\s]', '', review)
    # remove extra whitespaces
    review = re.sub(r'\s+', ' ', review).strip()
    # tokenize the cleaned review into words using nltk's word_tokenize() function
    tokens = nltk.word_tokenize(review)
    tokens = [str(token) for token in tokens]
    # join the tokens back into a sentence using ' ' as a separator
    review = ' '.join(tokens)
    # return cleaned review
    return review


def get_ratings_sentences(path):
    ratings, reviews = read_reviews_from_csv(path)
    # clean the reviews and split them into sentences
    sentences = []
    for idx, review in enumerate(reviews):
        # clean the review text
        cleaned_review = clean_review(review)
        # split the cleaned review into sentences using nltk's sent_tokenize() function
        review_sentences = nltk.sent_tokenize(cleaned_review)
        # append the sentences to the list
        sentences.extend(review_sentences)

    return sentences


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sahil\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sahil\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Call Model 
Instruct ATE + SA + Clustering

In [3]:
import sys

# Add the parent directory of ABSA to the module search path
sys.path.append('..')

from models.SA import evaluate
from models.Clustering import scores
from scripts import script
sentences = get_ratings_sentences("../data/reviews.csv")
category_1_score = 0
category_1_total = 0
category_2_score = 0
category_2_total = 0
category_3_score = 0
category_3_total = 0
category_4_score = 0
category_4_total = 0

for sent in sentences:
	print("Sentence:" , sent)
	terms = script.return_iabsa(script.Task.ATE, "../models/InstructABSA/InstructABSA/Models/ate/allenaitk-instruct-base-def-pos-ate_check", 2, sent)
	print("Aspects:" , terms)
	labels, final_terms = evaluate.evaluate(sent, terms)
	print("Final Aspects:" , final_terms)
	print("Polarities: " , labels)
	categories_scores, categories_total =  scores.get_clusters(final_terms, labels)
	print("Clustering scores: ", categories_scores)
	category_1_score += categories_scores[0]
	category_1_total += categories_total[0]
	category_2_score += categories_scores[1]
	category_2_total += categories_total[1]
	category_3_score += categories_scores[2]
	category_3_total += categories_total[2]
	category_4_score += categories_scores[3]
	category_4_total += categories_total[3]


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Sentence: like the restaurant the burgers are delicious but for the price you pay they are cheap with the fries like the decorations and music the store is a plus our waitress very friendly
Aspects: ['burgers', 'price', 'fries', 'decorations', 'music', 'waitress']
Final Aspects: ['burgers', 'price', 'fries', 'decorations', 'music', 'waitress']
Polarities:  [0, 0, 2, 0, 1, 0]
Clustering scores:  [1, 0, 1, 1]
Sentence: coming from the uk and knowing how popular this chain is i prebooked our table well in advance of our visit to nyc last time we came to the city we were unable to get a walkin that suited so didnt want to face the same disfortune this time round we attended on a wednesday night in early december and after a small que awaiting a photo being taken we got seated admittedly our table didnt feel in the best location within the restaurant but despite this we had no issues with it our server was friendly and welcoming orders were taken swiftly and drinks and food followed accordi

Token indices sequence length is longer than the specified maximum sequence length for this model (620 > 512). Running this sequence through the model will result in indexing errors


Aspects: ['burger with fries', 'burger', 'meal']
Final Aspects: ['burger', 'burger', 'meal']
Polarities:  [2, 2, 2]
Clustering scores:  [0, -2, -1, 0]
Sentence: was in nyc 10 days ago and during my short 5 days stay went twice the day i arrived and the day i was leaving totally worth it not long wait good service great food good pricequality relation totally nice decor as all hr cafes this one has an strategic location right at times square in the middle of the best in my opinion manhattan touristic spot i need to make a special mention to the server we got on our lunch on 24th at about 1145am dont know her name but we were 2 sitting from the door to the right not far from the tall tables like bar ones we got a messi burger the spicy shrimps and a salmon and noodle bowl and the server we got was just amazing i am so sorry did not catch her name i really hope you could find out because she was extraordinary kind nice super smiley super knowledgeable of everything attentive fast empathic

In [7]:
rest_1_scores = {'service': category_1_score, 'food': category_2_score, 'price': category_3_score, 'ambience': category_4_score}
print("Restaurant 1 Sentiment Score")
print("-----------------")
print("service: ", category_1_score)
print("food: ", category_2_score)
print("price: ", category_3_score)
print("ambience: ", category_4_score)

Restaurant 1 Sentiment Score
-----------------
service:  3
food:  -5
price:  -2
ambience:  1


In [5]:
sentences = get_ratings_sentences("../data/reviews2.csv")
category_1_score_2 = 0
category_2_score_2 = 0
category_3_score_2 = 0
category_4_score_2 = 0

for sent in sentences:
	print("Sentence:" , sent)
	terms = script.return_iabsa(script.Task.ATE, "../models/InstructABSA/InstructABSA/Models/ate/allenaitk-instruct-base-def-pos-ate_check", 2, sent)
	print("Aspects:" , terms)
	labels, final_terms = evaluate.evaluate(sent, terms)
	print("Final Aspects:" , final_terms)
	print("Polarities: " , labels)
	categories_scores, categories_total =  scores.get_clusters(final_terms, labels)
	print("Clustering scores: ", categories_scores)
	category_1_score_2 += categories_scores[0]
	category_2_score_2 += categories_scores[1]
	category_3_score_2 += categories_scores[2]
	category_4_score_2 += categories_scores[3]


Sentence: this was our first visit to the rake lovely premises music playing and we were welcomed in we were shown to our table and offered drinks the menu arrived a strip of paper and pen and we made our selections it would have been helpful to have a detailed menu so we could understand the dishes ingredients etc the salt and pepper on our table was from the coop the food was good however our dishes were brought out one at a time which tended to result in us not eating at the same time 110 bill for two people seemed pricey in our view
Aspects: ['premises', 'food', 'dishes', 'salt and pepper on our table', 'drinks', 'menu', 'dishes', 'bill']
Final Aspects: ['premises', 'food', 'dishes', 'salt', 'drinks', 'menu', 'dishes', 'bill']
Polarities:  [0, 1, 2, 2, 1, 2, 2, 2]
Clustering scores:  [0, -4, 0, 0]
Sentence: another fabulous meal at the rake the staff were wonderful especially our italian waitress really enjoyed the food will be back very soon
Aspects: ['staff', 'italian waitress', 

In [8]:
rest_2_scores = {'service': category_1_score_2, 'food': category_2_score_2, 'price': category_3_score_2, 'ambience': category_4_score_2}

print("Restaurant 2 Sentiment Scores")
print("-----------------")
print("service: ", category_1_score_2)
print("food: ", category_2_score_2)
print("price: ", category_3_score_2)
print("ambience: ", category_4_score_2)

Restaurant 2 Sentiment Scores
-----------------
service:  4
food:  2
price:  0
ambience:  0


In [11]:
print("Restaurant 1 or 2?")
print("-----------------")
for attribute in rest_1_scores:
    if rest_1_scores[attribute] > rest_2_scores[attribute]:
        print(f"{attribute}: Restaurant 1 has better {attribute} than Restaurant 2.")
    elif rest_1_scores[attribute] < rest_2_scores[attribute]:
        print(f"{attribute}: Restaurant 2 has better {attribute} than Restaurant 1.")
    else:
        print(f"{attribute}: Restaurant 1 and Restaurant 2 have the same level of {attribute}.")

Restaurant 1 or 2?
-----------------
service: Restaurant 2 has better service than Restaurant 1.
food: Restaurant 2 has better food than Restaurant 1.
price: Restaurant 2 has better price than Restaurant 1.
ambience: Restaurant 1 has better ambience than Restaurant 2.
