In [1]:
!pip install -r ../requirements.txt



### Read Data

In [2]:
import nltk
import re
import csv

# download the necessary nltk packages (only needs to be done once)
nltk.download('punkt')
nltk.download('stopwords')


def read_reviews_from_csv(file_path):
    ratings = []
    reviews = []
    with open(file_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            # Assuming rating is the first item in each row, and review is the second item
            rating, review = row[0], row[1]
            ratings.append(int(rating)) # Convert rating to integer if needed
            reviews.append(review)
    return ratings, reviews


# function to clean a review text
def clean_review(review):
    # convert to lowercase
    review = review.lower()
    # remove non-alphanumeric characters
    review = re.sub(r'[^a-zA-Z0-9\s]', '', review)
    # remove extra whitespaces
    review = re.sub(r'\s+', ' ', review).strip()
    # tokenize the cleaned review into words using nltk's word_tokenize() function
    tokens = nltk.word_tokenize(review)
    tokens = [str(token) for token in tokens]
    # join the tokens back into a sentence using ' ' as a separator
    review = ' '.join(tokens)
    # return cleaned review
    return review


def get_ratings_sentences():
    ratings, reviews = read_reviews_from_csv("../data/reviews.csv")
    # clean the reviews and split them into sentences
    sentences = []
    for idx, review in enumerate(reviews):
        # clean the review text
        cleaned_review = clean_review(review)
        # split the cleaned review into sentences using nltk's sent_tokenize() function
        review_sentences = nltk.sent_tokenize(cleaned_review)
        # append the sentences to the list
        sentences.extend(review_sentences)

    return sentences


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shahedabuqamar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/shahedabuqamar/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Call Model 
Instruct ATE + SA + Clustering

In [3]:
import sys

# Add the parent directory of ABSA to the module search path
sys.path.append('..')

from models.SA import evaluate
from models.Clustering import scores
from scripts import script
sentences = get_ratings_sentences()

for sent in sentences:
	terms = script.return_iabsa(script.Task.ATE, "../models/InstructABSA/InstructABSA/Models/ate/allenaitk-instruct-base-def-pos-ate_check", 2, sent)
	labels = evaluate.evaluate(sent, terms)
	scores.get_clusters(terms, labels)

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2023-04-26 16:39:18.090333: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary i

['service', 'meal']
2
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
[1, 1]
2
1
1
['food', 'staff', 'atmosphere', 'gift shop', 'merchandise']
5
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1, 1]
5
1
1
1
1
1
['seats', 'bar', 'food', 'prices', 'drinks', 'food', 'bartenders', 'dinner', 'portion']
9
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1, 1, 1, 1, 1, 1]
9
1
1
1
1
1
1
1
1
1
['fajitas', 'tortillas cheeseburger', 'tropical margarita', 'ice cream surprise', 'table', 'staff']
6
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1, 1, 1]
6
1
1
1
1
1
1
['atmosphere', 'place', 'server', 'shane', 'burgers', 'desert portions']
6
transitions matrix  tensor([[ 1.0539,  0.56

Token indices sequence length is longer than the specified maximum sequence length for this model (620 > 512). Running this sequence through the model will result in indexing errors


['burger with fries', 'burger', 'meal']
3
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
[1, 1, 1]
3
1
1
1
['service', 'food', 'pricequality relation', 'server', 'lunch']
5
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1, 1]
5
1
1
1
1
1
['service', 'food', 'prices', 'menu', 'receipt', 'bud light', 'pint', 'glass']
8
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1, 1, 1, 1, 1]
8
1
1
1
1
1
1
1
1
['nachos', 'waitress', 'wine', 'steak', 'shrimp', 'chicken', 'waitress']
7
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1, 1, 1, 1]
7
1
1
1
1
1
1
1
['staff', 'meals', 'music souvenirs']
3
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])


Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors


['food', 'service', 'bill', 'staff', 'bar']
5
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1, 1]
5
1
1
1
1
1
['food', 'service', 'meals', 'bill', 'table']
5
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1, 1]
5
1
1
1
1
1
['meal', 'food', 'staff', 'live music', 'song choice']
5
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1, 1]
5
1
1
1
1
1
['food']
1
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
[1]
1
1
['food', 'drinks']
2
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
[1, 1]
2
1
1
['entrees', 'food', 'manager', 'waitstaff']
4
transitions matrix  tensor([[ 1.0539,  0.5643],
        [-0.3995,  0.5731]])
pred: 1
pred: 1
pred: 1
pred: 1
[1, 1, 1, 1]
4
1
1
1
1
['ser

KeyboardInterrupt: 