In [1]:
!pip install -r ../requirements.txt

Collecting argparse (from -r ../requirements.txt (line 7))
  Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse
Successfully installed argparse-1.4.0


### Read Data

In [1]:
import nltk
import re
import csv

import codecs
from bs4 import BeautifulSoup
# download the necessary nltk packages (only needs to be done once)
nltk.download('punkt')
nltk.download('stopwords')

def read_reviews_from_csv(file_path):
    ratings = []
    reviews = []
    with open(file_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            # Assuming rating is the first item in each row, and review is the second item
            rating, review = row[0], row[1]
            ratings.append(int(rating)) # Convert rating to integer if needed
            reviews.append(review)
    return ratings, reviews


# function to clean a review text
def clean_review(review):
    # convert to lowercase
    review = review.lower()
    # remove non-alphanumeric characters
    review = re.sub(r'[^a-zA-Z0-9\s]', '', review)
    # remove extra whitespaces
    review = re.sub(r'\s+', ' ', review).strip()
    # tokenize the cleaned review into words using nltk's word_tokenize() function
    tokens = nltk.word_tokenize(review)
    tokens = [str(token) for token in tokens]
    # join the tokens back into a sentence using ' ' as a separator
    review = ' '.join(tokens)
    # return cleaned review
    return review


def get_ratings_sentences():
    ratings, reviews = read_reviews_from_csv("../data/reviews.csv")
    # clean the reviews and split them into sentences
    sentences = []
    for idx, review in enumerate(reviews):
        # clean the review text
        cleaned_review = clean_review(review)
        # split the cleaned review into sentences using nltk's sent_tokenize() function
        review_sentences = nltk.sent_tokenize(cleaned_review)
        # append the sentences to the list
        sentences.extend(review_sentences)

    return sentences


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shahedabuqamar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/shahedabuqamar/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Call Model 
Instruct ATE + SA + Clustering

In [3]:
import sys

# Add the parent directory of ABSA to the module search path
sys.path.append('..')

from models.SA import evaluate
from models.Clustering import scores
from scripts import script
sentences = get_ratings_sentences()
category_1_score = 0
category_1_total = 0
category_2_score = 0
category_2_total = 0
category_3_score = 0
category_3_total = 0
category_4_score = 0
category_4_total = 0

for sent in sentences:
	print("Sentence:" , sent)
	terms = script.return_iabsa(script.Task.ATE, "../models/InstructABSA/InstructABSA/Models/ate/allenaitk-instruct-base-def-pos-ate_check", 2, sent)
	print("Aspects:" , terms)
	labels = evaluate.evaluate(sent, terms)
	print("Polarities: " , labels)
	categories_scores, categories_total =  scores.get_clusters(terms, labels)
	print("Clustering scores: ", categories_scores)
	category_1_score += categories_scores[0]
	category_1_total += categories_total[0]
	category_2_score += categories_scores[1]
	category_2_total += categories_total[1]
	category_3_score += categories_scores[2]
	category_3_total += categories_total[2]
	category_4_score += categories_scores[3]
	category_4_total += categories_total[3]

Sentence: very crowded but we did get seated quickly barely any service my meal was ok it was fine but nothing special very busy area
Aspects: ['service', 'meal']
{'meat': 0, 'feet': 1, 'recognize': 2, 'Tart': 3, 'marginally': 4, 'accept': 5, 'roe': 6, 'gives': 7, 'LOVED': 8, 'shoestring': 9, 'OK': 10, 'Nov.': 11, 'critic': 12, 'Napoli': 13, 'certain': 14, 'carte': 15, 'Nanbu': 16, 'Maybe': 17, 'village': 18, 'asks': 19, 'noisy': 20, 'chill': 21, 'Luckily': 22, 'chopsticks': 23, 'Midtown': 24, 'Leaf': 25, 'decrease': 26, 'major': 27, "'": 28, 'assured': 29, 'had': 30, 'deegan': 31, 'bend': 32, 'spectacular': 33, 'pretentious': 34, 'feeling': 35, 'spice': 36, 'truly': 37, 'entered': 38, 'watery': 39, 'pricy': 40, 'sample': 41, 'remaining': 42, 'January': 43, 'Any': 44, 'insanely': 45, 'filets': 46, '40pp': 47, 'whatsoever': 48, 'creative': 49, 'Tostada': 50, 'service': 51, 'chefs': 52, 'mint': 53, 'accompanied': 54, 'trays': 55, 'rushed': 56, 'French': 57, 'tag': 58, '8:00': 59, 'rest':

KeyboardInterrupt: 