# Aspect-Based Sentiment Analysis Using Generative Language Modeling

In [1]:
from transformers import AutoModel, AutoTokenizer, GPT2LMHeadModel
import os
import glob
from tqdm import tqdm
import random
import numpy as np
import torch

from datasets import load_dataset

import transformers
import logging
import re


In [2]:

def set_global_logging_level(level=logging.ERROR, prefices=[""]):
    """
    Override logging levels of different modules based on their name as a prefix.
    It needs to be invoked after the modules have been loaded so that their loggers have been initialized.

    Args:
        - level: desired level. e.g. logging.INFO. Optional. Default is logging.ERROR
        - prefices: list of one or more str prefices to match (e.g. ["transformers", "torch"]). Optional.
          Default is `[""]` to match all active loggers.
          The match is a case-sensitive `module_name.startswith(prefix)`.
          A single bare string is accepted too and is treated as one prefix.

    Returns:
        None. Matching loggers have their level changed in place.
    """
    # A lone string would otherwise be joined character by character
    # ("torch" -> "t|o|r|c|h"), silently matching far too many loggers.
    if isinstance(prefices, str):
        prefices = [prefices]

    # Escape every prefix so that it is matched literally: without this, a
    # prefix such as "my.pkg" is interpreted as a regex and also matches
    # "myXpkg", which contradicts the documented `startswith` semantics.
    alternation = "|".join(re.escape(prefix) for prefix in prefices)
    prefix_re = re.compile(f"^(?:{alternation})")

    # Iterate over a snapshot of the registered names; `getLogger` may write
    # to the registry while we are walking it.
    for name in list(logging.root.manager.loggerDict):
        if prefix_re.match(name):
            logging.getLogger(name).setLevel(level)

# Raise the level to ERROR for every logger whose name starts with one of
# these library prefixes, to keep the notebook output readable.
set_global_logging_level(
    level=logging.ERROR,
    prefices=["transformers", "nlp", "tensorflow", "tensorboard", "wandb"],
)

# load model

In [12]:
# Index of the CUDA device to run on (used only if that device exists).
GPU = 7
# Fine-tuned checkpoint directory. Set the ABSA_MODEL_DIR environment variable
# to load a copy stored elsewhere without editing this cell.
model_dir = os.environ.get(
    'ABSA_MODEL_DIR',
    '/export/share/ehosseiniasl/absa/checkpoints/restaurant_laptop/checkpoint-5000/',
)
# Model weights and tokenizer are both read from the same checkpoint directory.
model = GPT2LMHeadModel.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Requesting a CUDA index that does not exist raises at `model.to(...)`, so
# fall back to the CPU when CUDA is unavailable or has fewer than GPU+1 devices.
if torch.cuda.is_available() and GPU < torch.cuda.device_count():
    device = torch.device("cuda", index=GPU)
else:
    device = torch.device("cpu")
model.to(device)
# Last expression of the cell: displays where the model ended up.
model.device

device(type='cuda', index=7)

# prepare input text

In [13]:
# Marker tokens used to build and parse the model's text sequences.

# Opening marker of the aspect-term segment, paired with the end-of-text marker.
target_token, target_end_token = "<|term|>", "<|endoftext|>"
# Opening and closing markers placed around the review text.
s_token, s_end_token = "<|review|>", "<|endofreview|>"

#### More test data can be loaded from: <br> `/export/share/ehosseiniasl/absa/semeval1416_restaurants_laptops_aspect_term_aspect_category_test.txt`


In [10]:

# Example review to analyse.
text = "it was pleasantly uncrowded, the service was delightful, the garden adorable, the food (from appetizers to entrees) was delectable."

# Prompt layout: end-of-text marker, review opener, the review, review closer,
# each separated by a single space.
prompt_parts = [target_end_token, s_token, text, s_end_token]
sequence = " ".join(prompt_parts)

print(f"input sequence:\n{sequence}")

input sequence:
<|endoftext|> <|review|> it was pleasantly uncrowded, the service was delightful, the garden adorable, the food (from appetizers to entrees) was delectable. <|endofreview|>


# run model generation

In [9]:

max_length = 128

# Step 1: tokenize the prompt into a tensor and move it to the model's device.
prompt_ids = tokenizer.encode(sequence, return_tensors='pt').to(device)
# Step 2: generate with sampling enabled. No seed is set in this cell, so
# repeated runs may produce different text.
generated_ids = model.generate(prompt_ids, max_length=max_length, do_sample=True)
# Step 3: decode the first returned sequence back into a string.
out = tokenizer.decode(generated_ids[0])

# Each segment is the text after the LAST occurrence of its opening marker,
# cut at the FIRST occurrence of the following marker. Closing markers such
# as <|endofterm|> are not stripped and therefore remain in the segment.
# If a marker is absent, the corresponding cut is simply a no-op.
review = out.rpartition("<|review|>")[2].partition("<|term|>")[0]
review_text = f"<|review|> {review}"

term = out.rpartition("<|term|>")[2].partition("<|category|>")[0]
term_text = f"<|term|> {term}"

category = out.rpartition("<|category|>")[2].partition("<|endoftext|>")[0]
category_text = f"<|category|> {category}"

# Show the raw generation followed by the two extracted segments.
print(f"\nmodel raw output:\n{out}")
for segment_text in (term_text, category_text):
    print(f"\n {segment_text}")



model raw output:
<|endoftext|> <|review|> it was pleasantly uncrowded, the service was delightful, the garden adorable, the food (from appetizers to entrees) was delectable. <|endofreview|> <|term|> service positive, garden positive, food positive, appetizers positive, entrees positive <|endofterm|> <|category|> ambience general positive, food quality positive, service general positive <|endofcategory|> <|endoftext|>

 <|term|>  service positive, garden positive, food positive, appetizers positive, entrees positive <|endofterm|> 

 <|category|>  ambience general positive, food quality positive, service general positive <|endofcategory|> 
