In [1]:
import os
import torch
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

os.environ["CUDA_VISIBLE_DEVICES"] = "2"
device = torch.device("cuda:0")

prompt = "lego_absa"
answer = "lego_absa"
se_order = "aos"
model_path = "./output/IndoLEGO-ABSA-v3"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [27]:
pipe = pipeline(task="text2text-generation",model=model, tokenizer=tokenizer, device=device)

In [4]:
import preprocess
data_path = "../data/absa/id/william/test.txt"
data_reader = preprocess.DataReader()
data_augmentator = preprocess.DataAugmentator()

In [24]:
model.config.max_length = 128

In [5]:
data_augmentator.task_example()

[{'se_order': 'aos', 'prompt': 'lego_absa', 'answer': 'lego_absa'},
 {'se_order': 'ao', 'prompt': 'lego_absa', 'answer': 'lego_absa'},
 {'se_order': 'as', 'prompt': 'lego_absa', 'answer': 'lego_absa'},
 {'se_order': 'a', 'prompt': 'lego_absa', 'answer': 'lego_absa'},
 {'se_order': 'o', 'prompt': 'lego_absa', 'answer': 'lego_absa'}]

In [6]:
data = data_reader.do(data_path)
augmented_data = data_augmentator.do(data, "aos", [{"se_order" : se_order, "prompt" : prompt, "answer" : answer}], 1, shuffle=False)

100%|██████████| 1000/1000 [00:00<00:00, 16303.37it/s]


In [7]:
import postprocess

answer_catcher = postprocess.AnswerCatcher()

In [8]:
catch_answer_fn = getattr(answer_catcher, answer)

In [9]:
inputs = [el["input"] for el in augmented_data]
texts = [el for el in inputs]
targets = [catch_answer_fn(el["output"], el["se_order"], t) for el, t in zip(augmented_data, texts)]

In [36]:
tokenizer.batch_decode([el["generated_token_ids"] for el in pipe(inputs[:1], return_tensors=True)])

['<pad><extra_id_0> pelayanan <extra_id_1> ramah <extra_id_2> positif ; <extra_id_3> kamar <extra_id_4> nyaman <extra_id_5> positif ; <extra_id_6> fasilitas <extra_id_7> lengkap <extra_id_8> positif ; <extra_id_9> airnya showernya <extra_id_10> kurang panas <extra_id_11> negatif</s>']

In [17]:
preds = pipe(inputs)

In [18]:
preds

[{'generated_text': 'pelayanan  ramah  positif ;  kamar  nyaman  positif ;'},
 {'generated_text': 'NULL  tidak terlalu jauh  positif'},
 {'generated_text': 'harga  terjangkau  positif ;  fasilitas  nyaman  positif'},
 {'generated_text': 'kondisinya  cukup baik  positif ;  lift  tanpa ada '},
 {'generated_text': 'kamar  bersih  positif ;  bentuknya  unik  positif ;'},
 {'generated_text': 'room  bersih  positif ;  linen  kotor '},
 {'generated_text': 'air hangat  tidak berfungsi  negatif'},
 {'generated_text': 'hotelnya  kurang terawat  negatif'},
 {'generated_text': 'NULL  baik  positif'},
 {'generated_text': 'bantal  keras  negatif ;  bantal  tebal '},
 {'generated_text': 'wifi  okelah  positif ;  NULL  cukup'},
 {'generated_text': 'air hangatnya  kurang berfungsi dengan baik  negatif ;  NULL '},
 {'generated_text': 'acnya  kurang dingin  negatif ;  tv channel  rata'},
 {'generated_text': 'sarapan  dapat  positif ;  wifinya  kurang kencang '},
 {'generated_text': 'pelayanan  baik  posi

In [11]:
preds = [catch_answer_fn(p["generated_text"], el["se_order"], t) for p, el, t in zip(preds, augmented_data, texts)]

In [12]:
from evaluation import summary_score

score = summary_score(preds, targets)

In [13]:
score

{'recall': 0.0, 'precision': 0, 'f1_score': 0}