In [None]:
import os
import torch
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

# Expose only physical GPU 2 to this process; inside the process it is then
# addressed as cuda:0. The mask must be set before the first CUDA call.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
# Fall back to the CPU when no CUDA device is visible (e.g. a machine without
# GPU 2) so the notebook still runs end-to-end instead of failing when the
# pipeline tries to move the model to a non-existent device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Run configuration shared by the cells below.
prompt = "lego_absa"         # passed as the "prompt" entry of the task dict
answer = "lego_absa"         # task dict "answer" entry; also the AnswerCatcher method name looked up via getattr
se_order = "aos"             # passed as the "se_order" entry of the task dict
model_path = "./output/GAS"  # directory the model and tokenizer are loaded from

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the seq2seq model and its matching tokenizer from the directory
# stored in model_path.
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [3]:
# Bundle the loaded model and tokenizer into a text2text-generation pipeline
# that runs on the device selected in the configuration cell.
pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)

In [None]:
import preprocess
# Path of the text file handed to data_reader.do() further down.
data_path = "../data/test.txt"
# Project helpers from the local preprocess module: the reader loads the
# file, the augmentator builds the model inputs/targets from it.
# NOTE(review): their exact behavior lives in preprocess.py — not visible here.
data_reader = preprocess.DataReader()
data_augmentator = preprocess.DataAugmentator()

In [5]:
# Set max_length to 128 on the model's config object.
# NOTE(review): presumably this caps the generated sequence length used by
# `pipe`; newer transformers releases prefer generation_config or a
# max_length argument at call time — confirm against the installed version.
model.config.max_length = 128

In [6]:
# Show the example task configurations offered by DataAugmentator; the value
# is rendered as this cell's output (dicts with se_order / prompt / answer).
data_augmentator.task_example()

[{'se_order': 'aos', 'prompt': 'lego_absa', 'answer': 'lego_absa'},
 {'se_order': 'ao', 'prompt': 'lego_absa', 'answer': 'lego_absa'},
 {'se_order': 'as', 'prompt': 'lego_absa', 'answer': 'lego_absa'},
 {'se_order': 'a', 'prompt': 'lego_absa', 'answer': 'lego_absa'},
 {'se_order': 'o', 'prompt': 'lego_absa', 'answer': 'lego_absa'}]

In [7]:
# Read the raw examples from disk.
data = data_reader.do(data_path)

# Build model inputs/targets for a single task described by the configured
# se_order / prompt / answer, keeping the original example order.
# NOTE(review): the meaning of the positional "aos" and 1 arguments is defined
# by DataAugmentator.do — confirm there before changing them.
augmented_data = data_augmentator.do(
    data,
    "aos",
    [{"se_order": se_order, "prompt": prompt, "answer": answer}],
    1,
    shuffle=False,
)

100%|██████████| 1000/1000 [00:00<00:00, 11788.74it/s]


In [8]:
import postprocess

# Helpers from the local postprocess module:
# - answer_catcher provides the parsing method that is looked up by name
#   (via getattr) in the next cell;
# - cleaner is later called with the tokenizer's special tokens to remove
#   from the decoded predictions.
answer_catcher = postprocess.AnswerCatcher()
cleaner = postprocess.Cleaner()

In [9]:
# Resolve the AnswerCatcher method whose name is stored in `answer`
# ("lego_absa" in the configuration cell); raises AttributeError if the
# catcher has no method with that name.
catch_answer_fn = getattr(answer_catcher, answer)

In [10]:
# Model inputs, one per augmented example.
inputs = [sample["input"] for sample in augmented_data]
# Separate list holding the same strings, passed to the answer catcher as
# its third argument.
texts = list(inputs)
# Parse each reference output into the structured form used for scoring.
targets = [
    catch_answer_fn(sample["output"], sample["se_order"], text)
    for sample, text in zip(augmented_data, texts)
]

In [11]:
# Run generation over every input. return_tensors=True asks the pipeline for
# token ids instead of decoded text; they are read from the
# "generated_token_ids" field in the next cell.
preds = pipe(inputs, return_tensors=True)

In [12]:
# Decode the generated token ids back into strings.
# NOTE(review): `preds` is rebound from pipeline results to decoded strings,
# so re-running this cell without re-running the generation cell will fail.
preds = tokenizer.batch_decode(
    [generation["generated_token_ids"] for generation in preds]
)

In [13]:
# Clean the decoded strings, passing the tokenizer's pad and eos token
# strings as the items to remove.
# NOTE(review): the exact cleaning rules live in postprocess.Cleaner.many —
# not visible here.
preds = cleaner.many(preds, remove=[tokenizer.pad_token, tokenizer.eos_token])

In [14]:
# Parse every cleaned prediction with the same catcher (and the same
# se_order / text arguments) that was used for the targets, so both sides
# are scored in the same structured form.
preds = [
    catch_answer_fn(prediction, sample["se_order"], text)
    for prediction, sample, text in zip(preds, augmented_data, texts)
]

In [15]:
from evaluation import summary_score

# Score the parsed predictions against the parsed targets; the result
# (shown in the next cell) carries recall, precision and f1_score entries.
score = summary_score(preds, targets)

In [16]:
# Display the metrics returned by summary_score.
score

{'recall': 0.8531007751937985,
 'precision': 0.9033319621554916,
 'f1_score': 0.8774981025862469}

In [17]:
# Display the parsed predictions for manual inspection.
# NOTE(review): this renders the entire list; consider a slice such as
# preds[:10] to keep the saved notebook output short.
preds

[[{'opini': 'ramah'},
  {'opini': 'nyaman'},
  {'opini': 'lengkap'},
  {'opini': 'kurang panas'}],
 [{'opini': 'tidak terlalu jauh'}],
 [{'opini': 'terjangkau'}, {'opini': 'nyaman'}],
 [{'opini': 'cukup baik'}, {'opini': 'tanpa ada'}],
 [{'opini': 'bersih'}, {'opini': 'unik'}, {'opini': 'tidak jernih'}],
 [{'opini': 'bersih'},
  {'opini': 'kotor'},
  {'opini': 'bau'},
  {'opini': 'susah'}],
 [{'opini': 'tidak berfungsi'}, {'opini': 'tidak ada'}],
 [{'opini': 'kurang terawat'}],
 [{'opini': 'baik'}],
 [{'opini': 'keras'},
  {'opini': 'tebal'},
  {'opini': 'tidak bisa ditutup rapat'},
  {'opini': 'tidak ada'},
  {'opini': 'rusak'},
  {'opini': 'tidak ada'},
  {'opini': 'berantakan'}],
 [{'opini': 'okelah'}, {'opini': 'cukup bersih'}],
 [{'opini': 'sangat bagus'},
  {'opini': 'kurang berfungsi dengan baik'},
  {'opini': 'cukup baik'}],
 [{'opini': 'kurang dingin'}, {'opini': 'rata ngilang'}],
 [{'opini': 'kurang kencang'}, {'opini': 'enak banget'}],
 [{'opini': 'baik'}, {'opini': 'berkesa