In [1]:
import os

# Expose only the GPU with index 2 to this process.
os.environ.update(CUDA_VISIBLE_DEVICES="2")

In [None]:
# List everything under ./output, then keep only entries whose suffix after the
# last '-' is a decimal number. Any other entry (log directory, stray file)
# would make the int() conversion below raise.
ckpt = os.listdir("./output")
numbered_ckpt = [name for name in ckpt if name.rsplit('-', 1)[-1].isdecimal()]
if not numbered_ckpt:
    raise ValueError("no '<name>-<number>' checkpoint entries found in ./output")

# The entry with the largest numeric suffix is treated as the latest checkpoint.
last_ckpt = max(numbered_ckpt, key=lambda x: int(x.rsplit('-', 1)[-1]))

In [None]:
import json

# Read the trainer state stored inside the most recent checkpoint directory.
last_state_path = f"./output/{last_ckpt}/trainer_state.json"
with open(last_state_path, mode='r') as state_file:
    last_state = json.load(state_file)

# The state file records which checkpoint was best; that path is the one used
# to load the tokenizer and model in the following cells.
best_state_path = model_name_or_path = last_state["best_model_checkpoint"]

In [2]:
# Length limit shared by `encoding_args["max_length"]` and by the `max_length`
# argument passed to `model.generate` inside `generate_predictions`.
max_length = 256

In [3]:
from transformers import AutoTokenizer

# Tokenizer call options: padding and truncation enabled, length capped at
# `max_length`, results returned as PyTorch tensors.
encoding_args = dict(
    max_length=max_length,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Load the tokenizer from the selected checkpoint path.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import torch
# Use the first visible CUDA device when available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [5]:
from transformers import AutoModelForSeq2SeqLM

# Load the seq2seq model from the selected checkpoint and move it to `device`.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path).to(device)

MT5ForConditionalGeneration(
  (shared): Embedding(250112, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(250112, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=768, out_features=2048, bias=False)
              (wi_1): Linear(in_features=768, out_features=2048, bias=False)
              (w

In [6]:
import data_utils
from evaluation import compute_metrics, summary_score

# Bound `gas` method of a fresh AnswerCatcher; used below to parse both the
# model outputs and the reference outputs.
catch_answer_fn = data_utils.AnswerCatcher().gas

# Keyword arguments forwarded to `tokenizer.batch_decode`; special tokens are
# kept in the decoded text and stripped in a later cell.
decoding_args = dict(skip_special_tokens=False)

In [7]:
from typing import Dict, List, Optional, Tuple

from tqdm import tqdm

def generate_predictions(model,tokenizer,data,device=torch.device("cuda:0"),decoding_args:Optional[Dict]=None) -> List[str]:
    """Generate one decoded prediction string per input text.

    Each text is tokenized and passed to ``model.generate`` on its own (no
    batching), so the inputs need no padding.

    Args:
        model: Object exposing ``generate(input_ids=..., pad_token_id=...,
            eos_token_id=..., max_length=...)`` and returning a 2-D tensor of
            token ids.
        tokenizer: Callable tokenizer that also provides ``pad_token_id``,
            ``eos_token_id`` and ``batch_decode``.
        data: Iterable of input strings.
        device: Device the input ids are moved to before generation.
        decoding_args: Optional keyword arguments forwarded to
            ``tokenizer.batch_decode``. ``None`` means no extra arguments.

    Returns:
        The decoded strings, in the same order as ``data``.

    Note:
        The generation length limit is read from the notebook-level
        ``max_length`` variable, which must be defined before calling.
    """
    # Avoid a shared mutable default: build a fresh dict when none is given.
    decode_kwargs = {} if decoding_args is None else decoding_args

    generated_ids = []
    with torch.no_grad():
        for text in tqdm(data):
            input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
            output_ids = model.generate(
                input_ids=input_ids,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                max_length=max_length,
            )
            # Convert to plain Python ints once per sequence instead of
            # iterating the tensor element by element, and drop any -100
            # placeholder ids before decoding.
            for sequence in output_ids.cpu().tolist():
                generated_ids.append([token for token in sequence if token != -100])

    return list(tokenizer.batch_decode(generated_ids, **decode_kwargs))

In [8]:
from datasets import Dataset

test_path = "../../data/absa/en/zhang/interim/interim_2/rest1516/test.txt"

# A single evaluation task description handed to `data_utils.data_gen`.
test_tasks = [
    dict(paradigm="extraction", se_order="oasc", prompt="gas", answer="gas"),
]

# Read the raw test file, expand it into task records, and wrap the records in
# a `datasets.Dataset`.
test = data_utils.read_data(test_path)
test_ds = Dataset.from_list(
    data_utils.data_gen(
        data=test,
        nt_se_order="acso",
        tasks=test_tasks,
        n_fold=1,
        algo="round_robin",
        shuffle=False,
    )
)

100%|██████████| 1000/1000 [00:00<00:00, 16847.44it/s]


In [9]:
# Run generation over every test input.
inputs = test_ds["input"]
str_preds = generate_predictions(
    model=model,
    tokenizer=tokenizer,
    data=inputs,
    device=device,
    decoding_args=decoding_args,
)

100%|██████████| 995/995 [09:46<00:00,  1.70it/s]


In [10]:
# Strip the end-of-sequence and padding markers left in the decoded text.
str_preds = [
    prediction.replace("</s>", '').replace("<pad>", '')
    for prediction in str_preds
]

In [11]:
# Parse each generated string together with the input text it was produced from.
oasc_ext_preds = [
    catch_answer_fn(prediction, "oasc", source_text)
    for prediction, source_text in zip(str_preds, test_ds["input"])
]

In [12]:
# Parse the reference outputs with the same function used for the predictions.
targets = [
    catch_answer_fn(reference, "oasc", source_text)
    for reference, source_text in zip(test_ds["output"], test_ds["input"])
]

In [13]:
# Score the parsed predictions against the parsed targets; the bare `summary`
# expression on the last line displays the result as the cell output.
summary = summary_score(oasc_ext_preds,targets)
summary

{'recall': 0.689374305812662,
 'precision': 0.8334065934065934,
 'f1_score': 0.7545787999888001}

In [None]:
# Persist the metric summary as JSON in the current working directory.
with open("summary_score.json", mode='w') as summary_file:
    json.dump(summary, summary_file)