In [1]:
import os
import torch

# Restrict this process to physical GPU index 2. The variable is set before the
# first CUDA query below so that the restriction is in place when CUDA is first used.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
# Number of CUDA devices visible to this process; optuna_hp_space divides the
# candidate batch sizes by this value.
n_gpu = torch.cuda.device_count()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd

In [3]:
import sys
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
sys.path.append("../../../src/")
import data_utils

In [4]:
import random
import numpy as np
import torch

def set_seed(seed: int):
    """Seed every random number generator this notebook relies on.

    The same ``seed`` is applied, in order, to Python's ``random`` module,
    NumPy's global generator, PyTorch's default generator and all CUDA
    generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

set_seed(42)

# Dataset Utilities

In [5]:
# Root directory holding the train/dev/test files of each domain.
william_dir = {
    "hotel": "/raid/m13519061/ta/facebook-absa/data/absa/en/zhang/interim/interim_2/rest1516",
}

# Load every split of every domain in "acos" target format.
# Note that the "val" split is stored on disk as dev.txt.
william = {
    domain: {
        split: data_utils.read_data(path=root + suffix, target_format="acos")
        for split, suffix in (
            ("train", "/train.txt"),
            ("val", "/dev.txt"),
            ("test", "/test.txt"),
        )
    }
    for domain, root in william_dir.items()
}

# Data Preprocessing 1

1. AOS (ASTE)
    * AO
    * AS
    * A
    * O

2. ACS (TASD)
    * AS
    * CS
    * A
    * C

3. ACOS
    * AO
    * AS
    * CS
    * A
    * O
    * C

In [6]:
# task_tree = {
#     "aos" : ["aos","ao","as",'a','o'],
#     "asc" : ["asc","as","sc",'a','c'],
#     "oasc" : ["oasc","ao","as","sc",'a','o','c']
# }

# all_task = []
# for k,v1 in task_tree.items():
#     if k not in all_task:
#         all_task.append(k)
#     for v2 in v1:
#         if v2 not in all_task:
#             all_task.append(v2)

# print(all_task)

# Task codes grouped by how many sentiment elements each one combines.
# Each letter is a key of data_utils.SENTIMENT_ELEMENT (presumably a=aspect,
# o=opinion, c=category, s=sentiment -- confirm against data_utils).
tasks = {
    "single" : ['a', 'o', 'c'],
    "simple" : ["ao", "as", "cs"],
    "complex" : ["acos"]
}

In [7]:
# Candidate training-task mixes built from the groups in `tasks`.
# The last entry is the union of all three groups.
combination_tasks = [
    tasks["simple"],
    tasks["complex"],
    tasks["single"] + tasks["simple"],
    tasks["single"] + tasks["complex"],
    tasks["simple"] + tasks["complex"],
    tasks["single"] + tasks["simple"] + tasks["complex"]
]

In [8]:
# Every task code (single + simple + complex); the preprocessing loop below
# builds one reduced copy of the data per entry.
all_task = combination_tasks[-1]
print(all_task)

['a', 'o', 'ao', 'as', 'aos']


In [12]:
# Sanity check of data_utils.reduce_targets: reducing a target to task "s"
# keeps only its sentiment field.
data_utils.reduce_targets([{
    "aspect" : "NULL",
    "opinion" : "kocak",
    "sentiment" : "positive"
}],"s")

[{'sentiment': 'positive'}]

In [9]:
from copy import deepcopy

# William (AOS ID)
# For every domain and every task code, keep a deep copy of each split whose
# targets are first reduced to that task and then de-duplicated.
william_intermediate = {}

for domain, splits in william.items():
    william_intermediate[domain] = {}
    for task in all_task:
        reduced_splits = {}
        for split, examples in splits.items():
            # Work on a copy so the loaded data in `william` is never modified.
            reduced = deepcopy(examples)
            for idx in range(len(reduced)):
                narrowed = data_utils.reduce_targets(reduced[idx]["target"], task)
                reduced[idx]["target"] = data_utils.remove_duplicate_targets(narrowed)
            reduced_splits[split] = reduced
        william_intermediate[domain][task] = reduced_splits

# Answer Engineering

In [10]:
# Sentinel-token template: construct_answer and construct_prompt replace the
# 'X' with a running index.
mask = "<extra_id_X>"

In [11]:
# Placeholder tokens for punctuation characters. In the cells shown here this
# mapping is referenced only by the commented-out alternative implementations
# of construct_answer / construct_prompt.
added_tokens = {
    ',' : "<comma>",
    '(' : "<open_bracket>",
    ')' : "<close_bracket>",
    ';' : "<semicolon>"
}

In [12]:
def construct_answer(targets,se_order):
    """Serialise a list of target dicts into the sentinel-token answer string.

    Each target contributes one segment of the form
    ``<extra_id_i> value <extra_id_i+1> value ...`` with one sentinel per
    element of ``se_order``; segments are joined with `` ; ``. The sentinel
    index keeps counting across targets and wraps around at 100.

    Returns the literal string ``"NULL"`` when ``targets`` is empty.
    """
    if len(targets) == 0:
        return "NULL"
    segments = []
    sentinel_idx = 0
    for target in targets:
        parts = []
        for se in se_order:
            sentinel_idx %= 100
            sentinel = mask.replace('X',str(sentinel_idx))
            parts.append(sentinel + ' ' + target[data_utils.SENTIMENT_ELEMENT[se]])
            sentinel_idx += 1
        segments.append(' '.join(parts).strip())
    return " ; ".join(segments)
# def construct_answer(targets,se_order):
#     if len(targets) == 0:
#         return "NULL"
#     result = []
#     for t in targets:
#         constructed_t = []
#         for se in se_order:
#             element = t[data_utils.SENTIMENT_ELEMENT[se]]
#             for k, v in added_tokens.items():
#                 element = element.replace(k,v)
#             constructed_t.append(element)
#         constructed_t = " , ".join(constructed_t)
#         constructed_t = f"( {constructed_t} )"
#         result.append(constructed_t)
#     result = " ; ".join(result)
#     return result

# Prompt Engineering

In [13]:
def construct_prompt(text,se_order):
    """Prefix ``text`` with an instruction describing the expected output slots.

    For each element code in ``se_order`` the instruction lists
    ``<element name> : <extra_id_i>`` where ``i`` is the element's position.
    The instruction is followed by `` | `` and the original text.
    """
    slots = " ,".join(
        data_utils.SENTIMENT_ELEMENT[se] + " : " + mask.replace('X',str(position))
        for position, se in enumerate(se_order)
    )
    return f"Extract ABSA with format >> {slots} | " + text
# def construct_prompt(text,se_order):
#     prompt = []
#     for se in se_order:
#         prompt.append(data_utils.SENTIMENT_ELEMENT[se])
#     prompt = " , ".join(prompt)
#     prompt = f"( {prompt} )"
#     masked_text = text
#     for k, v in added_tokens.items():
#         masked_text = masked_text.replace(k,v)
#     result = masked_text + " | " + prompt
#     return result

# Answer Catch

In [14]:
import re

def catch_answer(output,se_order):
    """Parse a generated (or reference) answer string back into target dicts.

    Parameters
    ----------
    output : str
        Decoded text in the format produced by ``construct_answer``; may still
        contain ``<pad>`` and ``</s>`` tokens, which are removed first.
    se_order : iterable of str
        Sentiment-element codes, in the order they appear in each segment.

    Returns
    -------
    list of dict
        One dict per matched segment, keyed by the element names from
        ``data_utils.SENTIMENT_ELEMENT`` with whitespace-stripped values.
        An empty list when ``output`` is exactly ``"NULL"`` or nothing matches.
    """
    if output == "NULL":
        return []
    output = output.replace("<pad>",'').replace("</s>",'')
    # Raw f-strings: `\d` and `\s` must reach the regex engine untouched.
    # In a non-raw literal they are invalid escape sequences and trigger a
    # warning on recent Python versions.
    pattern = ""
    for se in se_order:
        group_name = data_utils.SENTIMENT_ELEMENT[se]
        if se != 's':
            # Free-text element: anything up to the segment separator ';'.
            pattern += rf"<extra_id_\d+>\s*(?P<{group_name}>[^;]+)\s*"
        else:
            # Sentiment is restricted to the three polarity labels.
            pattern += rf"<extra_id_\d+>\s*(?P<{group_name}>positive|negative|neutral)\s*"
    return [
        {name: value.strip() for name, value in match.groupdict().items()}
        for match in re.finditer(pattern,output)
    ]
# def catch_answer(output,se_order):
#     if output == "NULL":
#         return []
#     output = output.replace("<pad>",'')
#     output = output.replace("</s>",'')
#     pattern = []
#     for se in se_order:
#         if se != 's':
#             pattern.append(f"\s*(?P<{data_utils.SENTIMENT_ELEMENT[se]}>[^;]+)\s*")
#         else:
#             pattern.append(f"\s*(?P<{data_utils.SENTIMENT_ELEMENT['s']}>positive|negative|neutral)\s*")
#     pattern = ','.join(pattern)
#     pattern = f"\({pattern}\)"
#     found = [found_iter.groupdict() for found_iter in re.finditer(pattern,output)]
#     for i in range(len(found)):
#         for k, v in found[i].items():
#             found[i][k] = found[i][k].strip()
#     return found

# Data Preprocessing 2

# Prepare Tokenized Dataset

In [15]:
# Keyword arguments forwarded to the tokenizer call inside encode_id.
encoding_args = {
    "max_length" : 128,
    "padding" : True,
    "truncation" : True,
    "return_tensors" : "pt"
}

In [16]:
# Tokenizer of the google/mt5-base checkpoint (the same checkpoint model_init loads).
tokenizer_id = AutoTokenizer.from_pretrained("google/mt5-base")



In [17]:
def encode_id(dataset):
    """Tokenize the ``input`` column and, as labels, the ``output`` column.

    ``dataset`` only needs to support ``dataset["input"]`` and
    ``dataset["output"]``; tokenizer options come from ``encoding_args``.
    """
    return tokenizer_id(
        dataset["input"],
        text_target=dataset["output"],
        **encoding_args,
    )

In [18]:
# CSV file of each auxiliary (non-ABSA) dataset.
# NOTE(review): the "pos_tag" path points at an `id` directory while the other
# three point at `en` ones -- confirm this mix is intended.
supporting_path = {
    "doc_sa" : "/raid/m13519061/ta/facebook-absa/data/doc_sa/en/kaggle/interim/Restaurant_Reviews.csv",
    "pos_tag" : "../../../data/pos_tag/id/interim/data.csv",
    "ner" : "/raid/m13519061/ta/facebook-absa/data/ner/en/jia/interim/interim_2/tech.csv",
    "emotion" : "/raid/m13519061/ta/facebook-absa/data/emotion_cls/en/kaggle/interim/emotion.csv"
}

supporting_df = {name: pd.read_csv(path) for name, path in supporting_path.items()}

# Row count of the smallest auxiliary dataset; every dataset is down-sampled
# to this size so that they all contribute equally.
n_sample_supporting_ds = min(
    (frame.shape[0] for frame in supporting_df.values()),
    default=np.inf,
)

# Down-sample, renumber the rows and tag every row as a non-ABSA example.
supporting_df = {
    name: (
        frame
        .sample(n_sample_supporting_ds,random_state=42)
        .reset_index(drop=True)
        .assign(task="non_absa")
    )
    for name, frame in supporting_df.items()
}

In [19]:
from datasets import Dataset

In [20]:
# Preview the "ner" support set converted to a Hugging Face Dataset.
Dataset.from_pandas(supporting_df["ner"])

Dataset({
    features: ['input', 'output', 'task'],
    num_rows: 1000
})

In [21]:
from itertools import combinations

In [22]:
# Every non-empty subset of the four support datasets, ordered by subset size
# and, within a size, in the order the names are listed here.
supporting_data_combination = [
    combo
    for size in range(1, 5)
    for combo in combinations(("doc_sa", "pos_tag", "ner", "emotion"), size)
]

In [23]:
# Expect 15 = 2**4 - 1 non-empty subsets of the four support datasets.
print(len(supporting_data_combination))

15


In [24]:
# Preview two support sets concatenated, renumbered and shuffled with a fixed seed.
pd.concat([supporting_df["ner"],supporting_df["pos_tag"]]).reset_index(drop=True).sample(frac=1,random_state=42)

Unnamed: 0,input,output,task
1860,Ekstrak POS TAG dengan format >> pos : <extra_...,<extra_id_0> Kurs <extra_id_1> noun ; <extra_i...,non_absa
353,Ekstrak POS TAG dengan format >> pos : <extra_...,<extra_id_0> Sementara <extra_id_1> coordinati...,non_absa
1333,Ekstrak POS TAG dengan format >> pos : <extra_...,<extra_id_0> Keberhasilan <extra_id_1> noun ; ...,non_absa
905,Ekstrak POS TAG dengan format >> pos : <extra_...,<extra_id_0> BI <extra_id_1> proper noun ; <ex...,non_absa
1289,Ekstrak POS TAG dengan format >> pos : <extra_...,<extra_id_0> Menurut <extra_id_1> preposition ...,non_absa
...,...,...,...
1130,Ekstrak POS TAG dengan format >> pos : <extra_...,<extra_id_0> Kami <extra_id_1> personal pronou...,non_absa
1294,Ekstrak POS TAG dengan format >> pos : <extra_...,<extra_id_0> Padahal <extra_id_1> coordinating...,non_absa
860,Ekstrak POS TAG dengan format >> pos : <extra_...,<extra_id_0> Kurs <extra_id_1> noun ; <extra_i...,non_absa
1459,Ekstrak POS TAG dengan format >> pos : <extra_...,<extra_id_0> Tujuan <extra_id_1> noun ; <extra...,non_absa


In [25]:
def create_data_2(tasks,combo_supporting_ds=None,eval_task="aos"):
    """Build the prompt/answer datasets and their tokenized versions.

    Parameters
    ----------
    tasks : iterable of str
        Task codes whose training examples are mixed into the train split.
    combo_supporting_ds : iterable of str, optional
        Keys of ``supporting_df`` to append to the train split. Defaults to
        no supporting data.
    eval_task : str, optional
        Task code used for the val and test splits (default ``"aos"``, the
        value that used to be hard-coded).

    Returns
    -------
    (william_2, william_tok)
        ``william_2[domain][split]`` is a ``datasets.Dataset`` with the
        columns ``input``, ``output`` and ``task``.
        ``william_tok[domain][split]`` is the tokenized counterpart: a mapped
        Dataset in torch format for train/val, and the raw output of
        ``encode_id`` for test.

    Notes
    -----
    * If a task code is missing from ``william_intermediate`` its examples are
      derived on the fly from ``william`` with the same reduce / de-duplicate
      steps used to build ``william_intermediate``. Previously a missing
      ``"aos"`` entry raised ``KeyError``.
    * Only the ``input``/``output``/``task`` columns of a supporting frame are
      appended, so stray CSV columns such as ``Unnamed: 0`` cannot leak into
      the training Dataset.
    """
    if combo_supporting_ds is None:
        combo_supporting_ds = []
    shared_columns = ["input","output","task"]

    def _examples(domain, task, split):
        # Return the examples of `split` reduced to `task`, deriving them when not precomputed.
        if task in william_intermediate[domain]:
            return william_intermediate[domain][task][split]
        derived = deepcopy(william[domain][split])
        for i in range(len(derived)):
            reduced = data_utils.reduce_targets(derived[i]["target"],task)
            derived[i]["target"] = data_utils.remove_duplicate_targets(reduced)
        return derived

    def _records(domain, task, split):
        # Turn the examples of one task/split into prompt/answer records.
        return [{
            "input" : construct_prompt(el["text"],task),
            "output" : construct_answer(el["target"],task),
            "task" : task
        } for el in _examples(domain, task, split)]

    william_2 = dict()
    for domain in william_intermediate.keys():
        # TRAIN: one copy of the training data per requested task.
        train_records = []
        for basic_task in tasks:
            train_records.extend(_records(domain, basic_task, "train"))
        # VAL / TEST: evaluation task only.
        val_records = _records(domain, eval_task, "val")
        test_records = _records(domain, eval_task, "test")

        # Shuffle in the same train -> val -> test order as before so the
        # global `random` state is consumed identically.
        random.shuffle(train_records)
        random.shuffle(val_records)
        random.shuffle(test_records)

        train_df = pd.DataFrame(train_records)
        for ds_name in combo_supporting_ds:
            support = supporting_df[ds_name][shared_columns]
            train_df = pd.concat([train_df,support]).sample(frac=1,random_state=42).reset_index(drop=True)

        william_2[domain] = {
            "train" : Dataset.from_pandas(train_df),
            "val" : Dataset.from_list(val_records),
            "test" : Dataset.from_list(test_records)
        }

    william_tok = dict()
    for domain, splits in william_2.items():
        william_tok[domain] = dict()
        for split, dataset in splits.items():
            if split != "test":
                william_tok[domain][split] = dataset.map(encode_id,batched=True,remove_columns=shared_columns)
                william_tok[domain][split].set_format("torch")
            else:
                # The test split is tokenized in one call; generate_predictions
                # reads its "input_ids" and "attention_mask" tensors directly.
                william_tok[domain][split] = encode_id(dataset)

    return william_2, william_tok

# Data Collator

## Indo

In [26]:
from transformers import DataCollatorForSeq2Seq

# Batch collator for seq2seq training, built on the mT5 tokenizer `tokenizer_id`.
data_collator_id = DataCollatorForSeq2Seq(tokenizer=tokenizer_id)

# Compute Metrics

In [27]:
from transformers import EvalPrediction
from evaluation import recall, precision, f1_score, summary_score
from typing import List, Dict, Tuple
import numpy as np

def seperate_target_prediction_per_task(predictions:List[List[Dict]],targets:List[List[Dict]],tasks:List) -> Tuple[Dict[str,List],Dict[str,List]]:
    """Group row-aligned targets and predictions by their task label.

    Returns ``(per_task_targets, per_task_predictions)``: two dicts keyed by
    task whose values keep the rows in their original order. Rows are paired
    positionally, so iteration stops at the shortest of the three inputs.
    """
    per_task_targets = {}
    per_task_predictions = {}
    for target, prediction, task in zip(targets,predictions,tasks):
        per_task_targets.setdefault(task, []).append(target)
        per_task_predictions.setdefault(task, []).append(prediction)
    return per_task_targets, per_task_predictions

def preprocess_eval_preds(eval_preds:EvalPrediction,decoding_args:Dict[str,str],tokenizer:AutoTokenizer):
    """Decode the inputs, labels and predictions of an evaluation pass to text.

    Each of ``eval_preds.inputs``, ``eval_preds.label_ids`` and
    ``eval_preds.predictions`` is handled the same way: a tuple is replaced by
    its first element, a 3-D array is arg-maxed over its last axis, ``-100``
    entries are dropped, and the remaining ids are decoded with
    ``tokenizer.batch_decode(**decoding_args)``.

    Returns ``(inputs, targets, predictions)`` as lists of strings.
    """
    def _decode(ids):
        # In case the model returns more than the prediction logits
        if isinstance(ids, tuple):
            ids = ids[0]
        # in case not predict with generate
        if len(ids.shape) == 3:
            ids = np.argmax(ids,axis=-1)
        rows = [[token for token in row if token != -100] for row in ids]
        return tokenizer.batch_decode(rows,**decoding_args)

    inputs = _decode(eval_preds.inputs)
    targets = _decode(eval_preds.label_ids)
    predictions = _decode(eval_preds.predictions)

    return inputs, targets, predictions

def compute_metrics(eval_preds:EvalPrediction,decoding_args:Dict[str,str],tokenizer:AutoTokenizer,tasks:List) -> Dict[str,float]: # may not be sufficient for causal LMs
    """
    ### DESC
        Compute overall and per-task recall / precision / F1 for an evaluation pass.
        Rows whose task is "non_absa" are excluded from every metric.
    ### PARAMS
    * eval_preds: EvalPrediction instance from training.
    * decoding_args: Decoding arguments passed to the tokenizer.
    * tokenizer: Tokenizer used to decode the ids.
    * tasks: Task label of every evaluation row, aligned with eval_preds.
    ### RETURN
    * metrics: Dictionary with "overall_*" entries and one "<task>_*" triple per ABSA task.
    """
    inputs, targets, predictions = preprocess_eval_preds(eval_preds,decoding_args,tokenizer)

    # Show the first decoded row as a quick visual check (skipped when empty).
    if len(inputs) > 0 and len(targets) > 0 and len(predictions) > 0:
        print("INPUTS >>",inputs[0])
        print("TARGETS >>",targets[0])
        print("PREDS >>",predictions[0])

    # Filter targets, predictions AND task labels together. Filtering only the
    # first two would leave them misaligned with `tasks` in the per-task split.
    absa_rows = [
        (target, prediction, task)
        for target, prediction, task in zip(targets,predictions,tasks)
        if task != "non_absa"
    ]
    absa_tasks = [task for _, _, task in absa_rows]
    targets = [catch_answer(text,task) for text, _, task in absa_rows]
    predictions = [catch_answer(text,task) for _, text, task in absa_rows]

    per_task_targets, per_task_predictions = seperate_target_prediction_per_task(predictions, targets, absa_tasks)

    metrics = {}

    metrics["overall_recall"] = recall(predictions,targets)
    metrics["overall_precision"] = precision(predictions,targets)
    metrics["overall_f1_score"] = f1_score(predictions,targets)

    for task in per_task_targets.keys():
        metrics[f"{task}_recall"] = recall(per_task_predictions[task],per_task_targets[task])
        metrics[f"{task}_precision"] = precision(per_task_predictions[task],per_task_targets[task])
        metrics[f"{task}_f1_score"] = f1_score(per_task_predictions[task],per_task_targets[task])

    return metrics

# Train Arguments

In [28]:
from transformers import Seq2SeqTrainingArguments

# Training configuration. Checkpoints, evaluation and logging all happen once
# per epoch; the best checkpoint (by the "overall_f1_score" entry returned from
# compute_metrics) is reloaded at the end. Inputs are included in the
# evaluation payload because preprocess_eval_preds reads eval_preds.inputs.
train_args = Seq2SeqTrainingArguments(
    output_dir="./output",
    logging_dir="./output/log",
    num_train_epochs=20,
    learning_rate=3e-4,
    adam_epsilon=1e-08,
    gradient_accumulation_steps=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    save_strategy="epoch",
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=2,
    metric_for_best_model="overall_f1_score",
    load_best_model_at_end=True,
    include_inputs_for_metrics=True,
)

# Train

In [29]:
import torch
# Use the first visible CUDA device when one exists, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [30]:
from transformers import Seq2SeqTrainer

# trainer = {
#     "peng" : {},
#     "wan" : {},
#     "zhang" : {},
#     "william" : {}
# }

# Keep special tokens in decoded text: catch_answer matches the <extra_id_N>
# markers and removes <pad> and </s> itself.
decoding_args = {
    "skip_special_tokens" : False
}

def preprocess_logits_for_metrics(logits, targets):
    """Reduce logits to arg-max token ids before they are gathered for metrics.

    If ``logits`` is a tuple, only its first element is used. Returns
    ``(pred_ids, targets)`` with ``targets`` passed through unchanged.
    """
    if isinstance(logits,tuple):
        logits = logits[0]
    return logits.argmax(dim=-1), targets

In [31]:
from tqdm import tqdm

def generate_predictions(model,tokenizer,tokenized:torch.Tensor,device:torch.device=torch.device("cpu"),batch_size:int=16,max_len:int=128,decoding_args:Dict=None) -> List[str]:
    """Generate and decode predictions for an already tokenized dataset.

    Parameters
    ----------
    model : model exposing ``generate`` (already placed on ``device``).
    tokenizer : tokenizer providing ``pad_token_id``, ``eos_token_id`` and ``batch_decode``.
    tokenized : mapping with ``"input_ids"`` and ``"attention_mask"`` entries
        that can be batched by a ``DataLoader``.
    device : device the input batches are moved to before generation.
    batch_size : number of rows per generation batch.
    max_len : ``max_length`` passed to ``model.generate``.
    decoding_args : keyword arguments for ``tokenizer.batch_decode``
        (defaults to none).

    Returns
    -------
    list of str
        One decoded string per input row, in input order.
    """
    if decoding_args is None:
        decoding_args = {}
    # Data loader
    input_ids_data_loader = torch.utils.data.DataLoader(tokenized["input_ids"],
                        batch_size=batch_size,shuffle=False)
    attention_mask_data_loader = torch.utils.data.DataLoader(tokenized["attention_mask"],
                        batch_size=batch_size,shuffle=False)
    # Predict
    generated_rows = []
    # Generate in eval mode so dropout is disabled, then restore whatever mode
    # the caller had the model in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for input_ids, attention_mask in tqdm(zip(input_ids_data_loader,attention_mask_data_loader)):
                generated = model.generate(
                    input_ids=input_ids.to(device),
                    attention_mask=attention_mask.to(device),
                    max_length=max_len,
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                )
                # Batches may differ in generated length, so keep plain lists of ids.
                generated_rows.extend(generated.cpu().tolist())
    finally:
        model.train(was_training)
    generated_rows = [[token for token in row if token != -100] for row in generated_rows]
    return tokenizer.batch_decode(generated_rows,**decoding_args)

In [32]:
import json

def save_result(str_preds_,preds,targets,filename):
    """Write aligned raw predictions, parsed predictions and targets to a JSON file.

    ``<pad>`` and ``</s>`` are removed from every raw prediction string. The
    three inputs must have equal length (checked with ``assert``). The list of
    ``{"str_pred", "pred", "target"}`` records written to ``filename`` is also
    returned.
    """
    cleaned = [raw.replace("<pad>",'').replace("</s>",'') for raw in str_preds_]
    assert len(cleaned) == len(preds) == len(targets)
    result = [
        {"str_pred" : text, "pred" : pred, "target" : target}
        for text, pred, target in zip(cleaned,preds,targets)
    ]
    with open(filename,'w') as fp:
        json.dump(result,fp)
    return result

# William Hotel

In [33]:
# Inspect the first support-set combination.
supporting_data_combination[0]

('doc_sa',)

In [34]:
def optuna_hp_space(trial):
    """Define the Optuna search space for the hyper-parameter search.

    Samples a log-uniform learning rate, a per-device train batch size and a
    weight decay. The batch-size candidates are the totals 16/32/64/128
    divided by the number of visible GPUs (``n_gpu``).
    """
    # n_gpu is 0 when no CUDA device is visible; treat that as one device so
    # the division cannot raise ZeroDivisionError.
    n_devices = max(n_gpu, 1)
    # Never go below one example per device, and drop duplicate candidates
    # (order preserved) that can appear when there are many devices.
    batch_sizes = list(dict.fromkeys(max(total // n_devices, 1) for total in (16, 32, 64, 128)))
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 5e-4, log=True),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", batch_sizes),
        "weight_decay": trial.suggest_float("weight_decay", 0, 0.01)
    }

In [35]:
def model_init(trial=None):
    """Load a fresh google/mt5-base model and move it to ``device``.

    ``trial`` is accepted but not used.
    """
    return AutoModelForSeq2SeqLM.from_pretrained("google/mt5-base").to(device)

In [36]:
# for combo_supporting_ds_name in supporting_data_combination:
# Build the datasets: all task codes for training, plus the "ner" and "pos_tag" support sets.
william_2, william_tok = create_data_2(all_task,["ner","pos_tag"])
# for combo_task in combination_tasks:
# model = AutoModelForSeq2SeqLM.from_pretrained("google/mt5-base")
# model.to(device)
model = model_init()
# compute_metrics receives the task label of every validation row from
# william_2 (the tokenized copy no longer has the "task" column).
trainer = Seq2SeqTrainer(
        model = model,
        # model_init=model_init,
        args = train_args,
        tokenizer = tokenizer_id,
        data_collator = data_collator_id,
        train_dataset = william_tok["hotel"]["train"],
        eval_dataset = william_tok["hotel"]["val"],
        compute_metrics = lambda eval_preds: compute_metrics(eval_preds,decoding_args,tokenizer_id,william_2["hotel"]["val"]["task"]),
        preprocess_logits_for_metrics = preprocess_logits_for_metrics
    )

loading configuration file config.json from cache at /home/m13519061/.cache/huggingface/hub/models--google--mt5-base/snapshots/2eb15465c5dd7f72a8f7984306ad05ebc3dd1e1f/config.json
Model config MT5Config {
  "_name_or_path": "google/mt5-base",
  "architectures": [
    "MT5ForConditionalGeneration"
  ],
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "mt5",
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "tie_word_embeddings": false,
  "tokenizer_class": "T5Tokenizer",
  "transformers_version": "4.24.0",
  "use_cache": true,
  "vocab_size": 250112
}

loading weights file pytor

In [37]:
# def compute_objective(metrics):
#     return metrics["eval_overall_f1_score"]

In [38]:
# best_trial = trainer.hyperparameter_search(
#     direction="maximize",
#     backend="optuna",
#     hp_space=optuna_hp_space,
#     n_trials=20,
#     # compute_objective=compute_objective
# )

[I 2023-06-17 21:37:22,337] A new study created in memory with name: no-name-31c206ac-059f-4419-8cd3-552656182f11
Trial: {'learning_rate': 0.0002747421770686028, 'per_device_train_batch_size': 16, 'weight_decay': 0.0051135387555659}
loading configuration file config.json from cache at /home/m13519061/.cache/huggingface/hub/models--google--mt5-base/snapshots/2eb15465c5dd7f72a8f7984306ad05ebc3dd1e1f/config.json
Model config MT5Config {
  "_name_or_path": "google/mt5-base",
  "architectures": [
    "MT5ForConditionalGeneration"
  ],
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "mt5",
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_

Epoch,Training Loss,Validation Loss


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-1063
Configuration saved in ./output/run-0/checkpoint-1063/config.json
Configuration saved in ./output/run-0/checkpoint-1063/config.json
Model weights saved in ./output/run-0/checkpoint-1063/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-1063/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-1063/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-1063/spiece.model


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-2126
Configuration saved in ./output/run-0/checkpoint-2126/config.json
Configuration saved in ./output/run-0/checkpoint-2126/config.json
Model weights saved in ./output/run-0/checkpoint-2126/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-2126/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-2126/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-2126/spiece.model


Saving model checkpoint to ./output/run-0/checkpoint-3189
Configuration saved in ./output/run-0/checkpoint-3189/config.json
Configuration saved in ./output/run-0/checkpoint-3189/config.json
Model weights saved in ./output/run-0/checkpoint-3189/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-3189/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-3189/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-3189/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-1063] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-4252
Configuration saved in ./output/run-0/checkpoint-4252/config.json
Configuration saved in ./output/run-0/checkpoint-4252/config.json
Model weights saved in ./output/run-0/checkpoint-4252/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-4252/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-4252/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-4252/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-2126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 8


Saving model checkpoint to ./output/run-0/checkpoint-5315
Configuration saved in ./output/run-0/checkpoint-5315/config.json
Configuration saved in ./output/run-0/checkpoint-5315/config.json
Model weights saved in ./output/run-0/checkpoint-5315/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-5315/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-5315/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-5315/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-3189] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 8


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-6378
Configuration saved in ./output/run-0/checkpoint-6378/config.json
Configuration saved in ./output/run-0/checkpoint-6378/config.json
Model weights saved in ./output/run-0/checkpoint-6378/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-6378/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-6378/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-6378/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-4252] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 8


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-7441
Configuration saved in ./output/run-0/checkpoint-7441/config.json
Configuration saved in ./output/run-0/checkpoint-7441/config.json
Model weights saved in ./output/run-0/checkpoint-7441/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-7441/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-7441/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-7441/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-5315] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-8504
Configuration saved in ./output/run-0/checkpoint-8504/config.json
Configuration saved in ./output/run-0/checkpoint-8504/config.json
Model weights saved in ./output/run-0/checkpoint-8504/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-8504/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-8504/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-8504/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-6378] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-9567
Configuration saved in ./output/run-0/checkpoint-9567/config.json
Configuration saved in ./output/run-0/checkpoint-9567/config.json
Model weights saved in ./output/run-0/checkpoint-9567/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-9567/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-9567/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-9567/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-7441] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 8


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-10630
Configuration saved in ./output/run-0/checkpoint-10630/config.json
Configuration saved in ./output/run-0/checkpoint-10630/config.json
Model weights saved in ./output/run-0/checkpoint-10630/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-10630/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-10630/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-10630/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-8504] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 8


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-11693
Configuration saved in ./output/run-0/checkpoint-11693/config.json
Configuration saved in ./output/run-0/checkpoint-11693/config.json
Model weights saved in ./output/run-0/checkpoint-11693/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-11693/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-11693/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-11693/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-9567] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-12756
Configuration saved in ./output/run-0/checkpoint-12756/config.json
Configuration saved in ./output/run-0/checkpoint-12756/config.json
Model weights saved in ./output/run-0/checkpoint-12756/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-12756/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-12756/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-12756/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-10630] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-13819
Configuration saved in ./output/run-0/checkpoint-13819/config.json
Configuration saved in ./output/run-0/checkpoint-13819/config.json
Model weights saved in ./output/run-0/checkpoint-13819/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-13819/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-13819/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-13819/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-11693] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-14882
Configuration saved in ./output/run-0/checkpoint-14882/config.json
Configuration saved in ./output/run-0/checkpoint-14882/config.json
Model weights saved in ./output/run-0/checkpoint-14882/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-14882/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-14882/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-14882/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-12756] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-15945
Configuration saved in ./output/run-0/checkpoint-15945/config.json
Configuration saved in ./output/run-0/checkpoint-15945/config.json
Model weights saved in ./output/run-0/checkpoint-15945/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-15945/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-15945/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-15945/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-13819] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 8


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-17008
Configuration saved in ./output/run-0/checkpoint-17008/config.json
Configuration saved in ./output/run-0/checkpoint-17008/config.json
Model weights saved in ./output/run-0/checkpoint-17008/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-17008/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-17008/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-17008/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-15945] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-18071
Configuration saved in ./output/run-0/checkpoint-18071/config.json
Configuration saved in ./output/run-0/checkpoint-18071/config.json
Model weights saved in ./output/run-0/checkpoint-18071/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-18071/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-18071/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-18071/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-14882] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-19134
Configuration saved in ./output/run-0/checkpoint-19134/config.json
Configuration saved in ./output/run-0/checkpoint-19134/config.json
Model weights saved in ./output/run-0/checkpoint-19134/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-19134/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-19134/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-19134/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-17008] due to args.save_total_limit


INPUTS >> Ekstrak ABSA dengan format >> aspect : <extra_id_0>,opinion : <extra_id_1>,sentiment : <extra_id_2> | pelayanan memuaskan. cuma bantal airy kotor seperti tidak di cuci. dan tempat penyimpanan bantal biru airy kotor.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
TARGETS >> <extra_id_0> pelayanan <extra_id_1> memuaskan <extra_id_2> positive ; <extra_id_3> bantal airy <extra_id_4> kotor <extra_id_5> negative ; <extra_id_6> tempat penyimpanan bantal biru airy <extra_id_7> kotor <extra_id_8> negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Saving model checkpoint to ./output/run-0/checkpoint-20197
Configuration saved in ./output/run-0/checkpoint-20197/config.json
Configuration saved in ./output/run-0/checkpoint-20197/config.json
Model weights saved in ./output/run-0/checkpoint-20197/pytorch_model.bin
tokenizer config file saved in ./output/run-0/checkpoint-20197/tokenizer_config.json
Special tokens file saved in ./output/run-0/checkpoint-20197/special_tokens_map.json
Copy vocab file to ./output/run-0/checkpoint-20197/spiece.model
Deleting older checkpoint [output/run-0/checkpoint-18071] due to args.save_total_limit


In [None]:
# result_hparams = best_trial.hyperparameters

In [None]:
# with open("result_hparams.json",'w') as fp:
#     json.dump(result_hparams,fp)

In [None]:

# Run fine-tuning with the `trainer` object configured earlier in the notebook.
trainer.train()

# Label for this run. Defined once, before use, so the printed score line and
# both output file names always agree.
fname = "OURS"

# Generate raw prediction strings for the hotel test split held in `william_tok`.
# NOTE(review): 16 and 128 are passed positionally -- presumably batch size and
# maximum sequence length; confirm against generate_predictions' signature.
str_preds = generate_predictions(model, tokenizer_id, william_tok["hotel"]["test"], device, 16, 128, decoding_args)

# Parse predictions and reference outputs with the same "aos" task key so the
# two lists are compared in the same structured form.
preds = [catch_answer(el, "aos") for el in str_preds]
targets = [catch_answer(el, "aos") for el in william_2["hotel"]["test"]["output"]]

score = summary_score(preds, targets)
print(f"Score for {fname} >>", score)

# Persist the predictions (via save_result) and the score dict next to the notebook.
result = save_result(str_preds, preds, targets, fname + "_pred.json")
with open(fname + "_score.json", 'w') as fp:
    json.dump(score, fp)

In [None]:
# !rm -rf ./output

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
