# Load Modules

In [1]:
import os

# The later cells import `utils.*` and use paths such as `./model_artifacts`
# and `./datasets`, all resolved relative to the parent of the start directory.
# A bare `os.chdir("..")` climbs one more level on every re-run of this cell,
# so the move is recorded and performed only once per kernel session.
if "_PROJECT_ROOT" not in globals():
    os.chdir("..")
    _PROJECT_ROOT = os.getcwd()

In [2]:
from transformers import AutoTokenizer
from utils.tokenizer import TokenizerPreprocessor
from datasets import load_dataset
from evaluate import evaluator

from transformers import (
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainingArguments,
)

# Setup 

In [3]:
# Local directory of the exported model. The same path is used below to load
# both the model and its tokenizer; it is relative to the working directory.
MODEL_CHECKPOINT = "./model_artifacts/exported_model_pt/"

In [4]:
# Split name -> JSON Lines file. Passed as `data_files` to `load_dataset`,
# so the loaded dataset is indexed with the "test" key further down.
SPLIT_CONFIG = {"test": "./datasets/correction_test.jsonl"}

In [5]:
# Load the seq2seq model from the local checkpoint directory. This single
# instance is reused by all three evaluation sections of the notebook.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_CHECKPOINT)

In [6]:
# Read the raw JSON Lines split(s) first; this step does not need the tokenizer.
raw_datasets = load_dataset("json", data_files=SPLIT_CONFIG)

# The tokenizer is loaded from the same checkpoint directory as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, legacy=True)

# Project preprocessing callable; input and target length limits are both 128.
tp = TokenizerPreprocessor(tokenizer=tokenizer, max_input_length=128, max_target_length=128)

# Apply the preprocessor batch-wise to every loaded split.
tokenized_datasets = raw_datasets.map(tp, batched=True)

# Evaluate (Default)

In [7]:
# Evaluator for the "text2text-generation" task. No metric is passed to
# `compute()` in this notebook, so the evaluator's default metric is used
# (the recorded output reports BLEU fields).
task_evaluator = evaluator("text2text-generation")

In [8]:
# Score a reproducible random subset of the raw test split.
# `seed=42` makes the shuffle deterministic. The subset size is capped at the
# split length because `select(range(1000))` fails on a split with fewer rows.
test_split = raw_datasets["test"]
eval_subset = test_split.shuffle(seed=42).select(range(min(1000, len(test_split))))

# "from" holds the input text and "to" the reference text.
# NOTE(review): the recorded run shows length_ratio ~0.67 and
# brevity_penalty ~0.62, i.e. outputs are much shorter than the references.
# Check whether a default generation length limit is truncating them (TODO confirm).
task_evaluator.compute(
    model_or_pipeline=model,
    data=eval_subset,
    tokenizer=tokenizer,
    input_column="from",
    label_column="to",
)

{'bleu': 0.22899090475744432,
 'precisions': [0.5906562847608454,
  0.42094017094017094,
  0.31414473684210525,
  0.2427870744373918],
 'brevity_penalty': 0.6170787554426814,
 'length_ratio': 0.6744186046511628,
 'translation_length': 9889,
 'reference_length': 14663,
 'total_time_in_seconds': 77.96180178200302,
 'samples_per_second': 12.82679436778799,
 'latency_in_seconds': 0.07796180178200302}

# Evaluate (Trainer)

In [9]:
from utils.metrics import CustomTrainer

In [10]:
# Batch collator handed to the trainer below. It is built from the tokenizer
# only; no `model` argument is supplied.
data_collator = DataCollatorForSeq2Seq(tokenizer)

In [11]:
# Arguments for the trainer-based evaluation. The visible cells only call
# `trainer.evaluate()`, never `train()`.
args = Seq2SeqTrainingArguments(
    "./results/eval_run",  # output directory (first positional argument)
    evaluation_strategy="epoch",
    per_device_eval_batch_size=32,
    weight_decay=0.01,  # NOTE(review): optimizer setting; presumably unused when only evaluating - confirm
    predict_with_generate=True,  # predictions come from generate(), so text metrics can be computed
    fp16=False,
    logging_steps=1,
    push_to_hub=False,
    logging_strategy="steps",
    report_to=[],  # no reporting integrations listed
    greater_is_better=False,  # NOTE(review): no metric_for_best_model is set here - confirm this is intended
    dataloader_pin_memory=True,
)

In [12]:
# Trainer used for evaluation only (no train_dataset is supplied).
# `CustomTrainer` is imported from utils.metrics; `show_extra_metrics` is its
# own option - presumably what adds the WER/CER entries in the recorded
# output (TODO confirm in utils.metrics).
trainer = CustomTrainer(
    model,
    args,
    eval_dataset=tokenized_datasets["test"].select(range(16 * 3)),  # first 48 unshuffled rows only, not the full split
    data_collator=data_collator,
    tokenizer=tokenizer,
    show_extra_metrics=True,
)

In [13]:
# Evaluate on the `eval_dataset` given to the trainer; the returned metrics
# dict is shown as the cell output.
trainer.evaluate()

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 1.932665467262268,
 'eval_bleu': 22.1788,
 'eval_wer': 0.5864,
 'eval_cer': 0.4602,
 'eval_gen_len': 14.2708,
 'eval_runtime': 0.3546,
 'eval_samples_per_second': 135.367,
 'eval_steps_per_second': 5.64}

# Evaluate (Manual)

In [14]:
import torch

In [15]:
# Single input sentence used for the manual generation check below.
text = "আজ শুক্রাবার"

In [16]:
# Manual single-sentence generation with autograd disabled.
# The encoded inputs are moved to whatever device the model currently lives on
# instead of a hard-coded `.cuda()`, so the cell also works on CPU-only
# machines. The whole encoding is passed to `generate` so the attention mask
# travels with the ids.
with torch.inference_mode():
    tokenized_text = tokenizer(text, return_tensors="pt").to(model.device)
    output = model.generate(**tokenized_text, max_new_tokens=128)

In [17]:
# Decode the generated ids back to text. Special tokens are not skipped, so
# markers such as <pad> and </s> appear in the displayed strings.
tokenizer.batch_decode(output)

['<pad><extra_id_0> শুক্রাবার।</s>']