In [1]:
import pandas as pd
import torch
import warnings
from tqdm import tqdm
from transformers import DataCollatorWithPadding
from transformers import T5Tokenizer
from torch.utils.data import DataLoader
from datasets import Dataset
import re
from transformers import T5Tokenizer, T5ForConditionalGeneration

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def tokenize(batch, tokenizer):
    """Tokenize the "question" field of a batch.

    Args:
        batch: Mapping-like object whose "question" entry holds the text to encode.
        tokenizer: Callable tokenizer; invoked with padding="max_length" and
            truncation=True.

    Returns:
        Whatever the tokenizer call returns for the questions.
    """
    questions = batch["question"]
    tokenizer_options = {"padding": "max_length", "truncation": True}
    return tokenizer(questions, **tokenizer_options)

def generate_encodings(tokenizer, dataset):
    """Encode the dataset's questions and move the result to the compute device.

    Args:
        tokenizer: Callable tokenizer; invoked with padding=True, truncation=True
            and return_tensors="pt".
        dataset: Object whose "question" entry holds the texts to encode.

    Returns:
        The tokenizer output after `.to(device)`, where device is 'cuda' when
        torch reports CUDA as available and 'cpu' otherwise.
    """
    if torch.cuda.is_available():
        target_device = 'cuda'
    else:
        target_device = 'cpu'
    questions = dataset["question"]
    batch = tokenizer(questions, padding=True, truncation=True, return_tensors="pt")
    return batch.to(target_device)

def generate_texts(model, tokenizer, encodings):
    """Generate output sequences for the encodings and decode them to strings.

    Args:
        model: Object exposing `generate(**encodings)`.
        tokenizer: Object exposing `batch_decode(ids, skip_special_tokens=...)`.
        encodings: Mapping unpacked as keyword arguments into `model.generate`.

    Returns:
        The result of `tokenizer.batch_decode` with special tokens skipped.
    """
    # Generation runs with gradient tracking disabled; decoding happens outside
    # that context.
    with torch.no_grad():
        output_ids = model.generate(**encodings)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)

def get_accuracy(expected, actual):
    """Return the fraction of reference answers matched by the generated texts.

    The last number found in each text is taken as the prediction and compared,
    as an integer, with the reference answer at the same position.

    Args:
        expected: Sequence of generated text strings (this is what
            `evaluate_model` passes as its predictions).
        actual: Object whose "answer" entry is a sequence of reference answers,
            each convertible with `int()`.

    Returns:
        Number of correct predictions divided by the number of reference
        answers, or 0.0 when there are no reference answers. Texts with no
        number, or whose last number is not integral, count as incorrect.
    """
    answers = actual["answer"]
    total = len(answers)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    correct = 0
    # zip() stops at the shorter sequence, so surplus predictions cannot index
    # past the answers and missing predictions simply count as incorrect.
    for text, answer in zip(expected, answers):
        numbers = re.findall(r"(?<!\d)([-]?\d*\.?\d+)", text)
        if not numbers:  # No number was found in answer
            continue
        if len(numbers) > 1:
            warnings.warn("WARNING: Prediction contained multiple integers; resorting to the last number found.")
        token = numbers[-1]
        try:
            # Parse plain integers exactly (no float rounding for large values).
            value = int(token)
        except ValueError:
            # Token has a decimal point: accept it only if it is integral ("12.0").
            as_float = float(token)
            if not as_float.is_integer():  # All examples' answers should be integers
                continue
            value = int(as_float)
        if value == int(answer):
            correct += 1
    return correct / total

def evaluate_model(model, tokenizer, dataset, batch_size):
    """Generate predictions for the dataset in batches and score them.

    The dataset is split into contiguous shards with `dataset.shard`; each shard
    is encoded, passed through the model, and decoded, and the collected texts
    are scored with `get_accuracy`.

    Args:
        model: Model passed through to `generate_texts`.
        tokenizer: Tokenizer passed through to `generate_encodings` and
            `generate_texts`.
        dataset: Dataset supporting `len()` and
            `.shard(num_shards, index, contiguous=True)`.
        batch_size: Maximum number of examples per shard; must be positive.

    Returns:
        The accuracy reported by `get_accuracy`, or 0.0 for an empty dataset.

    Raises:
        ValueError: If `batch_size` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    total = len(dataset)
    if total == 0:
        # Nothing to evaluate; skip generation and scoring entirely.
        return 0.0

    # Ceiling division: floor division produced zero shards (no evaluation at
    # all) for datasets smaller than batch_size and let shards exceed it.
    shards = -(-total // batch_size)
    predictions = []
    for shard_index in tqdm(range(shards)):
        dataset_shard = dataset.shard(shards, shard_index, contiguous=True)
        encodings = generate_encodings(tokenizer, dataset_shard)
        generated_texts = generate_texts(model, tokenizer, encodings)
        predictions.extend(generated_texts)
    # Return the score instead of discarding it so callers can display or use it.
    return get_accuracy(predictions, dataset)

In [4]:
# Checkpoint, data file and batch size are named once so they are easy to change.
MODEL_NAME = "google/flan-t5-base"
DATASET_PATH = "multiply.csv"
BATCH_SIZE = 10

# Load the tokenizer and model from the same checkpoint.
t5_tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
t5_model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME, device_map="auto")

# Read the CSV into a DataFrame, then wrap it as a Dataset.
multiply_frame = pd.read_csv(DATASET_PATH)
multiply_dataset = Dataset.from_pandas(multiply_frame)

evaluate_model(t5_model, t5_tokenizer, multiply_dataset, BATCH_SIZE)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565, and set the legacy attribute accordingly.
  0%|          | 33/100000 [00:12<10:35:44,  2.62it/s]


KeyboardInterrupt: 