In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset
import torch
from rich.markdown import Markdown

# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [2]:
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_name = "google/flan-t5-large"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the seq2seq weights in bfloat16, then move the model onto `device`.
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
)
model = model.to(device)



In [3]:
# Report the model's parameter count with thousands separators.
message = f"Number of parameters: {model.num_parameters():,}"
print(message)

Number of parameters: 783,150,080


In [4]:
import os

# `eval_utils` and `data_utils` are imported by bare name right after the
# directory change, so they are presumably resolved from the parent folder of
# this notebook — confirm if the notebook is ever moved.
#
# The original unconditional `%cd ..` moved one level further up on *every*
# run of this cell, so re-running it left the kernel in the wrong directory.
# Remember where the kernel was the first time this cell runs and always change
# to that directory's parent, which makes the cell safe to re-run.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))
print(os.getcwd())  # mirror `%cd`, which echoes the new working directory

from eval_utils import Evaluation
from data_utils import format_example, format_options, create_instruct_dataset

# NOTE: `eval` shadows Python's built-in eval(). The name is kept because the
# cells below call `eval.generate(...)`.
eval = Evaluation(model, tokenizer, device)

c:\Users\rockn\Documents\Maestria en Ciencias de la Computacion\tercer semestre\Tesis 2\flan-gpt2


In [5]:
# Markdown skeleton used to show one example: the prompt, the reference
# completion and the model's prediction, each under its own heading.
# Filled in with `template.format(prompt=..., completion=..., prediction=...)`.
template = (
    "\n"
    "## INPUT PROMPT\n"
    "{prompt}\n"
    "## GROUND_TRUTH\n"
    "{completion}\n"
    "## MODEL OUTPUT\n"
    "{prediction}\n"
)

## ANLI

In [10]:
# Build the instruction dataset for ANLI only and take its first example.
dataset = create_instruct_dataset(["anli"])
example = dataset[0]

# `generate` is called with a one-element list of prompts; keep the first result.
prediction = eval.generate([example["prompt"]])[0]

# Fill the shared template and render it as Markdown (last expression is displayed).
rendered = template.format(
    prompt=example["prompt"],
    completion=example["completion"],
    prediction=prediction,
)
Markdown(rendered)

Generating response... : 100%|██████████| 1/1 [00:00<00:00,  2.44it/s]


## CommonGen

In [6]:
# Build the instruction dataset for common_gen only and take its first example.
dataset = create_instruct_dataset(["common_gen"])
example = dataset[0]

# `generate` is called with a one-element list of prompts; keep the first result.
prediction = eval.generate([example["prompt"]])[0]

# Fill the shared template and render it as Markdown (last expression is displayed).
rendered = template.format(
    prompt=example["prompt"],
    completion=example["completion"],
    prediction=prediction,
)
Markdown(rendered)

Generating response... :   0%|          | 0/1 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Generating response... : 100%|██████████| 1/1 [00:02<00:00,  2.41s/it]


## SQuAD

In [8]:
# Build the instruction dataset for squad only and take its first example.
dataset = create_instruct_dataset(["squad"])
example = dataset[0]

# `generate` is called with a one-element list of prompts; keep the first result.
prediction = eval.generate([example["prompt"]])[0]

# Fill the shared template and render it as Markdown (last expression is displayed).
rendered = template.format(
    prompt=example["prompt"],
    completion=example["completion"],
    prediction=prediction,
)
Markdown(rendered)

Map:   0%|          | 0/87599 [00:00<?, ? examples/s]

Filter:   0%|          | 0/87599 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Generating response... : 100%|██████████| 1/1 [00:00<00:00,  2.09it/s]


## Cosmos QA

In [7]:
# Build the instruction dataset for cosmos_qa only and take its first example.
dataset = create_instruct_dataset(["cosmos_qa"])
example = dataset[0]

# `generate` is called with a one-element list of prompts; keep the first result.
prediction = eval.generate([example["prompt"]])[0]

# Fill the shared template and render it as Markdown (last expression is displayed).
rendered = template.format(
    prompt=example["prompt"],
    completion=example["completion"],
    prediction=prediction,
)
Markdown(rendered)

Map:   0%|          | 0/25262 [00:00<?, ? examples/s]

Filter:   0%|          | 0/25262 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Generating response... : 100%|██████████| 1/1 [00:00<00:00,  1.51it/s]


## CoQA

In [8]:
# Build the instruction dataset for coqa only and take its first example.
dataset = create_instruct_dataset(["coqa"])
example = dataset[0]

# Unlike the other dataset cells, this one passes max_tokens=150 to `generate`.
# It is still called with a one-element list of prompts; keep the first result.
prediction = eval.generate([example["prompt"]], max_tokens=150)[0]

# Fill the shared template and render it as Markdown (last expression is displayed).
rendered = template.format(
    prompt=example["prompt"],
    completion=example["completion"],
    prediction=prediction,
)
Markdown(rendered)

Map:   0%|          | 0/7199 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7199 [00:00<?, ? examples/s]

Map:   0%|          | 0/7199 [00:00<?, ? examples/s]

Generating response... : 100%|██████████| 1/1 [00:05<00:00,  5.19s/it]


## HumanEval

In [14]:
# Build the instruction dataset for human_eval only. This is the one cell that
# passes train=False to create_instruct_dataset.
dataset = create_instruct_dataset(["human_eval"], train=False)
example = dataset[0]

# `generate` is called with a one-element list of prompts; keep the first result.
prediction = eval.generate([example["prompt"]])[0]

# Fill the shared template and render it as Markdown (last expression is displayed).
rendered = template.format(
    prompt=example["prompt"],
    completion=example["completion"],
    prediction=prediction,
)
Markdown(rendered)

Map:   0%|          | 0/164 [00:00<?, ? examples/s]

Filter:   0%|          | 0/164 [00:00<?, ? examples/s]

Map:   0%|          | 0/164 [00:00<?, ? examples/s]

Generating response... :   0%|          | 0/1 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Generating response... : 100%|██████████| 1/1 [00:04<00:00,  4.07s/it]


## XSum

In [7]:
# Build the instruction dataset for xsum only and take its first example.
dataset = create_instruct_dataset(["xsum"])
example = dataset[0]

# `generate` is called with a one-element list of prompts; keep the first result.
# NOTE(review): the captured output for this cell warns that the tokenized input
# is longer than the model's stated maximum (735 > 512) — confirm whether
# `generate` should truncate long prompts.
prediction = eval.generate([example["prompt"]])[0]

# Fill the shared template and render it as Markdown (last expression is displayed).
rendered = template.format(
    prompt=example["prompt"],
    completion=example["completion"],
    prediction=prediction,
)
Markdown(rendered)

Filter:   0%|          | 0/204045 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Generating response... :   0%|          | 0/1 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (735 > 512). Running this sequence through the model will result in indexing errors
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Generating response... : 100%|██████████| 1/1 [00:03<00:00,  3.34s/it]
