In [1]:
import os
import datasets
import torch
from datasets import load_dataset, load_metric
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
import pandas as pd

import transformers
from transformers import (
    AutoConfig,
    AutoTokenizer,
    default_data_collator,
    AutoModelForSeq2SeqLM
)
from promptsource.templates import DatasetTemplates

In [23]:
# Evaluation configuration: which dataset/prompt to render and which checkpoint to score.
dataset_name = 'super_glue'      # passed to load_dataset and used to look up prompt templates
dataset_subset = 'rte'           # dataset configuration / template subset
prompt_name = 'must be true'     # key of the promptsource template to apply
# Token-length limits; presumably caps for the input and the generated target
# respectively — TODO confirm against where they are consumed.
max_length = 1024
target_max_length = 256
per_device_batch_size = 8        # NOTE(review): not referenced in the cells visible here

checkpoint = 'bigscience/T0_3B'  # model identifier handed to from_pretrained
output_dir = './evaluation_result'  # directory created below for evaluation artifacts

In [3]:
# Create the output directory up front; exist_ok makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

In [4]:
# Load only the validation split of the configured dataset/subset.
raw_dataset = load_dataset(dataset_name, dataset_subset, split='validation')

Found cached dataset super_glue (/workspaces/seed/cache/hf_dataset/super_glue/rte/1.0.3/bb9675f958ebfee0d5d6dc5476fafe38c79123727a7258d515c450873dbdbbed)


In [5]:
# Display the dataset's feature names and row count.
raw_dataset

Dataset({
    features: ['premise', 'hypothesis', 'idx', 'label'],
    num_rows: 277
})

In [8]:
# Load the tokenizer that matches the configured checkpoint, padding on the left.
# The original referenced `model_name`, which is never defined in this notebook and
# raises NameError on a fresh kernel; `checkpoint` is the configured identifier.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side='left')

In [9]:
# Check which pad token the tokenizer defines.
tokenizer.pad_token

'<pad>'

In [11]:
# NOTE(review): `padding` is not referenced by any cell visible here.
padding = 'max_length'  # I'll need this padding scheme for jax later
# padding = False

# Look up the promptsource templates for "<dataset>/<subset>" and select the
# configured prompt by name.
template = DatasetTemplates(f"{dataset_name}/{dataset_subset}")
prompt = template[prompt_name]

In [12]:
# Sanity check: render the first example; the result is [input_text, target_text].
prompt.apply(raw_dataset[0])

['Given that Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation. Therefore, it must be true that "Christopher Reeve had an accident."? Yes or no?',
 'No']

In [20]:
# Render every example through the selected prompt and keep the resulting
# (input, target) pair as a small record. Each rendering must unpack into
# exactly two strings.
data = [
    {'input_text': source_text, 'target_text': answer_text}
    for source_text, answer_text in map(prompt.apply, raw_dataset)
]



In [22]:
# Inspect the first rendered record.
data[0]

{'input_text': 'Given that Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation. Therefore, it must be true that "Christopher Reeve had an accident."? Yes or no?',
 'target_text': 'No'}

In [24]:
# Load the seq2seq checkpoint with reduced peak CPU memory during loading.
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, low_cpu_mem_usage=True)
# NOTE(review): `parallelize()` is reported as deprecated in recent transformers
# releases in favour of `device_map`; confirm against the installed version.
model.parallelize()

# Rebinds `tokenizer` from the checkpoint, this time without padding_side='left'.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [45]:
# Generate an answer for every rendered prompt, one example at a time, collecting
# the decoded text and the reference target in index-aligned lists.
# Both lists are reset here so re-running the cell does not accumulate duplicates.
prediction = []
target = []

# tqdm (imported at the top of the notebook) reports progress; the previously
# imported `typer.progressbar` was never used and has been dropped.
for example in tqdm(data, desc='Generating'):
    input_ids = tokenizer.encode(example['input_text'], return_tensors='pt').to('cuda')
    # Use the configured generation budget instead of repeating the literal 256.
    generated_ids = model.generate(input_ids, max_length=target_max_length)
    # Only the first returned sequence is decoded; special tokens are stripped so
    # the text can be compared directly with the target string.
    answer_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    prediction.append(answer_text)
    target.append(example['target_text'])

In [46]:
# Spot-check one generated answer...
prediction[3]

'No'

In [47]:
# ...against the reference target at the same index.
target[3]

'No'

In [48]:
# Number of examples whose generated text is exactly equal to the reference target.
correct = sum(
    1
    for predicted, expected in zip(prediction, target)
    if predicted == expected
)

correct

191

In [49]:
# Exact-match accuracy: matches divided by the number of predictions.
# Raises ZeroDivisionError if `prediction` is empty.
correct / len(prediction)

0.6895306859205776