In [22]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
from tqdm import tqdm

In [23]:
# Hyperparams
batch_size=32
max_length=256+128

In [24]:
class CustomDataset(Dataset):
    def __init__(self, examples):
        self.examples=examples
        # self.tokenizer = tokenizer

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        return self.examples[idx]["prompt"]

In [25]:
# GPU Config
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.get_device_name(device))

cuda:0
NVIDIA A100-SXM4-80GB


In [36]:
# Load HF PPO Model
model_id = "smadala2/gpt2_ppo"
model = GPT2LMHeadModel.from_pretrained(model_id)
config = GPT2Config.from_pretrained(model_id)
print(model.config._name_or_path)

smadala2/gpt2_ppo


In [27]:
# Load Reward Model
def freeze_model(model):
    for name, param in model.named_parameters():
        param.requires_grad = False
        
reward_model = AutoModelForSequenceClassification.from_pretrained(
                'Ray2333/gpt2-large-harmless-reward_model',
                num_labels=1).to(device)

freeze_model(reward_model)

In [28]:
# Load tokenizer
tokenizer_id = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_id, padding="max_length", direction="left", padding_side="left", max_length=256, length=256)

if tokenizer.pad_token is None:
    print("Setting pad token")
    tokenizer.pad_token = tokenizer.eos_token
    
if model.config.pad_token_id is None:
    print("Setting pad token id")
    model.config.pad_token_id = model.config.eos_token_id

print(len(tokenizer))
print(tokenizer.pad_token)
print(model.config.pad_token_id)

Setting pad token
Setting pad token id
50257
<|endoftext|>
50256


In [29]:
# Load test dataset
dataset_id = "Dahoas/full-hh-rlhf"
dataset = load_dataset(dataset_id)
test_dataset = CustomDataset(dataset['test'])
test_dataloader=DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(len(test_dataset))
print(test_dataset[0])
print(len(test_dataloader))

12451


Human: I've been seeing a lot of slugs outside recently, even crawling up trees. Should I do something about them, or just let them be?

Assistant:
390


In [31]:
# Perform evaluation on dataset w/ PPO GPT2
model.eval()
model.to("cuda")

rewards = []
test_losses = []

for i, batch in enumerate(test_dataloader):
    print(f"Batch: {i}")
    inputs_test = tokenizer(batch, return_tensors="pt", padding="max_length", truncation=True, max_length=256).to(device)
    
    outputs_test = model.generate(input_ids=inputs_test.input_ids, attention_mask=inputs_test.attention_mask, \
        max_length=max_length, num_return_sequences=1, return_dict_in_generate=True, \
            output_scores=True)
    
    logits_test = torch.stack(outputs_test.scores, dim=1).to(device)
    outputs_ids_test = logits_test.argmax(-1) 
    outputs_attn_mask = (outputs_ids_test != model.config.pad_token_id).float().to(device)

    concatenated_input = {'input_ids': torch.cat([inputs_test["input_ids"], outputs_ids_test], dim=1).to(device),
                          'attention_mask': torch.cat([inputs_test["attention_mask"], outputs_attn_mask], dim=1).to(device)
                            }
    
    reward = torch.mean(reward_model(input_ids=concatenated_input["input_ids"], attention_mask= concatenated_input["attention_mask"]).logits.cpu()).item() 
    
    # Collect rewards and losses 
    rewards = rewards + [reward]*batch_size
    test_losses.append(1-reward)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Batch: 0


AssertionError: Cannot handle batch sizes > 1 if no padding token is defined.

In [None]:
print(test_losses)