
**Best-of-n sampling as an alternative to RLHF**

This notebook compares reward-model scores of prompt based responses from 
1. a base model (`gpt2-imdb`)
2. `RLHF` tuned model based on this base-model 
3. the base-model again from which we sample n responses to each prompt, score them and take the best scored one AKA the `best-of-n sampled` model



Import dependencies


In [1]:
import torch
import pandas as pd
from transformers import pipeline, AutoTokenizer
from datasets import load_dataset
import os
from trl import AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
device = 0 if torch.cuda.is_available() else "cpu"

  from .autonotebook import tqdm as notebook_tqdm


Various constants

In [2]:
root = '/mnt/nas1/models/'
ref_model_name = root + "lvwerra/gpt2-imdb"
model_name = root + "lvwerra/gpt2-imdb-pos-v2"
reward_model = root + "lvwerra/distilbert-imdb"

N_BEST_OF = 4

Models and  tokenizers 

In [3]:
model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)

ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(ref_model_name)

reward_pipe = pipeline("sentiment-analysis", model=reward_model, device=device)

tokenizer = AutoTokenizer.from_pretrained(ref_model_name)

tokenizer.pad_token = tokenizer.eos_token

# cuda-ize models
model.cuda()
ref_model.cuda()

  return self.fget.__get__(instance, owner)()


AutoModelForCausalLMWithValueHead(
  (pretrained_model): GPT2LMHeadModel(
    (transformer): GPT2Model(
      (wte): Embedding(50257, 768)
      (wpe): Embedding(1024, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-11): 12 x GPT2Block(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2Attention(
            (c_attn): Conv1D()
            (c_proj): Conv1D()
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D()
            (c_proj): Conv1D()
            (act): NewGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
    (lm_head): Linear(in_features=768, out_features=50257, bias=False)
  )
  (

In [4]:
print(tokenizer.encode("Hello world!"))
print(tokenizer.decode([15496, 995, 0]))

[15496, 995, 0]
Hello world!


Dataset building

In [5]:
def build_dataset(tokenizer, dataset_name="imdb", input_min_text_length=2, input_max_text_length=8):
    # load imdb with datasets
    ds = load_dataset(dataset_name, split="train")
    ds = ds.rename_columns({"text": "review"})
    ds = ds.filter(lambda x: len(x["review"]) > 200, batched=False)

    input_size = LengthSampler(input_min_text_length, input_max_text_length)

    def tokenize(sample):
        sample["input_ids"] = tokenizer.encode(sample["review"])[: input_size()]
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    ds = ds.map(tokenize, batched=False)
    ds.set_format(type="torch")
    return ds


dataset = build_dataset(tokenizer, dataset_name='/mnt/nas1/dong-qichang/corpus/general/imdb')

In [6]:
output_min_length = 4
output_max_length = 16
output_length_sampler = LengthSampler(output_min_length, output_max_length)

#### get a batch from the dataset
bs = 16
output_data = dict()
dataset.set_format("pandas")
df_batch = dataset[:].sample(bs)
output_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()

# :: [Resp]
response_tensors_ref, response_tensors = [], []
# :: [[Resp]]
response_tensors_best_of = []

In [7]:
query = torch.tensor(query_tensors[0])
print(query.shape)
queries = query.repeat((N_BEST_OF, 1))
print(queries.shape)

torch.Size([4])
torch.Size([4, 4])



Generation using various models

In [8]:
gen_kwargs = {"min_length": -1, "top_k": 0.0, "top_p": 1.0, "do_sample": True, "pad_token_id": tokenizer.eos_token_id}
sent_kwargs = {"top_k": None, "function_to_apply": "none", "batch_size": 16}

for i in range(bs):
    gen_len = output_length_sampler()

    query = torch.tensor(query_tensors[i])

    output = ref_model.generate(query.unsqueeze(dim=0).to(device), max_new_tokens=gen_len, **gen_kwargs).squeeze()
    response_tensors_ref.append(tokenizer.decode(output))

    output = model.generate(query.unsqueeze(dim=0).to(device), max_new_tokens=gen_len, **gen_kwargs).squeeze()
    response_tensors.append(tokenizer.decode(output))

    # generating copies of the same query for the Best-of-n sampling
    queries = query.repeat((N_BEST_OF, 1))
    output = ref_model.generate(queries.to(device), max_new_tokens=gen_len, **gen_kwargs).squeeze()
    response_tensors_best_of.append(tokenizer.batch_decode(output))

Scoring

In [9]:
outputs = reward_pipe(response_tensors_ref, **sent_kwargs)
print(outputs)

scores_ref = [output[0]["score"] for output in reward_pipe(response_tensors_ref, **sent_kwargs)]
scores = [output[0]["score"] for output in reward_pipe(response_tensors, **sent_kwargs)]
scores_best_of = []
for i, response in enumerate(response_tensors_best_of):
    # base_score = scores_ref[i]
    scores_best_of.append(torch.tensor([output[0]["score"] for output in reward_pipe(response, **sent_kwargs)]))

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


[[{'label': 'POSITIVE', 'score': 0.7179132103919983}, {'label': 'NEGATIVE', 'score': -0.6458280682563782}], [{'label': 'NEGATIVE', 'score': 2.1390843391418457}, {'label': 'POSITIVE', 'score': -2.585556983947754}], [{'label': 'NEGATIVE', 'score': 2.3338136672973633}, {'label': 'POSITIVE', 'score': -2.741856813430786}], [{'label': 'NEGATIVE', 'score': 1.3297553062438965}, {'label': 'POSITIVE', 'score': -1.7232002019882202}], [{'label': 'POSITIVE', 'score': 1.396438479423523}, {'label': 'NEGATIVE', 'score': -1.3023478984832764}], [{'label': 'NEGATIVE', 'score': 2.2352981567382812}, {'label': 'POSITIVE', 'score': -2.5556936264038086}], [{'label': 'POSITIVE', 'score': 2.4053659439086914}, {'label': 'NEGATIVE', 'score': -2.140338182449341}], [{'label': 'POSITIVE', 'score': 1.2411270141601562}, {'label': 'NEGATIVE', 'score': -1.061644196510315}], [{'label': 'NEGATIVE', 'score': 0.6906370520591736}, {'label': 'POSITIVE', 'score': -1.0696840286254883}], [{'label': 'NEGATIVE', 'score': 1.5734862

In [16]:
print(response_tensors_ref)
print(response_tensors)
print(scores_ref)
print(scores)
print(sum(scores_ref), sum(scores))
print(len(scores_best_of), scores_best_of[0].shape)
print(sum(scores_best_of))
t = torch.concat(scores_best_of, dim=0)
print(t.shape, sum(t))

['Fidois a resemblance to Marx, and the very next day supporting it. The plot', 'A mercilessly corny and very good,', 'Really, really bad. The computer MLady is a very great director. She was also so amazing', 'Using tons of stock footage, which is a great film, but', 'Trash/bad humour/bad humour/bad humour/', 'What a disappointment!<br /><br />I loved it. It was the best of the tenth', "The acting, other reviews notwithstanding, the BBC's confirmation that Indonesia has a very good shot of the world.", 'Tobe Hooper has made a great movie, and with the colorful', "This isn't telegenic, but it", 'This is not a good creepy story. It is a great', "There's nothing else to say. It's a very funny movie.", 'I have previously seen ZuJux at first, and', "...but I regret that mistake. It's a wonderful", 'This movie is a disgrace to the morality of the so-called movie. It is a great', "John Leguizemo has a lot of personality and wonderful camera. It's his", 'In an attempt to cash in on the market

In [14]:
output_data["response (ref)"] = response_tensors_ref
output_data["scores (ref)"] = scores_ref
output_data["response (RLHF)"] = response_tensors
output_data["scores (RLHF)"] = scores
print(len(scores_best_of), len(scores_best_of[0]))
output_data["response (best_of)"] = [
    response_tensors_best_of[i][a.argmax().item()] for i, a in enumerate(scores_best_of)
]
output_data["scores (best_of)"] = [a.max().item() for a in scores_best_of]


# store results in a dataframe
df_results = pd.DataFrame(output_data)
print(df_results['scores (ref)'].sum(), df_results['scores (RLHF)'].sum(), df_results['scores (best_of)'].sum())
df_results

16 4
22.141191571950912 26.901581888087094 33.59040355682373


Unnamed: 0,query,response (ref),scores (ref),response (RLHF),scores (RLHF),response (best_of),scores (best_of)
0,Fidois a,"Fidois a mofie and Freda, Major Hans Godel, Norma",0.717913,"Fidois a resemblance to Marx, and the very nex...",1.631843,"Fidois a nice role, her role particularly cast...",2.32362
1,A mercilessly corny,A mercilessly corny and vicious homosexual movie,2.139084,"A mercilessly corny and very good,",1.899196,A mercilessly corny insult to all the,2.318306
2,"Really, really bad.","Really, really bad. I cannot rate this movie. ...",2.333814,"Really, really bad. The computer MLady is a ve...",1.724828,"Really, really bad. The movie was only good fo...",2.517596
3,Using tons of stock footage,"Using tons of stock footage, it seemed VW's ed...",1.329755,"Using tons of stock footage, which is a great ...",0.005345,Using tons of stock footage of actual episodes...,2.060562
4,Trash/bad,Trash/bad) but was pretty good. Good scen,1.396438,Trash/bad humour/bad humour/bad humour/,2.18554,Trash/bad timing/actors/hand-me,2.425057
5,What a disappointment!<br /,What a disappointment!<br /><br />Muse narrati...,2.235298,What a disappointment!<br /><br />I loved it. ...,1.688368,What a disappointment!<br /><br />Empathy dial...,2.629115
6,"The acting, other reviews notwithstanding,","The acting, other reviews notwithstanding, is ...",2.405366,"The acting, other reviews notwithstanding, the...",1.506757,"The acting, other reviews notwithstanding, pic...",2.447833
7,Tobe Hooper has made,Tobe Hooper has made his interest in the bulle...,1.241127,"Tobe Hooper has made a great movie, and with t...",2.739002,Tobe Hooper has made a magnificent portrait of...,2.764252
8,This isn't,This isn't a big deal considering the tour,0.690637,"This isn't telegenic, but it",0.129298,This isn't how they get their points across,1.088183
9,This is not a good,This is not a good chance for those who preclu...,1.573486,This is not a good creepy story. It is a great,2.135549,This is not a good adaptation of James De Havi...,1.864987
