In [1]:
from transformers import AutoTokenizer
import random
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

tokenizer = AutoTokenizer.from_pretrained('tokenizer/lvwerra/gpt2-imdb')
tokenizer.pad_token_id = 0

tokenizer

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Using sep_token, but it is not set yet.
Using cls_token, but it is not set yet.
Using mask_token, but it is not set yet.


GPT2TokenizerFast(name_or_path='tokenizer/lvwerra/gpt2-imdb', vocab_size=50257, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '!', 'additional_special_tokens': ['<|endoftext|>']}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	50256: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [2]:
from datasets import load_from_disk, concatenate_datasets

dataset = load_from_disk('dataset/imdb')
dataset = concatenate_datasets([dataset[i] for i in ['train', 'test']])


def f(data):
    question = tokenizer.encode(data['text'], add_special_tokens=False)[:5]
    return {'question': question}


dataset = dataset.map(f, remove_columns=['label', 'text'])


def f(data):
    return len(data['question']) == 5


dataset = dataset.filter(f)

dataset, dataset[0]

(Dataset({
     features: ['question'],
     num_rows: 50000
 }),
 {'question': [40, 26399, 314, 3001, 327]})

In [3]:
def get_batch_data():
    label = random.choices(range(2), k=128)
    question = random.choices(dataset, k=128)
    question = [i['question'] for i in question]

    question = [[tokenizer.convert_tokens_to_ids(str(l))] + q
                for l, q in zip(label, question)]

    return label, question


get_batch_data()

([1,
  1,
  1,
  1,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  0],
 [[16, 36623, 290, 15579, 1111, 2495],
  [16, 39, 2743, 16543, 318, 546],
  [16, 5962, 612, 373, 23459, 72],
  [16, 7149, 2618, 508, 2925, 284],
  [15, 40, 3505, 4964, 428, 319],
  [16, 3152, 262, 7396, 11533, 286],
  [15, 43920, 32520, 26494, 15992, 11],
  [15, 27991, 1472, 318, 3737, 530],
  [16, 4598, 407, 307, 6522, 28970],
  [15, 464, 29274, 45270, 11,

In [4]:
from trl import AutoModelForCausalLMWithValueHead

model_ppo = AutoModelForCausalLMWithValueHead.from_pretrained(
    'model/lvwerra/gpt2-imdb').to(device)
model_ppo_ref = AutoModelForCausalLMWithValueHead.from_pretrained(
    'model/lvwerra/gpt2-imdb').to(device)

for i in model_ppo_ref.parameters():
    i.requires_grad_(False)

Some weights of the model checkpoint at model/lvwerra/gpt2-imdb were not used when initializing GPT2LMHeadModel: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at model/lvwerra/gpt2-imdb were not used when initializing GPT2LMHeadModel: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a Bert

In [5]:
from transformers import AutoModelForSequenceClassification

tokenizer_cls = AutoTokenizer.from_pretrained(
    'tokenizer/lvwerra/distilbert-imdb')
model_cls = AutoModelForSequenceClassification.from_pretrained(
    'model/lvwerra/distilbert-imdb').to(device)

for i in model_cls.parameters():
    i.requires_grad_(False)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
def get_question():
    label, question = get_batch_data()
    label = torch.LongTensor(label).to(device)

    question = [torch.LongTensor(i).to(device) for i in question]

    return label, question


label, question = get_question()

label, question[:10]

(tensor([1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
         0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,
         1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
         1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1,
         1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1,
         0, 1, 1, 0, 0, 1, 0, 0], device='cuda:0'),
 [tensor([  16,   40, 1053, 6810,  625,  262], device='cuda:0'),
  tensor([  16, 1212, 3807,  373,   11,  286], device='cuda:0'),
  tensor([  15, 3673,  881,  284,  910,  319], device='cuda:0'),
  tensor([   15, 20459,  8066,   307,  1327,   284], device='cuda:0'),
  tensor([   15, 38743, 31140,  2492,   470,   262], device='cuda:0'),
  tensor([  15, 2061,  257, 2085,  286,  257], device='cuda:0'),
  tensor([   16,    40,   460,   766,  1521, 43442], device='cuda:0'),
  tensor([  16,   40, 3521,  470, 4043,  284], device='cuda:0'),
  ten

In [7]:
#包装类,用于生成
def generate(input_ids):
    return model_ppo.generate(input_ids=input_ids,
                              min_length=-1,
                              top_k=0.0,
                              top_p=1.0,
                              do_sample=True,
                              pad_token_id=tokenizer.pad_token_id,
                              max_new_tokens=32,
                              eos_token_id=tokenizer.eos_token_id)


def get_answer(question):
    #如果question的长度确定,这里可以转换成批运算
    if True:
        answer = generate(torch.stack(question))

        answer_new = []
        for i in answer:
            if tokenizer.eos_token_id not in i:
                answer_new.append(i.unsqueeze(0))
                continue
            split = i.tolist().index(tokenizer.eos_token_id) + 1
            answer_new.append(i[:split].unsqueeze(0))
        answer = answer_new
    else:
        answer = [generate(i.unsqueeze(0)) for i in question]

    #裁剪,只要生成的部分
    answer = [a[0, len(q):] for q, a in zip(question, answer)]

    return answer


answer = get_answer(question)

answer[:10]

[tensor([  812,   326,  3427, 14479,  3371,  6918,    11,  2592,   739,   262,
         34731,   286,   366,  7353,    12, 23922,  1042,     1,   389,   783,
          5371,   286, 11560,   257, 43122,   290,   326, 22041,  2331,  1234,
           379,  9033], device='cuda:0'),
 tensor([ 1781,    11,  2208, 45002,    13,   314,  1392,   604,    14,   940,
           737, 33443, 10544,    13,   383,  7205,   373,   407,  2089,    11,
          4632,   616,  6000, 18641,   286,   262,  3807,  2277,   616,  1767,
            13,   406], device='cuda:0'),
 tensor([  287,   597,   584,  2912,   780,  1111,   423,   617,   922,  2173,
           329,   514,    13,  1119,  1111,  4893,   845,   880,   703,  6283,
           262,  1621,   318,   290,   635, 14846, 14397,   319,   257,  1241,
           326,   338], device='cuda:0'),
 tensor([  804,   379,   606,   290,   852, 15049,   644,   257,  2823,   428,
           318,    13,   314,   460,   470,   787,   503,   257,  2060,  1517,
     

In [8]:
def get_reward(question, answer, label):
    token = [q.tolist()[1:] + a.tolist() for q, a in zip(question, answer)]
    token = [tokenizer.decode(i) for i in token]

    token = tokenizer_cls(token,
                          padding=True,
                          truncation=True,
                          max_length=512,
                          return_tensors='pt').to(device)

    with torch.no_grad():
        logits = model_cls(**token).logits

    return logits.gather(1, label.reshape(-1, 1)).squeeze(1)


reward = get_reward(question, answer, label)

reward

tensor([-0.0790, -2.7419,  0.0679, -1.6332,  0.6677,  2.6076,  0.1573, -1.1461,
         0.9582, -2.5122,  2.7525, -2.3757,  1.0361,  2.5690,  1.9334, -2.1080,
        -1.7346,  0.6678, -1.2840, -0.0737,  1.8030, -1.5229, -1.9619, -1.7785,
        -1.3398,  2.2020,  0.3973, -1.9178, -1.7657, -0.1403, -1.7730,  2.3305,
         1.4456, -1.9255, -2.2839,  1.7811, -2.6034,  1.0441, -0.5461,  1.6941,
         2.0946, -1.7392,  0.8828, -0.9679,  0.2184, -0.4701, -2.4185, -0.0232,
        -0.0851,  2.2899, -2.2363, -2.2239, -1.2585,  1.6426, -0.6541,  0.9921,
         0.2738,  0.2257,  1.1809, -1.2928,  2.2934,  2.8806,  1.1941, -1.3584,
        -0.4178,  2.0240,  2.0432,  0.2561, -1.0444, -0.7836, -1.6604,  0.9020,
        -2.4616,  0.5171,  0.3906,  1.7344,  1.1841, -1.4855,  2.4969, -1.4334,
        -1.9962,  0.0924, -1.0023,  2.7003, -0.4900, -1.1669, -1.4747, -1.2043,
         0.4568,  2.5980,  1.8720, -1.3922,  2.7693, -2.4912, -1.5629, -1.2905,
         2.4059,  1.9968, -0.8069, -1.09

In [9]:
from trl import PPOConfig, PPOTrainer

config = PPOConfig(learning_rate=1e-5, batch_size=128)

trainer = PPOTrainer(config,
                     model_ppo,
                     model_ppo_ref,
                     tokenizer,
                     dataset=dataset)

trainer

<trl.trainer.ppo_trainer.PPOTrainer at 0x7f1e198a7b50>

In [10]:
import warnings

warnings.filterwarnings('ignore')

for epoch in range(200):
    label, question = get_question()
    answer = get_answer(question)
    reward = get_reward(question, answer, label)

    trainer.step(question, answer, [i for i in reward])

    if epoch % 10 == 0:
        print(epoch, reward.mean().item())

        question = tokenizer.decode(question[0].tolist())
        answer = tokenizer.decode(answer[0].tolist())
        reward = reward[0].item()

        #0差评,1好评
        print(question, '->', answer, reward)

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


0 0.09299226105213165
1First of all, this ->  movie is hardly stupid. However, it is one of the worst. It has two very real scenarios.Along with the efforts of a young couple of lost hero -2.487795829772949


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


10 -0.023181095719337463
0In my book "Basic ->  Instinct" released 17 years ago I read the book and think that such kind of things are the way the world is now. And quotes like "All that -1.2708756923675537


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


20 0.31382036209106445
0Considering it's basically low ->  budget and a lot of black nudity and an odd response from the audience, the film is a pretty per ml resolution to an issue worthy of not being all that 1.1325664520263672


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='le

30 0.309106707572937
0True stories make the best ->  work of good actors none that are born into their mid-seventies.<|endoftext|> -2.210801601409912


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


40 1.0727498531341553
1Cheesy 80's horror -> ...... composer: so beautifully chosen, I can say, has a on-screen performance even better.<|endoftext|> 2.0839757919311523


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


50 1.4121887683868408
0GBS wrote his own ->  script. One name near offense but atmospheric exposition, oddly still bland. Wipeout, rubbish. Violence, i slows.<|endoftext|> 2.3595705032348633


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


60 1.7078416347503662
0I was never in the ->  film.<|endoftext|> 0.9322419166564941


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


70 2.1755259037017822
1I must say, when ->  watching this movie, it is amazing.<|endoftext|> 2.4521334171295166


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='le

80 2.1622257232666016
1This is one of the ->  best films of all time.<|endoftext|> 2.7308685779571533


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


90 2.1979665756225586
1I resisted watching 15 Park ->  but so loved seeing it!<|endoftext|> 2.1706929206848145


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


100 2.2108092308044434
0This is a Laurel & ->  Hardy terrible movie.<|endoftext|> 1.9358655214309692


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


110 2.288398504257202
1This was the first Mickey ->  Renn masteringcom to medieval master and movie makers, and truly supported their development toward English film. highly recommended.<|endoftext|> 2.7567362785339355


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


120 2.1484575271606445
1There have been several films ->  and tv. This is great film for the ladies.<|endoftext|> 2.491626739501953


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


130 2.2387020587921143
1K Murli Mohan ->  in happiness that was tremendously 1934, and terrific movie.<|endoftext|> 2.5319623947143555


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


140 2.111945629119873
0I don't really know ->  how one will figure out how he's going to play the story, but this is a bad movie!<|endoftext|> 2.4522318840026855


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


150 2.228043556213379
1I will never get back ->  this last glorious film.<|endoftext|> 2.100973606109619


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


160 2.0983808040618896
1Night of the Twisters -> . Truly one of the best I've seen.<|endoftext|> 2.794154644012451


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


170 2.1417503356933594
1This film was a new ->  masterpiece.<|endoftext|> 2.473341464996338


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


180 2.2145256996154785
1William Faulkner was ->  just perfect! An excellent movie.<|endoftext|> 2.788565158843994


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='le

190 2.0492758750915527
0Mighty Morphin Power ->  is an embarrassment.<|endoftext|> 2.2804903984069824


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
