In [1]:

## Python >= 3.8  


In [2]:

## !pip install transformers
## !pip install wandb
## !pip install trl
## !pip install pandas
## !pip install datasets
## !pip install accelerate
## !pip install tyro
## !pip install nltk -U


In [3]:

import torch
from tqdm import tqdm
import pandas as pd
import wandb
import os

tqdm.pandas()

from transformers import pipeline, AutoTokenizer
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler


In [4]:

config = PPOConfig(
    model_name    = "lvwerra/gpt2-imdb",
    learning_rate = 1.41e-5,
    ## log_with      = "wandb",
)

sent_kwargs = {
         "return_all_scores": True, 
         "function_to_apply": "none", 
         "batch_size": 16
}


In [5]:

## wandb.init()

wandb.init(mode="disabled") 
os.environ['WANDB_DISABLED'] = 'true'


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



## Load IMDB dataset

The IMDB dataset contains 50k movie review annotated with "positive"/"negative" feedback indicating the sentiment. We load the IMDB dataset into a DataFrame and filter for comments that are at least 200 characters. Then we tokenize each text and cut it to random size with the LengthSampler.



## Visualize details of dataset


In [6]:

dataset_name="imdb"


In [7]:

ds = load_dataset(dataset_name, split="train")


In [8]:

ds


Dataset({
    features: ['text', 'label'],
    num_rows: 25000
})

In [9]:

ds[15:18]


{'text': ["This film is just plain horrible. John Ritter doing pratt falls, 75% of the actors delivering their lines as if they were reading them from cue cards, poor editing, horrible sound mixing (dialogue is tough to pick up in places over the background noise), and a plot that really goes nowhere. I didn't think I'd ever say this, but Dorothy Stratten is not the worst actress in this film. There are at least 3 others that suck more. Patti Hansen delivers her lines with the passion of Ben Stein. I started to wonder if she wasn't dead inside. Even Bogdanovich's kids are awful (the oldest one is definitely reading her lines from a cue card). This movie is seriously horrible. There's a reason Bogdanovich couldn't get another project until 4 years later. Please don't watch it. If you see it in your television listings, cancel your cable. If a friend suggests it to you, reconsider your friendship. If your spouse wants to watch it, you're better off finding another soulmate. I'd rather go

In [10]:

from datasets import ClassLabel
import random
import pandas as pd
from IPython.display import display, HTML


In [11]:

def show_random_elements(dataset, num_examples=20):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    
    picks = []
    
    for _ in range( num_examples ):
        
        pick = random.randint(0, len(dataset)-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)
    
    df = pd.DataFrame( dataset[picks] )        ## indexing 10 picks 
    
    print(df)
    print(dataset.features.items())
    
    for column, typ in dataset.features.items():
        print(column)
        print(typ)
        print(ClassLabel)
        ## The isinstance() function returns True if the specified object 
        ## is of the specified type, otherwise False
        if isinstance(typ, ClassLabel):
            print("Hello")
            df[column] = df[column].transform(lambda i: typ.names[i])
            ## print(typ.names[i])
            
    display(HTML(df.to_html()))


In [12]:

show_random_elements(ds)


                                                 text  label
0   Superhero movies pretty much always suck, and ...      0
1   Please! Do not waste any money on this movie. ...      0
2   I gave it a 10, since everyone else seemed to ...      1
3   I found this to be a surprisingly light-handed...      1
4   Fot the most part, this movie feels like a "ma...      0
5   The film picks up after last years remake with...      0
6   Back to the roots with "like it is in heaven" ...      1
7   A confused mess from start to finish. Like the...      0
8   This movie has an all star cast, John Candy, R...      1
9   in one of Neil Simon's best plays. Creaky, cra...      1
10  There have been some funny movies about spirit...      0
11  Before watching this film I had very low expec...      0
12  After seeing the trailer for Evening, you will...      1
13  Believe it or not, "The Woodchipper Massacre" ...      0
14  What an insult to Olivia D'Abo who plays the f...      0
15  Every child experien

Unnamed: 0,text,label
0,"Superhero movies pretty much always suck, and this is no exception. Its only redeeming quality is the fact the movie COULD have been even worse. I would put 'Batman & Robin' and 'Steel' above this movie, so yes it is that bad...<br /><br />If your looking for a black superhero, check out 'Blankman' its not a ""serious"" superhero movie but at least its entertaining.",neg
1,"Please! Do not waste any money on this movie. It really is nothing more than a boring German Blair Witch ripoff made by some high school kids. I couldn't finish watching it, and usually I like watching all kinds of B-movies. How on earth could they find a distributor for it?!!! Funny however: Check out Wikipedia for ""dark area"". The guy who wrote the entry must be completely out of his mind. Maybe he got loads of money from the producers. Money that should have been spend on actors, camera and editing. Even that wouldn't have helped, since there is absolutely no interesting idea behind this film. Unfortunately ""dark area"" has already gotten too much attention. Please, director, producer and author of this movie, STOP making movies like that...you are not doing yourself a favor. The world would be a better place without this film.",neg
2,"I gave it a 10, since everyone else seemed to like it and it would have been churlish not to. The reason I'm troubling you is to add a personal observation on Castle's work.<br /><br />I've seen ""Homicidal"" and ""The Tingler"" (the version with the clever colour sequence where everything except the blood is in black and white) a few times and ""The House On Haunted Hill"" many times.<br /><br />Even I am not old enough to have seen them when Castle was up to his showman tricks, thus I can appreciate them for their own merit. And while most pass him off as second-rate, schlocky, hammy, etc., I believe they do him a disservice.<br /><br />The end sequence of ""Homicidal"" is GENUINELY shocking and works today - and the premise of ""The Tingler"" while silly, was highly original.<br /><br />But ""The House On Haunted Hill"" was a TRIUMPH. Having used that Frank Lloyd Wright house as its exterior, the great Vincent Price and a solid cast, plus a good score and production values - when I first saw it at a packed late-night showing in the late Sixties, it produced an audience reaction I'd not seen before and have not seen since.<br /><br />It was the bit where the heroine is alone in the basement (if you've not seen the film, stop reading NOW) and we are waiting to hear the hero on the other side of the wall.<br /><br />With NO telegraphing of what is coming, the camera slowly pulls back, forcing the AUDIENCE to switch their gaze to... I'm saying no more (my ""spoiler"" declaration above only covers THIS movie).<br /><br />The point is, I believe this ploy was DELIBERATE - not accidental - and when it happened, the WHOLE AUDIENCE SCREAMED (including most of the men!) It took the audience about TEN MINUTES to calm down.<br /><br />Now THAT is superior film-making. A flamboyant showman he might have been, but ""House"" and the other two films I've mentioned were GOOD MOVIES. Castle may not have been a Hitchcock, but he was no Ed Wood, either.<br /><br />It's easy to concentrate on someone's quirks and forget to examine their TALENT. So I hope this documentary acknowledged that. I look forward to seeing it.",pos
3,"I found this to be a surprisingly light-handed touch at a 1950's culture-clash movie. John Wayne would hardly be one's first choice as a cultural attache, being about as diplomatic with his good intentions as a bull-run in Harrods. But this time he was left to play a part that was far more passive than his usual bluff persona, and he accomplished his task with style. The Duke was a guy who really could act well. His facial expressions and body language could be extremely subtle.<br /><br />Despite his considerable presence both as an actor and in terms of screen time, he failed to dominate this movie. Many of his good intentions came a cropper. He had authority over nobody, and the intermittent narrative was provided by the titular geisha to whom he was the barbarian.<br /><br />The story of American attempts to curry favour with an isolationist Japan was one of political intrigue rather than swashbuckling or hell-for-leather battles. I cannot comment on the accuracy of its research but the strangeness of the Oriental culture to western sensibilities was demonstrated well. There was a great deal of minutely-choreographed ceremony entailing what looked to this observer like authentic costume and props. The set pieces were complex and detailed. A lot of money and thought had been applied to it.<br /><br />The fractured romance between Wayne and his geisha added a little extra element, and stopped the movie becoming just a political or flag-waving effort. Script was good without being too wordy. There was a great deal of Japanese dialogue, but the lengthy periods of translation didn't interfere with the narrative. It was nice to see plenty of genuine orientals on the set. Whether or not they were Japanese, I couldn't say. But anyway they looked the part. At least the leads were not played by cross-dressing Caucasians, unlike other efforts such as 'Blood Alley' (yes, I know they were Chinese) 'The Inn Of The Sixth Happiness' or even 'The King And I'.<br /><br />Frankly, I enjoyed this more than any of those other movies. The script was better for a start. I never liked the songs in 'The King And I', and wasn't impressed by the heavy-laden anti-communist subtext of 'Blood Alley'. I confess to never having seen this work before and found it compared very favourably to many of The Duke's more popular outings.<br /><br />Recommended.",pos
4,"Fot the most part, this movie feels like a ""made-for-TV"" effort. The direction is ham-fisted, the acting (with the exception of Fred Gwynne) is overwrought and soapy. Denise Crosby, particularly, delivers her lines like she's cold reading them off a cue card. Only one thing makes this film worth watching, and that is once Gage comes back from the ""Semetary."" There is something disturbing about watching a small child murder someone, and this movie might be more than some can handle just for that reason. It is absolutely bone-chilling. This film only does one thing right, but it knocks that one thing right out of the park. Worth seeing just for the last 10 minutes or so.",neg
5,"The film picks up after last years remake with the military setting up electronic surveillance equipment in the desert where the attacks in the first film happened. The crew is killed not long before a group of soldiers on a training exercise show up to find no one around. You can fill in the rest.<br /><br />This is a paycheck picture all around. There seems to be no passion in anyone's performances, nor in anyone behind the camera. This is a movie that was made for the money and nothing else. On some level this should have worked, it could have been a more horrific Southern Comfort (where National Guardsmen run afoul of some people in the swamps), but instead its not much of anything. In large part you can blame the script, written unbelievably in part by Wes Craven, which hits the same old targets again and again. Add to the mess the fact that the direction is dull and the set up of sequences is so lack luster as to remove any inherent tension in any scene.<br /><br />Its not bad as such, but badly made and dull. Let me put my feelings into context: The reason I saw this film was because a local multiplex screwed up and ran this film instead of the kids flick the Last Mimzy and I wanted to see what caused the out cry(I mean the films have so much in common-he says sarcastically). I'm convinced that this film will only scare those who like the last Mimzy.",neg
6,"Back to the roots with ""like it is in heaven"" - what are the real values of life? These Swedes carve out a message that appeals to every heart. We've seen it twice now in a cinema packed to the last seat: love pure and joy within the music of a choir that's simple, yet full of power once everyone finds his or her inner tone. <br /><br />From the glitter of fame to the school of of his youth, now empty and ready to be adapted as his new home after collapsing on stage, Daniel wants to start listening and is drawn into the lives of the simple, warm and rough people of the North.<br /><br />He wins the hearts with music and gains the capacity to love and be loved unconditionally.<br /><br />Don't go see it if you've been normed to Hollywood. This stuff contains no extras, just your laughter, your compassion, your tears!",pos
7,"A confused mess from start to finish. Like they used to say about the Beatles'songs, there was a secret message if you played the LP backward. If one had the patience to watch this films scenes from finish to start, you'd come away with the same degree of disappointment.<br /><br />Apart from all of this psychedelic hodge podge of flashbacks and false starts, the clearest characters were the movie backers, out for revenge if the movie didn't get sorted. There was nothing to like about these two either. Overacting, shouting and threats were delivered in comic book fashion. I think one dimensional was an overstatement.<br /><br />Okay, so maybe the artsy types are rolling their eyes reveling in the fact that unlike them, we plebeians just didn't get it. Well I'm afraid there was nothing to get. And the two cardinal sins of any bad movie carried from start to finish. A non-existent and pathetic story line if you want to call it that, and by far the worst, not a single character you cared about in the least.",neg
8,"This movie has an all star cast, John Candy, Richard Lewis, Ornella Mutti, Cybill Shepard, and Jim Belushi to name a few, run amuck in Monte Carlo, as well as some other beautiful European locations, and is very funny. The trouble that everyone gets in when they lie to protect themselves is great, and I highly recommend that you see this movie, it is well worth it! John Candy is in top form in Once Upon A Crime, as is everyone else! If you and your family are looking for a great family film, this is your ticket. Everyone gives stellar performances, great acting, great comedy, and great timing, which is rare in movies these days. Great plot, great mystery, (which I love anyways) and overall, well worth the money you spend on it. So get the kids, grab some popcorn, juice, or tea, or sodas, and enjoy the show!!!!",pos
9,"in one of Neil Simon's best plays. Creaky, cranky ex-Vaudeville stars played by Walter Matthau and George Burns are teaming up for a TV comedy special. The problem is they haven't even SEEN each other in over a decade. Full of zippy one liners and inside showbiz jokes, this story flies along with a steady stream of humor. Good work also by Richard Benjamin as the harried nephew, Rosetta LeNoire as the nurse, and Howard Hesseman as the TV commercial director. Steve Allen and Phyllis Diller appear as themselves. Trivia note: The opening montage contains footage from Hollywood Revue of 1929 and shows Marie Dressler, Bessie Love, Polly Moran, Cliff Edwards, Charles King, Gus Edwards, and the singing Brox Sisters.",pos


In [13]:

ds = ds.rename_columns({"text": "review"})
ds = ds.filter(lambda x: len(x["review"]) > 200, batched=False)


In [14]:

ds


Dataset({
    features: ['review', 'label'],
    num_rows: 24895
})

In [15]:

tokenizer           = AutoTokenizer.from_pretrained(config.model_name)
tokenizer.pad_token = tokenizer.eos_token


In [16]:

def tokenize( sample ):
    sample["input_ids"] = tokenizer.encode( sample["review"]    )[: 20]
    sample["query"]     = tokenizer.decode( sample["input_ids"] )
    return sample


ds = ds.map(tokenize, batched=False)
ds


Dataset({
    features: ['review', 'label', 'input_ids', 'query'],
    num_rows: 24895
})

In [17]:

ds[15:18]


{'review': ["This film is just plain horrible. John Ritter doing pratt falls, 75% of the actors delivering their lines as if they were reading them from cue cards, poor editing, horrible sound mixing (dialogue is tough to pick up in places over the background noise), and a plot that really goes nowhere. I didn't think I'd ever say this, but Dorothy Stratten is not the worst actress in this film. There are at least 3 others that suck more. Patti Hansen delivers her lines with the passion of Ben Stein. I started to wonder if she wasn't dead inside. Even Bogdanovich's kids are awful (the oldest one is definitely reading her lines from a cue card). This movie is seriously horrible. There's a reason Bogdanovich couldn't get another project until 4 years later. Please don't watch it. If you see it in your television listings, cancel your cable. If a friend suggests it to you, reconsider your friendship. If your spouse wants to watch it, you're better off finding another soulmate. I'd rather 


## My own data


In [18]:

my_own_datasets = load_dataset("text", data_files={ "train": "/home/rcalix/Desktop/rc_train.txt", "validation": "/home/rcalix/Desktop/rc_validation.txt"} )


In [19]:

my_own_datasets


DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 4
    })
    validation: Dataset({
        features: ['text'],
        num_rows: 4
    })
})



    
## Now this for actual RLHF  



In [20]:

def build_dataset(
         config, 
         dataset_name="imdb", 
         input_min_text_length=2, 
         input_max_text_length=8
):
    """
    Build dataset for training. This builds the dataset from `load_dataset`, one should
    customize this function to train the model on its own dataset.

    Args:
        dataset_name (`str`):
            The name of the dataset to be loaded.

    Returns:
        dataloader (`torch.utils.data.DataLoader`):
            The dataloader for the dataset.
    """
    tokenizer           = AutoTokenizer.from_pretrained(config.model_name)
    tokenizer.pad_token = tokenizer.eos_token
    
    # load imdb with datasets
    
    ds = load_dataset(dataset_name, split="train")
    
    ds = ds.rename_columns({"text": "review"})
    ds = ds.filter(lambda x: len(x["review"]) > 200, batched=False)

    input_size = LengthSampler(input_min_text_length, input_max_text_length)

    def tokenize(sample):
        sample["input_ids"] = tokenizer.encode( sample["review"]    )[: input_size()]
        sample["query"]     = tokenizer.decode( sample["input_ids"] )
        return sample

    ds = ds.map(tokenize, batched=False)
    ds.set_format(type="torch")
    return ds


In [21]:

dataset = build_dataset(config)


In [22]:

def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])
    


## Load pre-trained GPT2 language models
We load the GPT2 model with a value head and the tokenizer. We load the model twice; the first model is optimized while the second model serves as a reference to calculate the KL-divergence from the starting point. This serves as an additional reward signal in the PPO training to make sure the optimized model does not deviate too much from the original language model.


In [23]:

model     = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)

tokenizer = AutoTokenizer.from_pretrained(config.model_name)

tokenizer.pad_token = tokenizer.eos_token


In [24]:

ppo_trainer = PPOTrainer(
                 config, 
                 model, 
                 ref_model, 
                 tokenizer, 
                 dataset=dataset, 
                 data_collator=collator
)


Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.



## Load BERT classifier (Reward Function)

We load a BERT classifier fine-tuned on the IMDB dataset.


In [25]:

device = ppo_trainer.accelerator.device
device


device(type='cuda')

In [26]:

if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"  # to avoid a `pipeline` bug

device

0

In [27]:

sentiment_pipe = pipeline("sentiment-analysis", model="lvwerra/distilbert-imdb", device=device)



The model outputs are the logits for the negative and positive class. We will use the logits for positive class as a reward signal for the language model.


In [28]:

text = "this movie was really bad!!"

sentiment_pipe(text, **sent_kwargs)




[[{'label': 'NEGATIVE', 'score': 2.3350484371185303},
  {'label': 'POSITIVE', 'score': -2.726576328277588}]]

In [29]:

text = "this movie was really good!!"
sentiment_pipe(text, **sent_kwargs)


[[{'label': 'NEGATIVE', 'score': -2.294790267944336},
  {'label': 'POSITIVE', 'score': 2.557040214538574}]]



## Generation settings

For the response generation we just use sampling and make sure top-k and nucleus sampling are turned off as well as a minimal length.


In [30]:

gen_kwargs = {
         "min_length":   -1, 
         "top_k":       0.0, 
         "top_p":       1.0, 
         "do_sample":  True, 
         "pad_token_id": tokenizer.eos_token_id
}



## Optimize model

### Training loop

The training loop consists of the following main steps:

* Get the query and responses from the policy network (GPT-2)
* Get sentiments for query/responses from BERT
* Optimize policy with PPO using the (query, response, reward) triplet


In [31]:

output_min_length     = 4
output_max_length     = 16
output_length_sampler = LengthSampler(output_min_length, output_max_length)


In [32]:

generation_kwargs = {
    "min_length":     -1,
    "top_k":         0.0,
    "top_p":         1.0,
    "do_sample":    True,
    "pad_token_id": tokenizer.eos_token_id,
}


In [33]:

## ppo_trainer.config.steps = 100    ## 20,000
ppo_trainer.config.steps


20000

In [34]:

for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]
    
    print(query_tensors)
    print(len(query_tensors))
    if epoch == 1:
        break


1it [00:00, 20.20it/s]

[tensor([19197,   645,  3241,   284,   262,  3651,  2157], device='cuda:0'), tensor([1212, 3807], device='cuda:0'), tensor([  34, 1501,  418, 2899], device='cuda:0'), tensor([  40, 1183,  307, 5508], device='cuda:0'), tensor([  40, 3505, 4379, 1552], device='cuda:0'), tensor([ 1212,   318,   262,   749, 14851], device='cuda:0'), tensor([6090,  691,  307], device='cuda:0'), tensor([41389,   417, 45622,   290,  1115], device='cuda:0'), tensor([  40, 2993,  340], device='cuda:0'), tensor([  40, 8288, 9827], device='cuda:0'), tensor([   39, 50107,   318,   407,   691,   262], device='cuda:0'), tensor([   40,  4398,   470,  1865,  1100, 20642], device='cuda:0'), tensor([26886, 39452,   258,   283,   357], device='cuda:0'), tensor([  40,  550,  284, 1577,  428], device='cuda:0'), tensor([ 3673,   530,   286,  3873, 13951,   338,  1266], device='cuda:0'), tensor([8241,  404,   72], device='cuda:0'), tensor([ 40, 760, 340, 338], device='cuda:0'), tensor([  818,  5751,  2750,  2185,    11, 2309




In [35]:

for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]
    print(epoch)
    print(batch)
    print('*********************')
    print('*********************')
    print('*********************')
    print('*********************')
    #### Get response from gpt2
    response_tensors = []
    for query in query_tensors:
        gen_len                             = output_length_sampler()
        generation_kwargs["max_new_tokens"] = gen_len
        response                            = ppo_trainer.generate(query, **generation_kwargs)
        response_tensors.append( response.squeeze()[-gen_len:] )
    batch["response"] = [ tokenizer.decode(r.squeeze()) for r in response_tensors ]
    print(batch)
    if epoch == 1:
        break


0it [00:00, ?it/s]

0
{'label': [tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(

1it [00:07,  7.83s/it]

{'label': [tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1,

1it [00:16, 16.32s/it]

{'label': [tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1,




In [36]:

batch.keys()


dict_keys(['label', 'input_ids', 'query', 'response'])


#### Compute sentiment score


In [37]:

batch["query"]


['This movie is poorly conceived',
 'i liked this movie',
 'The first noticeable problem about',
 'When I spotted that Noah Wyle',
 'Modern viewers know this',
 'Evidently lots',
 'There are some redeeming qualities',
 'This is a very strange',
 'good lord!',
 'This is',
 'You know,',
 'This is part one of',
 'I mean of all the obscure',
 'Okay first of all',
 'Not that many films have truly',
 "I don't know what it",
 'Cary Grant, Douglas Fairbanks',
 'I rented this DVD having seen',
 "I'm watching this on the Star",
 'The title should have',
 "It's a",
 'OK, not possibly, honestly',
 'The folks at Disney',
 'Ugh',
 'Terrific film with',
 "I'm a pretty old",
 'I thoroughly enjoyed this',
 'A movie you start watching',
 'The Wayward Cloud is a',
 'Cavemen was by',
 'During the Civil War',
 'On the back burner',
 'THE O',
 'There are just',
 'Shazbot,',
 'I suppose JEDI',
 'Brian Yuzna',
 '1st watched 5/',
 'When I was',
 'I was having just',
 'Neatly skipping over',
 'Kudos',
 'There i

In [38]:

batch["response"]


['---Monster Hunter is supposed to',
 ', even though it was released in',
 " Tonto: he was fuelling the characters' personalities at a young age",
 ' had made an early cameo appearance at',
 " is a critic's I think",
 ' of good plot holes, with a',
 ' that make Paterno work in a',
 ' film which can be very',
 ' For the first time, even',
 ' because a compass and compass (another form of sound cancellation) is used to',
 ' it wasn\'t half bad, but it had it all."<|endoftext|>',
 ' the five sections of the film. There are several humorous remarks that border',
 ' French and Irish myths that',
 ' - I wanted to see the full length',
 ' had a "Stalker" moment," but Seidl does done',
 ' is about that feature, but I like Bens',
 ', Juan Diego, Becky Hegar, Dee Dee Richards Areha Fisher,',
 ' one episode and considered purchasing it, so I watched it one',
 " Buddy TV channel. Jones' production is absolutely fantastic. All of the actors",
 ' been "Home Alone 2"',
 ' bold move and seems to demon

In [39]:

texts = [ q + r for q, r in zip(batch["query"], batch["response"]) ]


In [40]:

texts


['This movie is poorly conceived---Monster Hunter is supposed to',
 'i liked this movie, even though it was released in',
 "The first noticeable problem about Tonto: he was fuelling the characters' personalities at a young age",
 'When I spotted that Noah Wyle had made an early cameo appearance at',
 "Modern viewers know this is a critic's I think",
 'Evidently lots of good plot holes, with a',
 'There are some redeeming qualities that make Paterno work in a',
 'This is a very strange film which can be very',
 'good lord! For the first time, even',
 'This is because a compass and compass (another form of sound cancellation) is used to',
 'You know, it wasn\'t half bad, but it had it all."<|endoftext|>',
 'This is part one of the five sections of the film. There are several humorous remarks that border',
 'I mean of all the obscure French and Irish myths that',
 'Okay first of all - I wanted to see the full length',
 'Not that many films have truly had a "Stalker" moment," but Seidl doe

In [41]:

pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
pipe_outputs


[[{'label': 'NEGATIVE', 'score': 2.2946932315826416},
  {'label': 'POSITIVE', 'score': -2.7317020893096924}],
 [{'label': 'NEGATIVE', 'score': -1.925742506980896},
  {'label': 'POSITIVE', 'score': 2.2019805908203125}],
 [{'label': 'NEGATIVE', 'score': -0.18878592550754547},
  {'label': 'POSITIVE', 'score': -0.011356303468346596}],
 [{'label': 'NEGATIVE', 'score': -0.01333677675575018},
  {'label': 'POSITIVE', 'score': -0.11825768649578094}],
 [{'label': 'NEGATIVE', 'score': -1.9098631143569946},
  {'label': 'POSITIVE', 'score': 2.153944730758667}],
 [{'label': 'NEGATIVE', 'score': -1.0707851648330688},
  {'label': 'POSITIVE', 'score': 1.1191564798355103}],
 [{'label': 'NEGATIVE', 'score': -1.4140279293060303},
  {'label': 'POSITIVE', 'score': 1.6278727054595947}],
 [{'label': 'NEGATIVE', 'score': -1.9853274822235107},
  {'label': 'POSITIVE', 'score': 2.2685697078704834}],
 [{'label': 'NEGATIVE', 'score': -1.6584359407424927},
  {'label': 'POSITIVE', 'score': 1.8835939168930054}],
 [{'l

In [42]:

rewards = [ torch.tensor(output[1]["score"]) for output in pipe_outputs]
rewards


[tensor(-2.7317),
 tensor(2.2020),
 tensor(-0.0114),
 tensor(-0.1183),
 tensor(2.1539),
 tensor(1.1192),
 tensor(1.6279),
 tensor(2.2686),
 tensor(1.8836),
 tensor(-0.2499),
 tensor(1.4789),
 tensor(1.3212),
 tensor(0.2753),
 tensor(1.0822),
 tensor(0.8769),
 tensor(1.6423),
 tensor(0.7947),
 tensor(1.6409),
 tensor(2.6443),
 tensor(-0.7279),
 tensor(2.3880),
 tensor(-1.0589),
 tensor(2.3231),
 tensor(-1.1874),
 tensor(2.7740),
 tensor(-0.5302),
 tensor(2.6618),
 tensor(-0.1971),
 tensor(-0.1593),
 tensor(1.9095),
 tensor(-0.0538),
 tensor(1.9543),
 tensor(-1.1134),
 tensor(0.5438),
 tensor(-1.5954),
 tensor(0.1579),
 tensor(-1.8283),
 tensor(1.7351),
 tensor(-0.1821),
 tensor(-1.6691),
 tensor(-1.7416),
 tensor(0.7545),
 tensor(-1.8997),
 tensor(1.0599),
 tensor(-2.6420),
 tensor(0.3579),
 tensor(1.1189),
 tensor(-0.8248),
 tensor(2.8067),
 tensor(-2.7147),
 tensor(-2.6384),
 tensor(-2.2516),
 tensor(0.0565),
 tensor(1.4536),
 tensor(1.7251),
 tensor(2.6113),
 tensor(-0.4088),
 tensor

In [43]:

len(rewards)


128

In [None]:

for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]
    print(epoch)

    #### Get response from gpt2
    response_tensors = []
    for query in query_tensors:
        gen_len                             = output_length_sampler()
        generation_kwargs["max_new_tokens"] = gen_len
        response                            = ppo_trainer.generate(query, **generation_kwargs)
        response_tensors.append( response.squeeze()[-gen_len:] )
    batch["response"] = [ tokenizer.decode(r.squeeze()) for r in response_tensors ]

    #### Compute sentiment score
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
    rewards = [ torch.tensor(output[1]["score"]) for output in pipe_outputs]

    #### Run PPO step
    stats = ppo_trainer.step(
                     query_tensors, 
                     response_tensors, 
                     rewards
    )
    ppo_trainer.log_stats(stats, batch, rewards)
    
