In [18]:
import torch
from tqdm import tqdm
import pandas as pd

tqdm.pandas()

from transformers import pipeline, AutoTokenizer
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
from trl.core import LengthSampler

In [16]:
# PPO settings: the policy checkpoint name and the optimiser learning rate.
# NOTE(review): 1.41e-2 is three orders of magnitude above the 1.41e-5 used in
# TRL's PPO examples — confirm this value is intentional.
config = PPOConfig(
    # Alternative checkpoint, currently disabled: model_name="t5_small_train_out"
    model_name="almanach/camembert-base",
    learning_rate=1.41e-2,
)

# Keyword arguments for the reward pipeline call: return every label's score,
# apply no post-processing function to the scores, and score 16 texts at a time.
sent_kwargs = dict(
    return_all_scores=True,
    function_to_apply="none",
    batch_size=16,
)

In [8]:
import torch
from torch.utils.data import Dataset, DataLoader

class DFSet(Dataset):
    """Map-style dataset of (text, title) pairs taken from two DataFrame columns.

    Attributes:
        inputs: Values of ``text_col`` as a plain Python list.
        labels: Values of ``title_col`` as a plain Python list.
    """

    def __init__(self, df, text_col='text', title_col='titles'):
        # Copy both columns out of the frame once, so item access is a list lookup.
        self.inputs, self.labels = df[text_col].tolist(), df[title_col].tolist()

    def __len__(self):
        """Return the number of rows, i.e. the length of the text column."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the ``(text, title)`` pair stored at position ``idx``."""
        return self.inputs[idx], self.labels[idx]

def loader_from_df(df, batch_size=16, text_col='text', title_col='titles', shuffle=True):
    """Wrap two columns of a DataFrame in a batched DataLoader.

    Args:
        df: DataFrame containing the text and title columns.
        batch_size: Number of (text, title) pairs per batch.
        text_col: Name of the column holding the input texts.
        title_col: Name of the column holding the target titles.
        shuffle: Whether the loader reshuffles the rows. Defaults to True (the
            previous hard-coded behaviour); pass False for evaluation data
            where a stable order is wanted.

    Returns:
        A DataLoader over a ``DFSet`` built from ``df``.
    """
    dataset = DFSet(df, text_col=text_col, title_col=title_col)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

def df_from_csv(filename):
    """Read ``filename`` into a DataFrame using pandas' default CSV parsing."""
    return pd.read_csv(filename)

def loader_from_csv(filename, batch_size=16, text_col='text', title_col='titles', shuffle=True):
    """Build a DataLoader straight from a CSV file.

    Args:
        filename: Path (or file-like object) accepted by ``pandas.read_csv``.
        batch_size: Number of (text, title) pairs per batch.
        text_col: Name of the column holding the input texts.
        title_col: Name of the column holding the target titles.
        shuffle: Forwarded to ``loader_from_df``; defaults to True.

    Returns:
        The DataLoader produced by ``loader_from_df`` for the parsed file.
    """
    return loader_from_df(
        df_from_csv(filename),
        batch_size=batch_size,
        text_col=text_col,
        title_col=title_col,
        shuffle=shuffle,
    )

def generate_summary(text, tokenizer, model, device):
    """Generate a summary for one text and return it as a single string.

    Args:
        text: The input text to summarise.
        tokenizer: Callable tokenizer whose result exposes ``input_ids`` and
            ``attention_mask`` and which provides ``decode``.
        model: Model exposing ``generate(input_ids, attention_mask=...)``.
        device: Device the encoded inputs are moved to before generation.

    Returns:
        The decoded first (and only) generated sequence, with special tokens
        skipped.
    """
    inputs = tokenizer([text], padding="max_length", truncation=True, max_length=512, return_tensors="pt")
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)
    output = model.generate(input_ids, attention_mask=attention_mask)
    # `output[0]` is one sequence of token ids, so it is decoded as a whole.
    # `batch_decode` expects a batch of sequences and would decode every token
    # id of this sequence separately, yielding per-token fragments.
    return tokenizer.decode(output[0], skip_special_tokens=True)

def predict_headlines(articles, tokenizer, model, device):
    """Generate one headline per article.

    Args:
        articles: Batch of article texts passed to the tokenizer in one call.
        tokenizer: Callable tokenizer whose result exposes ``input_ids`` and
            ``attention_mask`` and which provides ``batch_decode``.
        model: Model exposing ``generate(input_ids, attention_mask=...)``.
        device: Device the encoded inputs are moved to before generation.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated sequences,
        with special tokens skipped.
    """
    # Every article is padded/truncated to exactly 512 tokens.
    encoded = tokenizer(articles, padding="max_length", truncation=True, max_length=512, return_tensors="pt")
    generated = model.generate(
        encoded.input_ids.to(device),
        attention_mask=encoded.attention_mask.to(device),
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)

In [11]:
# Batched (text, title) loader over data/validation.csv, using the helper's defaults.
loader = loader_from_csv(filename="data/validation.csv")

In [17]:
# The tokenizer and the value-head policy are both loaded from the checkpoint
# named in `config`.
# NOTE(review): the loader output recorded for this cell recommends
# `is_decoder=True` when this architecture is used standalone — confirm that a
# causal-LM wrapper around this checkpoint is what is wanted.
tokenizer = AutoTokenizer.from_pretrained(config.model_name)
model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)

# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

If you want to use `CamembertLMHeadModel` as a standalone, add `is_decoder=True.`


In [19]:
# Reference model derived from the policy via TRL's `create_reference_model`;
# it is handed to the trainer alongside the policy itself.
ref_model = create_reference_model(model)

# PPO trainer wired to the config, policy, reference model and tokenizer.
ppo_trainer = PPOTrainer(
    config=config,
    model=model,
    ref_model=ref_model,
    tokenizer=tokenizer,
)



In [None]:
# Batched (text, title) loader over data/test.csv, using the helper's defaults.
# NOTE(review): the variable is called `train_loader` but the file is the test
# CSV — confirm which split is meant.
train_loader = loader_from_csv(filename="data/test.csv")

In [None]:
# Show the model's module structure. The bare name is used on purpose: calling
# `model()` runs a forward pass without any inputs, which fails instead of
# displaying anything and would stop a Restart & Run All.
model

In [25]:
from trl.core import respond_to_batch

# Smoke test: sample a continuation for one French sentence and decode it.
# The query is moved to whatever device the model's parameters live on rather
# than a hard-coded 'cuda', so the cell also works on CPU-only machines.
query_ids = tokenizer.encode(
    "Le soleil est caché par les nuages",
    return_tensors='pt',
    truncation=True,  # make the max_length cap explicit
    max_length=256,
).to(next(model.parameters()).device)

x = respond_to_batch(model, query_ids)
tokenizer.decode(x[0])

'</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED</s>NOTUSED'

In [None]:

# Sampling settings forwarded to `ppo_trainer.generate` for every query.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    # Pad generated sequences with the tokenizer's end-of-sequence id.
    "pad_token_id": tokenizer.eos_token_id,
}


# PPO loop: for every batch, generate one response per query, score the
# concatenated query+response text with `sentiment_pipe`, then run one PPO step.
#
# NOTE(review): `test_loader`, `output_length_sampler` and `sentiment_pipe` are
# not defined in the visible cells — confirm they are created before this runs.
# NOTE(review): the body indexes `batch` with the keys "input_ids" and "query".
# Loaders built with `loader_from_csv` are backed by `DFSet`, whose items are
# (text, title) tuples — presumably such batches carry no string keys; confirm
# the loader used here yields a mapping with tokenised queries.
# `epoch` is the counter produced by `enumerate`, so it advances once per batch.
for epoch, batch in tqdm(enumerate(test_loader)):
    query_tensors = batch["input_ids"]

    #### Generate a response for each query
    response_tensors = []
    for query in query_tensors:
        # The sampled value is used as the number of new tokens to generate.
        gen_len = output_length_sampler()
        generation_kwargs["max_new_tokens"] = gen_len
        response = ppo_trainer.generate(query, **generation_kwargs)
        # Keep only the last `gen_len` positions of the squeezed output.
        response_tensors.append(response.squeeze()[-gen_len:])
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

    #### Compute reward scores
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
    # The reward is the "score" of the second entry (index 1) of each output.
    rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]

    #### Run PPO step
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
    ppo_trainer.log_stats(stats, batch, rewards)