In [1]:
import torch
from tqdm import tqdm
import pandas as pd

tqdm.pandas()

from transformers import pipeline, AutoTokenizer
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler

In [2]:
config = PPOConfig(
    model_name="cyberagent/calm2-7b",
    learning_rate=1.41e-5,
    log_with="wandb",
)

sent_kwargs = {"return_all_scores": True, "function_to_apply": "none", "batch_size": 16}

In [3]:
import wandb

wandb.init()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnaga-lcw-ld-0203[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
def build_dataset(config, dataset_name="shunk031/wrime", ver="ver1", input_min_text_length=5, input_max_text_length=8):
	tokenizer = AutoTokenizer.from_pretrained(config.model_name)
	tokenizer.pad_token = tokenizer.eos_token

	ds = load_dataset(dataset_name, ver, split="train")
	ds = ds.remove_columns(["user_id", "datetime", "writer", "reader1", "reader2", "reader3", "avg_readers"])
	
	input_size = LengthSampler(input_min_text_length, input_max_text_length)

	def tokenize(sample):
		sample["input_ids"] = tokenizer.encode(sample["sentence"])[: input_size()]
		sample["query"] = tokenizer.decode(sample["input_ids"])
		return sample

	ds = ds.map(tokenize, batched=False)
	ds.set_format(type="torch")
	return ds

In [5]:
dataset = build_dataset(config)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [6]:
dataset[0]

{'sentence': 'ぼけっとしてたらこんな時間｡チャリあるから食べにでたいのに…',
 'input_ids': tensor([ 2875,   408, 52411,  4421,  4740]),
 'query': 'ぼけっとしてたらこんな'}

In [7]:
def collator(data):
	return dict((key, [d[key] for d in data]) for key in data[0])

In [8]:
# Load LLM models
model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)
tokenizer = AutoTokenizer.from_pretrained(config.model_name)

tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

: 

In [None]:
# initialize PPOTrainer
ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer, dataset=dataset, data_collator=collator)