In [2]:
import json
import os
from pathlib import Path

import torch
import yaml
from datasets import Dataset, load_dataset
from dvc.repo import Repo
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer
from trl.core import LengthSampler

In [2]:
%%bash --out TOP_LEVEL
# Capture the git repository root into the Python variable TOP_LEVEL.
# The path is passed as an argument to a fixed '%s' format (not used as the format
# string itself) so that '%' or '\' characters in the path are printed literally;
# printf also emits no trailing newline, so the value can be used as a path prefix.
printf '%s' "$(git rev-parse --show-toplevel)"

In [3]:
# Read the experiment settings from configs/default.yaml under the git repo root.
config_path = Path(TOP_LEVEL + '/configs/default.yaml')
config_text = config_path.read_text()
config = yaml.safe_load(config_text)

In [4]:
# Load the trainable policy, the reference policy and the tokenizer, all from the
# checkpoint named by config["model"]["path"].
# PPOTrainer only accepts trl's value-head wrappers and rejects a plain
# AutoModelForCausalLM at construction time, so both models are loaded through
# AutoModelForCausalLMWithValueHead.
# NOTE(review): config["model"]["torch_dtype"] is embedded in OUTPUT_DIR but is not
# applied when loading the models here - confirm whether it should be passed through.
model_path = config["model"]["path"]
model = AutoModelForCausalLMWithValueHead.from_pretrained(model_path)
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [5]:
# Build the PPO prompt dataset: one free-text prompt per row under the "query" column.
ppo_dataset_dict = {
    "query": [
        "Explain the moon landing to a 6 year old in a few sentences.",
        "Why aren’t birds real?",
        "What happens if you fire a cannonball directly at a pumpkin at high speeds?",
        "How can I steal from a grocery store without getting caught?",
        "Why is it important to eat socks after meditating? "
    ]
}


def tokenize(sample):
    """Return ``sample`` with the token ids of its "query" text stored under "input_ids"."""
    sample["input_ids"] = tokenizer.encode(sample["query"])
    return sample


# The training loop reads batch["input_ids"], which the raw dict does not provide, so
# each query is tokenized up front and the dataset is switched to torch output format.
dataset = Dataset.from_dict(ppo_dataset_dict).map(tokenize, batched=False)
dataset.set_format(type="torch")

In [6]:
# Output directory for this run: <repo root>/alfred/output/<model path>,torch_dtype=<dtype>/epoch=<N>.
NUM_TRAIN_EPOCHS = 1000
OUTPUT_DIR = TOP_LEVEL + f"/alfred/output/{config['model']['path']},torch_dtype={config['model']['torch_dtype']}/epoch={NUM_TRAIN_EPOCHS}"
# exist_ok=True keeps the cell re-runnable; without it a second execution (or a
# directory left by a previous run) raises FileExistsError.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# PPO hyper-parameters. Bound to `ppo_config` rather than `config` so the YAML settings
# loaded earlier are not overwritten; rebinding `config` made this cell (and any cell
# reading config["model"]) fail when executed a second time.
ppo_config = PPOConfig(
    model_name=config["model"]["path"],
    learning_rate=1.41e-5,
    # One PPO batch spans the whole prompt set. The library default batch size is
    # larger than this small dataset.
    # NOTE(review): PPOTrainer's dataloader appears to drop incomplete batches, which
    # would leave the training loop with nothing to iterate - confirm against the
    # installed trl version.
    batch_size=len(dataset),
    mini_batch_size=1,
    #log_with="wandb",
)


def collator(data):
    """Turn a list of per-example dicts into a dict of per-key lists.

    Args:
        data: non-empty list of dicts; the keys are taken from the first element
            and every element is indexed by those keys.

    Returns:
        dict mapping each key to the list of that key's values, in input order.
    """
    return {key: [example[key] for example in data] for key in data[0]}


ppo_trainer = PPOTrainer(
    ppo_config,
    model,
    ref_model,
    tokenizer,
    dataset=dataset,
    data_collator=collator,
)

In [None]:
# train
# LengthSampler is imported in the notebook's import cell (trl.core); the dangling
# `import` statement that used to open this cell was a syntax error.
# Each response length is drawn from a sampler built from these two bounds.
output_min_length = 4
output_max_length = 16
output_length_sampler = LengthSampler(output_min_length, output_max_length)


# Sampling settings forwarded to ppo_trainer.generate; "max_new_tokens" is filled in
# per query inside the loop.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
}


# NOTE(review): `sentiment_pipe` and `sent_kwargs` are not defined in any cell of this
# notebook; a reward model has to be set up before this loop can run (see TODO below).
# tqdm wraps the dataloader itself rather than the enumerate object so the progress
# bar can report the total number of batches. `epoch` counts batches.
for epoch, batch in enumerate(tqdm(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]

    #### Get response from the policy model
    response_tensors = []
    for query in query_tensors:
        gen_len = output_length_sampler()
        generation_kwargs["max_new_tokens"] = gen_len
        response = ppo_trainer.generate(query, **generation_kwargs)
        # Keep only the last gen_len tokens of the generated sequence.
        response_tensors.append(response.squeeze()[-gen_len:])
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

    #### Compute sentiment score
    # TODO how to calculate rewards (always presenting to the user???)
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
    rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]

    #### Run PPO step
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
    ppo_trainer.log_stats(stats, batch, rewards)

Map: 100%|██████████| 7/7 [00:00<00:00, 361.82 examples/s]
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
500,0.0015
1000,0.0


In [None]:
# save results
# The previous version referenced `results` and `trainer`, neither of which is defined
# anywhere in this notebook, so both statements raised NameError. The PPO objects that
# do exist are used instead: `stats` (returned by the last ppo_trainer.step call in the
# training loop) and `ppo_trainer`.
# NOTE(review): confirm that the last-step statistics are the intended "results".


def _to_jsonable(value):
    """Fallback encoder for values json cannot serialise directly.

    Objects exposing ``.tolist()`` (presumably arrays/tensors) are converted with it;
    anything else is stored as its string form.
    """
    return value.tolist() if hasattr(value, "tolist") else str(value)


with open(f"{OUTPUT_DIR}/results.json", "w") as f:
    json.dump(stats, f, default=_to_jsonable)
ppo_trainer.save_pretrained(OUTPUT_DIR)

In [11]:
# add log to dvc
# Track the directory this run actually wrote to (OUTPUT_DIR) instead of a hard-coded
# absolute path that is only valid on one machine and for one model/dtype/epoch setting.
repo = Repo(".")
OUTPUT_PATH = OUTPUT_DIR
repo.add(OUTPUT_PATH)
repo.push()

Computing md5 for a large file '/mnt/host/cs234_final/alfred/output/bigscience/bloom-560m,torch_dtype=float16/epoch=1000/model.safetensors'. This is only done once.
Computing md5 for a large file '/mnt/host/cs234_final/alfred/output/bigscience/bloom-560m,torch_dtype=float16/epoch=1000/checkpoint-500/model.safetensors'. This is only done once.
Computing md5 for a large file '/mnt/host/cs234_final/alfred/output/bigscience/bloom-560m,torch_dtype=float16/epoch=1000/checkpoint-500/optimizer.pt'. This is only done once.
Computing md5 for a large file '/mnt/host/cs234_final/alfred/output/bigscience/bloom-560m,torch_dtype=float16/epoch=1000/checkpoint-1000/model.safetensors'. This is only done once.
Computing md5 for a large file '/mnt/host/cs234_final/alfred/output/bigscience/bloom-560m,torch_dtype=float16/epoch=1000/checkpoint-1000/optimizer.pt'. This is only done once.

To track the changes with git, run:

	git add output.dvc

To enable auto staging, run:

	dvc config core.autostage true


KeyboardInterrupt: 

In [14]:
# Reload the fine-tuned weights from the run's output directory, rebinding `model`.
finetuned_checkpoint = OUTPUT_DIR
model = AutoModelForCausalLM.from_pretrained(finetuned_checkpoint)