In [1]:
# Install TRL pinned to version 0.10.1 into the kernel's environment (-q reduces pip's output).
%pip install -q trl==0.10.1

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from trl import PPOConfig, PPOTrainer, AutoModelForCausalLMWithValueHead
import torch

import os

In [None]:
# Debugging aid for CUDA errors. Set at the very top of the cell so it is in
# place before any torch.cuda call or model load happens here.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

model_id = "meta-llama/Llama-3.2-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the EOS token for padding.
tokenizer.pad_token = tokenizer.eos_token

# Policy model: the causal LM wrapped with a value head, as PPOTrainer expects.
model = AutoModelForCausalLMWithValueHead.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto"
)

# Reference model: a second copy loaded from the same checkpoint, passed to
# PPOTrainer as `ref_model`.
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto"
)

# Single-example PPO configuration (batch and mini-batch of one).
config = PPOConfig(
    model_name=model_id,
    learning_rate=1e-5,
    batch_size=1,
    mini_batch_size=1,
    cliprange=0.2,
    kl_penalty="kl",
    init_kl_coef=0.05
)

trainer = PPOTrainer(
    model=model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    config=config
)

# Conversation whose last user turn is the query the policy must answer.
messages = [
    {"role": "user", "content": "Hi, what’s the capital of France?"},
    {"role": "assistant", "content": "The capital of France is Paris."},
    {"role": "user", "content": "Thanks! And what’s the population?"}
]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

# The chat template already renders the special tokens into `prompt`, so it
# must be tokenized with add_special_tokens=False; otherwise the tokenizer
# prepends a second beginning-of-text token. The response is a continuation of
# the query and must not start with one either.
query = tokenizer(
    prompt, return_tensors="pt", add_special_tokens=False
).input_ids.to(model.pretrained_model.device)
response = tokenizer(
    "100 million people!", return_tensors="pt", add_special_tokens=False
).input_ids.to(model.pretrained_model.device)

# Scalar reward for the single (query, response) pair, on the model's device.
device = next(model.parameters()).device
reward = torch.tensor(0.6, device=device)

# One PPO optimisation step; each argument is a list with one entry per sample.
trainer.step([query[0]], [response[0]], [reward])

In [4]:
# Persist the PPO-updated model and its tokenizer into one directory, plus a
# separate copy of the value-head weights alongside them.
save_path = "/content/test_model"
value_head_file = f"{save_path}/value_head.pt"

trainer.model.save_pretrained(save_path)
trainer.tokenizer.save_pretrained(save_path)

value_head_weights = trainer.model.v_head.state_dict()
torch.save(value_head_weights, value_head_file)

In [None]:
# Directory the fine-tuned model, tokenizer and value head were saved to.
checkpoint_dir = "/content/test_model"

# Reload the language-model weights from the saved checkpoint.
base_model = AutoModelForCausalLM.from_pretrained(
    checkpoint_dir,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Wrapping an already-instantiated model creates a fresh value head, so the
# trained head has to be restored explicitly from the file saved next to the
# checkpoint. map_location="cpu" lets the tensors load regardless of the device
# they were saved from; load_state_dict then copies them into the head.
model_new = AutoModelForCausalLMWithValueHead(base_model)
value_head_state = torch.load(
    f"{checkpoint_dir}/value_head.pt", map_location="cpu", weights_only=True
)
model_new.v_head.load_state_dict(value_head_state)

tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
# Reuse the EOS token for padding.
tokenizer.pad_token = tokenizer.eos_token

In [6]:
# Generation only needs the wrapped language model, not the value head.
lm_model = model_new.pretrained_model
lm_model.eval()

pipe = pipeline(
    "text-generation",
    model=lm_model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

messages = [
    {"role": "user", "content": "Hi, what’s the capital of France?"},
    {"role": "assistant", "content": "The capital of France is Paris."},
    {"role": "user", "content": "Thanks! And what’s the population?"}
]

# Sampling is enabled below, so fix the RNG seed to make the printed answer
# reproducible on a re-run.
torch.manual_seed(0)

# Take the pad id from the tokenizer rather than hard-coding a token id, so
# generation stays in agreement with the tokenizer's configured pad token.
outputs = pipe(
    messages,
    max_new_tokens=500,
    do_sample=True,
    temperature=0.8,
    pad_token_id=tokenizer.pad_token_id,
)

# The pipeline returns the whole conversation; the last message is the reply.
print(outputs[0]["generated_text"][-1]["content"])

Device set to use cuda:0


As of my knowledge cutoff in 2023, the estimated population of France is approximately 67.2 million people. However, please note that population numbers can change over time due to various factors such as births, deaths, and migration.


In [7]:
# Baseline: run the same conversation through the original Hub checkpoint.
# NOTE: this rebinds `tokenizer`, `base_model` and `pipe` to the baseline objects.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
# Reuse the EOS token for padding.
tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",
    torch_dtype=torch.float16,
    device_map="auto"
)

pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

messages = [
    {"role": "user", "content": "Hi, what’s the capital of France?"},
    {"role": "assistant", "content": "The capital of France is Paris."},
    {"role": "user", "content": "Thanks! And what’s the population?"}
]

# Sampling is enabled below, so fix the RNG seed to make the printed answer
# reproducible on a re-run.
torch.manual_seed(0)

# Take the pad id from the tokenizer rather than hard-coding a token id, so
# generation stays in agreement with the tokenizer's configured pad token.
outputs = pipe(
    messages,
    max_new_tokens=500,
    do_sample=True,
    temperature=0.8,
    pad_token_id=tokenizer.pad_token_id,
)

# The pipeline returns the whole conversation; the last message is the reply.
print(outputs[0]["generated_text"][-1]["content"])

As of my knowledge cutoff in 2023, the population of Paris is approximately 2.1 million people within the city limits. However, the larger metropolitan area of Paris, known as the Île-de-France region, has a population of around 12.2 million people.
