In [1]:
import gc
import os

import torch
import wandb
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format

wandb.init(project="orpo-ft")

attn_implementation = "eager"
torch_dtype = torch.float16


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnoahpunintended[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
base_model = "HuggingFaceTB/SmolLM-360M-instruct"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)


base_model = "HuggingFaceTB/SmolLM-360M"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    attn_implementation=attn_implementation
)
model, tokenizer = setup_chat_format(model, tokenizer)

tokenizer.model_max_length = 2048
tokenizer.pad_token = "<|endoftext|>" 
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token )
tokenizer.padding_side = 'right'


In [3]:
dataset_name = "HuggingFaceH4/orca_dpo_pairs"
dataset = load_dataset(dataset_name, split="all")
dataset = dataset.shuffle(seed=42)#.select(range(100))

def format_chat_template(row):
    row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False)
    row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc= os.cpu_count(),
)
dataset = dataset.train_test_split(test_size=0.01)

In [4]:
# test an example model forward pass
model.eval();

#example_prompts = ["What does your name stand for?" , "Tell me some fun facts about space", "Who are you?", "What can you do for me?", "Who won the US presidential election?"]
example_prompts = ["Square root of 100?"]
for prompt in example_prompts:
  chat_prompt = f"""<|im_start|>system\nYou are Pi-Card, the Raspberry Pi voice assistant.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"""
#    chat_prompt = f"""<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"""

  input_ids = tokenizer.encode(chat_prompt, return_tensors='pt')
  # send to cuda
  input_ids = input_ids.to(model.device)

  output = model.generate(input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id, do_sample=False)

  output_text = tokenizer.decode(output[0], skip_special_tokens=True) 
  print(output_text)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


system
You are Pi-Card, the Raspberry Pi voice assistant.
user
Square root of 100?
assistant

## 1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1

In [5]:
orpo_args = ORPOConfig(
    learning_rate=8e-5,
    beta=0.1,
    lr_scheduler_type="linear",
    max_length=1024,
    max_prompt_length=512,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit",
    num_train_epochs=3,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    report_to="wandb",
    output_dir="./orpo_checkpoint_dir/",
)

trainer = ORPOTrainer(
    model=model,
    args=orpo_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
)
trainer.train()
trainer.save_model(new_model)



Map:   0%|          | 0/12730 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2453 > 2048). Running this sequence through the model will result in indexing errors


Map:   0%|          | 0/129 [00:00<?, ? examples/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
955,1.2201,1.303726,10.908,11.826,5.959,-0.064073,-1.354176,0.884615,1.290103,-13.541759,-0.640728,-3.047161,-1.000978,1.294628,-0.117854,13.883141
1910,1.0316,1.278734,10.8624,11.876,5.984,-0.063308,-1.568314,0.830769,1.505006,-15.683137,-0.633084,-3.196254,-1.271801,1.269806,-0.117618,16.06662
2865,0.9455,1.26501,10.9952,11.732,5.912,-0.062469,-1.65443,0.846154,1.591961,-16.544298,-0.624688,-3.210003,-1.328838,1.256219,-0.117552,16.950958
3820,1.0021,1.281111,10.9644,11.765,5.928,-0.063985,-1.737926,0.892308,1.673941,-17.379267,-0.639854,-3.08611,-1.318003,1.272396,-0.117518,17.764767


NameError: name 'new_model' is not defined

In [6]:
new_model = "smol_orpo"
trainer.save_model(new_model)

In [16]:
# test an example model forward pass
model.eval();

#example_prompts = ["What does your name stand for?" , "Tell me some fun facts about space", "Who are you?", "What can you do for me?", "Who won the US presidential election?"]
example_prompts = ["Can you tell me 5 fun facts about space?"]
for prompt in example_prompts:
  chat_prompt = f"""<|im_start|>system\nYou are Pi-Card, the Raspberry Pi voice assistant.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"""
#    chat_prompt = f"""<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"""

  input_ids = tokenizer.encode(chat_prompt, return_tensors='pt')
  # send to cuda
  input_ids = input_ids.to(model.device)

  output = model.generate(input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id, do_sample=False)

  output_text = tokenizer.decode(output[0], skip_special_tokens=True) 
  print(output_text)
  print('------')

system
You are Pi-Card, the Raspberry Pi voice assistant.
user
Can you tell me 5 fun facts about space?
assistant
Sure! Here are five fun facts about space:

1. Space is the final frontier: It's the vast expanse of space that exists beyond Earth's atmosphere.
2. Space is a place for exploration: Astronauts and scientists study space to learn more about the universe and its mysteries.
3. Space is full of wonders: From black holes to distant galaxies, space is filled with incredible phenomena that are still being discovered.
4. Space is a big place: The universe is vast, with billions of galaxies and countless stars, planets, and other celestial bodies.
5. Space is a big business: The space industry is a billion-dollar business, with companies like SpaceX and Blue Origin investing heavily in space exploration and technology.

These are just a few fun facts about space. There is so much more to learn about this fascinating and mysterious place!
------


In [17]:
# Create gguf file

# Please note you'll need to update the checkpoint path and model names to the one you want to convert & save
!python llama.cpp/convert_hf_to_gguf.py orpo_checkpoint_dir/checkpoint-4773 --outfile picard-orpo-0.36b-f16.gguf --outtype f16


INFO:hf-to-gguf:Loading model: checkpoint-4773
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:gguf: loading model part 'model.safetensors'
INFO:hf-to-gguf:token_embd.weight,           torch.float32 --> F16, shape = {960, 49152}
INFO:hf-to-gguf:blk.0.attn_norm.weight,      torch.float32 --> F32, shape = {960}
INFO:hf-to-gguf:blk.0.ffn_down.weight,       torch.float32 --> F16, shape = {2560, 960}
INFO:hf-to-gguf:blk.0.ffn_gate.weight,       torch.float32 --> F16, shape = {960, 2560}
INFO:hf-to-gguf:blk.0.ffn_up.weight,         torch.float32 --> F16, shape = {960, 2560}
INFO:hf-to-gguf:blk.0.ffn_norm.weight,       torch.float32 --> F32, shape = {960}
INFO:hf-to-gguf:blk.0.attn_k.weight,         torch.float32 --> F16, shape = {960, 320}
INFO:hf-to-gguf:blk.0.attn_output.weight,    torch.float32 --> F16, shape = {960, 960}
INFO:hf-to-gguf:blk.0.attn_q.weight,         torch.float32 --> F16, shape = {960, 960}
INFO:hf-to