<div><svg width="600" height="150" viewBox="0 0 600 150" xmlns="http://www.w3.org/2000/svg">
  <defs>
    <linearGradient id="kaviGrad" x1="0%" y1="0%" x2="100%" y2="100%">
      <stop offset="0%" style="stop-color:#1e3a8a;stop-opacity:1" />
      <stop offset="100%" style="stop-color:#3b82f6;stop-opacity:1" />
    </linearGradient>
    <filter id="glowText">
      <feGaussianBlur stdDeviation="1.5" result="coloredBlur"/><feMerge><feMergeNode in="coloredBlur"/><feMergeNode in="SourceGraphic"/></feMerge>
    </filter>
  </defs>
  <rect width="600" height="150" fill="#0f172a" rx="20"/>
  <text x="50" y="90" font-family="Verdana, sans-serif" font-size="60" font-weight="bold" fill="url(#kaviGrad)" filter="url(#glowText)">kavi.ai</text>
  <text x="300" y="90" font-family="Verdana, sans-serif" font-size="24" fill="#94a3b8">by machha kiran</text>
  <line x1="50" x2="550" y1="110" y2="110" stroke="#334155" stroke-width="2"/>
  <circle cx="560" cy="40" r="10" fill="#3b82f6"/>
  <circle cx="530" cy="30" r="6" fill="#1e40af"/>
</svg></div>


In [None]:
!pip install transformers --upgrade
!pip install datasets
!pip install trl[peft] --upgrade
!pip install -U git+https://github.com/huggingface/trl
!pip install bitsandbytes loralib
!pip install wandb -U
!pip install hf_transfer



In [None]:
# Environment configuration for the Hugging Face Hub and Weights & Biases.
# Comments are kept on their own lines: anything after `=` on a %env line
# becomes part of the value.
#
# Enables the hf_transfer download path (the package is installed earlier in this notebook).
%env HF_HUB_ENABLE_HF_TRANSFER=True
# W&B project and run id. WANDB_RUN_ID is read back later in this notebook and
# reused as the training output directory.
%env WANDB_PROJECT=LLM-Training-Course
%env WANDB_RUN_ID=ORPO
# NOTE(review): `__vsc_ipynb_file__` looks like a variable injected by the VS Code
# notebook kernel; confirm it is defined when running under another frontend.
%env WANDB_NOTEBOOK_NAME={__vsc_ipynb_file__}

In [None]:
# Authenticate with Weights & Biases before any training run is created.
import wandb
wandb.login()

In [None]:
import sys

# Extra directory for module lookup; presumably where the course's local modules
# (such as `helpers`, imported further down) live. TODO confirm the location.
COURSE_ROOT = '/root/llm-training-course/'

# Only append once so re-running this cell does not pile up duplicate entries.
if COURSE_ROOT not in sys.path:
    sys.path.append(COURSE_ROOT)

In [None]:
from datasets import load_dataset

# Two slices of the same "train" split: the first 30% is used for training and
# the following 5% (30%-35%) is held out for evaluation.
split_specs = ["train[:30%]", "train[30%:35%]"]
train_ds, eval_ds = load_dataset("mlabonne/orpo-dpo-mix-40k", split=split_specs)

In [None]:
# Display the training split (bare last expression -> rich notebook repr).
train_ds

In [None]:

# Columns retained for ORPO training; everything else is dropped.
ORPO_COLUMNS = ("chosen", "rejected", "prompt")


def keep_orpo_columns(dataset):
    """Return `dataset` with every column outside `ORPO_COLUMNS` removed.

    Args:
        dataset: an object exposing `column_names` and `remove_columns`
            (here: the splits returned by `load_dataset`).

    Returns:
        The result of `dataset.remove_columns(...)` for the surplus columns.
    """
    surplus_columns = [name for name in dataset.column_names if name not in ORPO_COLUMNS]
    return dataset.remove_columns(surplus_columns)


# The same pruning is applied to both splits. The previous argument-less
# `.map()` call on the training split was an identity pass and has been dropped.
orpo_train_ds = keep_orpo_columns(train_ds)
orpo_eval_ds = keep_orpo_columns(eval_ds)

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Single source of truth for the checkpoint: the tokenizer here and the model
# loaded later in the notebook are both resolved from `model_id`.
model_id = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
def process_convo_for_orpo(item):
    return {
        "prompt": item["prompt"],
        "chosen": tokenizer.apply_chat_template(item["chosen"], tokenize=False),
        "rejected": tokenizer.apply_chat_template(item["rejected"], tokenize=False)
    }
orpo_train_ds = orpo_train_ds.map(process_convo_for_orpo)
orpo_eval_ds = orpo_eval_ds.map(process_convo_for_orpo)

In [None]:
tokenizer.eos_token = "<|eot_id|>"
tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)
print(tokenizer)
print("---")
print("Vocab size:", tokenizer.vocab_size)
print("---")
print("Chat template:", tokenizer.chat_template)
chat_template = open('../chat_templates/llama-3-chat.jinja').read()
chat_template = chat_template.replace('    ', '').replace('\n', '')
print("Chat Template", chat_template)
tokenizer.chat_template = chat_template
print("---")


In [None]:

# Loading options for the base model: place it on the GPU in bfloat16.
# NOTE(review): `trust_remote_code=True` allows code shipped with the checkpoint
# to run; confirm it is actually required for this model.
model_load_options = dict(
    device_map="cuda",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_options)

In [None]:
# `helpers` is a project-local module (not shown in this notebook).
# Presumably this configures the tokenizer's padding token/side so batches can
# be padded during training; verify against helpers.set_padding_for_tokenizer.
from helpers import set_padding_for_tokenizer
set_padding_for_tokenizer(tokenizer)

In [None]:
from helpers import stream_responses_for_sample
from transformers import GenerationConfig

# Sanity check before fine-tuning: send a few single-turn user prompts through
# the model, generating at most 50 new tokens for each.
sample_prompts = (
    "What is the capital of France?",
    "Write me a javascript function that check if string is palindrome.",
    "Given x^2=36-4 what is x?",
)
sample_conversations = [
    [{"role": "user", "content": prompt_text}] for prompt_text in sample_prompts
]
generation_config = GenerationConfig(max_new_tokens=50)
stream_responses_for_sample(
    model,
    tokenizer,
    sample_conversations,
    generation_config=generation_config,
)

In [None]:
from peft import LoraConfig

# LoRA adapters are attached to the attention projections and the MLP projections.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# Modules listed here are passed as `modules_to_save`, i.e. kept trainable and
# saved alongside the adapter instead of being wrapped with LoRA.
fully_trained_modules = ["embed_tokens", "input_layernorm", "post_attention_layernorm"]

peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=attention_projections + mlp_projections,
    modules_to_save=fully_trained_modules,
)



In [None]:
import dataclasses
import os

from trl import ORPOConfig, ORPOTrainer

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and later dropped the old name. This notebook installs the latest release, so
# pick whichever keyword the installed config class actually accepts.
_orpo_config_fields = {field.name for field in dataclasses.fields(ORPOConfig)}
eval_strategy_kwarg = (
    "eval_strategy" if "eval_strategy" in _orpo_config_fields else "evaluation_strategy"
)

training_args = ORPOConfig(
    # Checkpoints go to a directory named after the W&B run id; fall back to
    # "ORPO" so output_dir is never None when the variable is unset.
    output_dir=os.getenv("WANDB_RUN_ID", "ORPO"),
    report_to="wandb",
    num_train_epochs=1.0,
    do_train=True,
    do_eval=True,
    log_level="debug",
    gradient_checkpointing=True,
    # 4 samples per device x 8 accumulation steps per optimizer update.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    per_device_eval_batch_size=4,
    lr_scheduler_type="constant",
    bf16=True,
    warmup_steps=0,
    # Float values below 1 are interpreted as a fraction of the total training
    # steps: evaluate and log every 20% of the run.
    eval_steps=0.2,
    logging_steps=0.2,
    max_grad_norm=.3,
    learning_rate=1e-6,
    beta=0.1,
    **{eval_strategy_kwarg: "steps"},
)

In [None]:
import inspect

# Newer TRL releases take the tokenizer through `processing_class`; older ones
# through `tokenizer`. TRL is installed from the development branch in this
# notebook, so inspect the installed signature and use the keyword it supports.
_trainer_params = inspect.signature(ORPOTrainer.__init__).parameters
tokenizer_kwarg = "processing_class" if "processing_class" in _trainer_params else "tokenizer"

# Build the ORPO trainer: base model + LoRA config + the chat-formatted
# train/eval preference datasets.
orpo_trainer = ORPOTrainer(
    model,
    peft_config=peft_config,
    args=training_args,
    train_dataset=orpo_train_ds,
    eval_dataset=orpo_eval_ds,
    **{tokenizer_kwarg: tokenizer},
)

In [None]:
# Start the ORPO fine-tuning run with the trainer configured above.
orpo_trainer.train()