<a href="https://colab.research.google.com/github/sajabdoli/fine_tune_LLMs/blob/main/DPO_fine_tune_LLMS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets
!pip install trl

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import DPOConfig, DPOTrainer



In [None]:
def load_model_and_tokenizer(model_name):
    """Load a causal language model in float16 together with its tokenizer.

    Args:
        model_name: Checkpoint identifier or path, forwarded unchanged to both
            ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple. If the tokenizer defines no pad token,
        its EOS token is used as the pad token.
    """
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Only fall back to EOS when the checkpoint ships no pad token; overwriting
    # an existing one would make padding indistinguishable from end-of-sequence.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer

In [None]:
def prepare_dataset(dataset):
    """Project every sample of ``dataset`` onto the three preference fields.

    Args:
        dataset: Any object exposing ``map(fn)``; each sample handed to ``fn``
            must be indexable by ``"prompt"``, ``"chosen"`` and ``"rejected"``.

    Returns:
        Whatever ``dataset.map`` returns for the projection function.
    """
    preference_fields = ("prompt", "chosen", "rejected")

    def format_dataset(samples):
        # Copy the fields through unchanged, in a fixed key order.
        return {field: samples[field] for field in preference_fields}

    return dataset.map(format_dataset)

In [None]:
def configure_lora(model, target_modules=None, r=8, lora_alpha=32, lora_dropout=0.1):
    """Wrap ``model`` with LoRA adapters after preparing it for k-bit training.

    Args:
        model: Model to adapt; passed through ``prepare_model_for_kbit_training``
            and then ``get_peft_model``.
        target_modules: Module names to attach adapters to, forwarded unchanged
            to ``LoraConfig``. ``None`` selects the default list below.
        r: Rank of the LoRA update matrices.
        lora_alpha: Scaling factor for the LoRA updates.
        lora_dropout: Dropout rate applied inside the LoRA layers.

    Returns:
        The value returned by ``get_peft_model`` for the prepared model.
    """
    if target_modules is None:
        # Default names follow the Llama-style naming convention. They are NOT
        # guaranteed to cover every linear layer of an arbitrary checkpoint.
        # NOTE(review): names absent from the model appear to be skipped rather
        # than rejected - verify coverage with print_trainable_parameters() and
        # pass the checkpoint's own module names when they differ.
        target_modules = [
            "q_proj", "k_proj", "v_proj",  # Query, Key, Value projections
            "o_proj",  # Output projection
            "gate_proj", "down_proj", "up_proj",  # MLP layers
            "embed_tokens",  # Embedding layer
            "lm_head"  # Language model head
        ]

    lora_config = LoraConfig(
        r=r,
        lora_alpha=lora_alpha,
        target_modules=target_modules,
        lora_dropout=lora_dropout,
        bias="none",  # No bias adaptation
        task_type="CAUSAL_LM"
    )

    # Prepare model for low-bit training
    model = prepare_model_for_kbit_training(model)

    # Apply LoRA configuration
    return get_peft_model(model, lora_config)

In [None]:
# Checkpoint shared by the model and its tokenizer.
MODEL_NAME = "microsoft/phi-2"

# Load the base model with 8-bit weights. The quantization setting is passed
# through BitsAndBytesConfig, the form documented by Transformers, instead of
# the bare `load_in_8bit=True` keyword.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto"
)

# The trainer cell further down passes `tokenizer`; define it here so the
# notebook survives Restart Kernel -> Run All instead of raising NameError.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Padding needs a pad token; fall back to EOS only when none is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Load and preprocess dataset
dataset = load_dataset("ayoubkirouane/Orca-Direct-Preference-Optimization")
processed_dataset = prepare_dataset(dataset)

# Prepare model with LoRA
model = configure_lora(model)
model.print_trainable_parameters()

trainable params: 4,792,320 || all params: 2,784,476,160 || trainable%: 0.1721


In [None]:
# Hyperparameters for the DPO run, grouped by concern.
training_args = DPOConfig(
    # Output and logging
    output_dir="./dpo_model",
    logging_steps=10,
    # Batching: 1 sample per device, gradients accumulated over 4 steps
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    # Optimisation schedule: 200 steps in total, the first 100 of them warmup
    max_steps=200,
    warmup_steps=100,
    lr_scheduler_type="cosine",
    learning_rate=1e-5,
    # Numerics: fp16 mixed precision with gradient-norm clipping at 0.3
    fp16=True,
    max_grad_norm=0.3,
)

In [None]:
# Build the DPO trainer from the LoRA-wrapped model, the training arguments
# and the "train" split. Expects a module-level `tokenizer` to exist before
# this cell runs.
# NOTE(review): `tokenizer` and the `max_*` keywords are accepted only by some
# TRL releases - confirm against the installed version.
dpo_trainer = DPOTrainer(
    model=model,
    # No separate reference model is supplied.
    # NOTE(review): with a PEFT-wrapped policy TRL is documented to use the
    # base weights (adapters disabled) as the reference - confirm.
    ref_model=None,
    args=training_args,
    train_dataset=processed_dataset["train"],
    tokenizer=tokenizer,
    # Sequence-length caps
    max_length=256,  # Add max sequence length
    max_prompt_length=128,  # Limit prompt length
    # NOTE(review): presumably only consulted for encoder-decoder models - confirm.
    max_target_length=128,
)


In [None]:
# Start training with the trainer built in the previous cell. As the last
# expression of the cell, the value returned by train() is displayed.
dpo_trainer.train()