In [20]:
!pip -q install trl transformers peft bitsandbytes datasets accelerate peft


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m


In [21]:
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed, BitsAndBytesConfig
from trl import DPOTrainer, DPOConfig
from datasets import load_dataset, Dataset, DatasetDict
from peft import LoraConfig, get_peft_model
import torch
import json
import time

In [26]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

if device.type == "cuda":
    device_index = torch.cuda.current_device()
    device_name = torch.cuda.get_device_name(device_index)
    total_mem = torch.cuda.get_device_properties(device_index).total_memory / 1e9  # bytes to GB
    allocated_mem = torch.cuda.memory_allocated(device_index) / 1e9
    reserved_mem = torch.cuda.memory_reserved(device_index) / 1e9

    print(f"CUDA device name: {device_name}")
    print(f"Total memory: {total_mem:.2f} GB")
    print(f"Memory allocated: {allocated_mem:.2f} GB")
    print(f"Memory reserved: {reserved_mem:.2f} GB")

Using device: mps


In [27]:
dataset = load_dataset("HumanLLMs/Human-Like-DPO-Dataset")
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 10884
    })
})

In [28]:
train_test = dataset['train'].train_test_split(test_size=0.2, shuffle=True, seed=42)
test_val = train_test['test'].train_test_split(test_size=200, shuffle=True, seed=42)

dataset = DatasetDict({
    "train": train_test['train'],
    "test": test_val['train'],
    "val": test_val['test']
})

dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 8707
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 1977
    })
    val: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 200
    })
})

In [29]:
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [30]:
model_name='Qwen/Qwen2.5-7B-Instruct'
device_map = {"": 0}
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config,
    trust_remote_code=True,
    use_auth_token=True
)
MAX_LENGTH = model.config.max_position_embeddings

ImportError: The installed version of bitsandbytes (<0.43.1) requires CUDA, but CUDA is not available. You may need to install PyTorch with CUDA support or upgrade bitsandbytes to >=0.43.1.

In [None]:
tokenizer = AutoTokenizer.from_pretrained(
    model_name, trust_remote_code=True, padding=True, padding_side="left",
    add_eos_token=False, add_bos_token=False, use_fast=False
)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
PROMPT_TEMPLATE = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful, polite, and friendly assistant. Answer questions to the best of your ability.
If you don't know something, be honest and say so. Keep responses clear and concise.
<|eot_id|>

<|start_header_id|>user<|end_header_id|>
{0}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>
{1}
"""


def generate_response(
    model, question, answer='',
    max_length=MAX_LENGTH, prompt_template=PROMPT_TEMPLATE,
    seed=42, tokenizer=tokenizer
):
    set_seed(seed)
    prompt = prompt_template.format(question, answer)
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        return_attention_mask=True,
        padding=True
    ).to(device)

    outputs = model.generate(
        **inputs,
        max_length=max_length,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode full output and prompt
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    prompt_text = tokenizer.decode(inputs['input_ids'][0], skip_special_tokens=True)

    # Get only the response part
    response_only = full_text[len(prompt_text):].strip()

    return response_only

    

NameError: name 'MAX_LENGTH' is not defined

In [None]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'dense'
    ],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
model

In [None]:
timestamp_str = str(int(time.time()))
output_dir = f'./dpo-explainer-lora-output-{timestamp_str}'

training_args = DPOConfig(
    output_dir=output_dir,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    gradient_accumulation_steps=2,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=20,
    learning_rate=5e-5,
    fp16=True,
    report_to="none",
    beta=0.1,
    loss_type='sigmoid'
)

trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    processing_class=tokenizer,
)

In [None]:
trainer.train()

In [None]:
# === SAVE ===
trainer.model.save_pretrained(f"{output_dir}/best_model")
tokenizer.save_pretrained(f"{output_dir}/best_model")