# Direct Preference Optimization

In [2]:
# import libraries
import torch
import pandas as pd
import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from trl import DPOTrainer, DPOConfig
from datasets import load_dataset, Dataset

## Define helper functions

In [4]:
def generate_responses(model, tokenizer, user_message, system_message=None, max_new_tokens=100):
    """Generate a single greedy-decoded reply to a one-turn chat prompt.

    Args:
        model: Causal LM exposing ``.device`` and ``.generate(...)``.
        tokenizer: Tokenizer exposing ``apply_chat_template``, ``__call__``,
            ``decode`` and ``eos_token_id``.
        user_message: Text of the user turn.
        system_message: Optional system prompt; skipped when falsy.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded continuation only (prompt tokens removed), with special
        tokens skipped and surrounding whitespace stripped.
    """
    # The data is assumed to be single-turn: an optional system turn, then one user turn.
    system_turn = [{"role": "system", "content": system_message}] if system_message else []
    conversation = system_turn + [{"role": "user", "content": user_message}]

    # Render the conversation to text with the tokenizer's chat template.
    chat_text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    # Tokenize and move the tensors to the model's device in case the model is on GPU.
    encoded = tokenizer(chat_text, return_tensors="pt").to(model.device)
    prompt_length = encoded["input_ids"].shape[1]

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": False,  # greedy decoding
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    # For serving at scale, a dedicated engine (vLLM, SGLang or TensorRT-LLM) is recommended.
    with torch.no_grad():
        sequences = model.generate(**encoded, **generation_kwargs)

    # The first returned sequence starts with the prompt tokens; keep only what follows.
    completion_ids = sequences[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


In [5]:
def test_model_with_questions(model, tokenizer, questions,
                             system_message=None, title="Model Output"):
    """Run the model on each question and show prompts and replies as a table.

    Args:
        model: Model passed through to ``generate_responses``.
        tokenizer: Tokenizer passed through to ``generate_responses``.
        questions: Iterable of user prompts, answered one at a time.
        system_message: Optional system prompt applied to every question.
        title: Heading printed above the table.

    Prints the heading, then renders a two-column DataFrame with ``display``.
    Returns nothing.
    """
    print(f"\n==== {title} ====\n")

    # One row per question, in input order.
    table = pd.DataFrame([
        {
            "User Prompt": prompt,
            "Assistant Response": generate_responses(model, tokenizer, prompt, system_message),
        }
        for prompt in questions
    ])

    # Show full cell text instead of truncating long responses.
    # This changes the pandas display option for the rest of the session.
    pd.set_option('display.max_colwidth', None)
    display(table)


In [6]:
# Fallback Jinja chat template for tokenizers that ship without one. Each turn is
# rendered as "<Role>: <content>"; assistant turns end with <|endoftext|>.
# NOTE(review): the template never references `add_generation_prompt`, so that
# flag has no effect while this fallback is in use.
FALLBACK_CHAT_TEMPLATE = """{% for message in messages %}
            {% if message['role'] == 'system' %}System: {{ message['content'] }}\n
            {% elif message['role'] == 'user' %}User: {{ message['content'] }}\n
            {% elif message['role'] == 'assistant' %}Assistant: {{ message['content'] }} <|endoftext|>\n
            {% endif %}
            {% endfor %}"""


def load_model_and_tokenizer(model_name: str, use_gpu: bool = True):
    """Load a causal language model and its tokenizer by name.

    Args:
        model_name: Identifier or path passed to ``from_pretrained`` for both
            the tokenizer and the model.
        use_gpu: When true, the model is moved to the ``"cuda"`` device.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer is guaranteed to have a
        chat template and a pad token set on return.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    if use_gpu:
        model.to("cuda")

    # If the tokenizer has no chat template, install the fallback one.
    # The block tags must close with "%}" (no space), or Jinja rejects the template.
    if not tokenizer.chat_template:
        tokenizer.chat_template = FALLBACK_CHAT_TEMPLATE

    # If the tokenizer defines no pad token, reuse the EOS token for padding.
    if not tokenizer.pad_token:
        tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer
