In [1]:
from datasets import load_dataset
from trl import DPOConfig, DPOTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

### load data

In [2]:
dataset = load_dataset("stramzik/youtube-titles-dpo")

README.md:   0%|          | 0.00/650 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/39.4k [00:00<?, ?B/s]

valid-00000-of-00001.parquet:   0%|          | 0.00/14.1k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1026 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/114 [00:00<?, ? examples/s]

### load model

In [3]:
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token # set pad token

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

### generate title with base model

In [4]:
def format_chat_prompt(user_input, system_message="You are a helpful assistant."):
    """
    Formats user input into the chat template format with <|im_start|> and <|im_end|> tags.

    Args:
        user_input (str): The input text from the user.

    Returns:
        str: Formatted prompt for the model.
    """
    
    # Format user message
    user_prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n"
    
    # Start assistant's turn
    assistant_prompt = "<|im_start|>assistant\n"
    
    # Combine prompts
    formatted_prompt = user_prompt + assistant_prompt
    
    return formatted_prompt

In [8]:
# Set up text generation pipeline
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device='cpu')

# Example prompt
prompt = format_chat_prompt(dataset['valid']['prompt'][0][0]['content'])

# Generate output
outputs = generator(prompt, max_length=100, truncation=True, num_return_sequences=1, temperature=0.7)

print(outputs[0]['generated_text'])

Device set to use cpu


<|im_start|>user
Given the YouTube video idea write an engaging title.

**Video Idea**: intro independent component analysis

**Additional Guidance**:
- Title should be between 30 and 75 characters long
- Only return the title idea, nothing else!<|im_end|>
<|im_start|>assistant
"Unlocking Hidden Components: The Power of Independent Component Analysis in Data Analysis"


### train model

In [9]:
ft_model_name = model_name.split('/')[1].replace("Instruct", "DPO")

training_args = DPOConfig(
    output_dir=ft_model_name, 
    logging_steps=25,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    save_strategy="epoch",
    eval_strategy="epoch",
    eval_steps=1,
)

device = torch.device('cpu')

  return torch._C._cuda_getDeviceCount() > 0


In [10]:
trainer = DPOTrainer(
    model=model, 
    args=training_args, 
    processing_class=tokenizer, 
    train_dataset=dataset['train'],
    eval_dataset=dataset['valid'],
)
trainer.train()

Extracting prompt in train dataset:   0%|          | 0/1026 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/1026 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1026 [00:00<?, ? examples/s]

Extracting prompt in eval dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
1,0.5601,0.562803,2.502599,1.936242,0.658333,0.566358,-31.025215,-40.126804,-3.391751,-3.383547
2,0.4092,0.519936,1.102252,-0.035704,0.766667,1.137956,-45.028698,-59.84626,-3.432317,-3.412039
3,0.2868,0.581661,0.236642,-1.215212,0.725,1.451855,-53.684792,-71.64135,-3.230275,-3.201569


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


TrainOutput(global_step=387, training_loss=0.43497028030474366, metrics={'train_runtime': 3301.2469, 'train_samples_per_second': 0.932, 'train_steps_per_second': 0.117, 'total_flos': 0.0, 'train_loss': 0.43497028030474366, 'epoch': 3.0})

### use fine-tuned model

In [11]:
# Load the fine-tuned model
ft_model = trainer.model

In [13]:
# Set up text generation pipeline
generator = pipeline("text-generation", model=ft_model, tokenizer=tokenizer, device='cpu')

# Example prompt
prompt = format_chat_prompt(dataset['valid']['prompt'][0][0]['content'])

# Generate output
outputs = generator(prompt, max_length=100, truncation=True, num_return_sequences=1, temperature=0.7)

print(outputs[0]['generated_text'])

Device set to use cpu


<|im_start|>user
Given the YouTube video idea write an engaging title.

**Video Idea**: intro independent component analysis

**Additional Guidance**:
- Title should be between 30 and 75 characters long
- Only return the title idea, nothing else!<|im_end|>
<|im_start|>assistant
Independent Component Analysis for Beginners


### push to HF hub

In [14]:
model_id = f"stramzik/{ft_model_name}"
trainer.push_to_hub(model_id)

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/6.61k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/stramzik/Qwen2.5-0.5B-DPO/commit/88300f154b513eaf8ec272d11ad29cfaef670267', commit_message='stramzik/Qwen2.5-0.5B-DPO', commit_description='', oid='88300f154b513eaf8ec272d11ad29cfaef670267', pr_url=None, repo_url=RepoUrl('https://huggingface.co/stramzik/Qwen2.5-0.5B-DPO', endpoint='https://huggingface.co', repo_type='model', repo_id='stramzik/Qwen2.5-0.5B-DPO'), pr_revision=None, pr_num=None)

In [11]:
format_chat_prompt(dataset['valid']['prompt'][0][0]['content'])

'<|im_start|>user\nGiven the YouTube video idea write an engaging title.\n\n**Video Idea**: intro independent component analysis\n\n**Additional Guidance**:\n- Title should be between 30 and 75 characters long\n- Only return the title idea, nothing else!<|im_end|>\n<|im_start|>assistant\n'