In [46]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TrainingArguments, BitsAndBytesConfig, pipeline
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, PeftModel
from trl import DPOTrainer, DPOConfig
import torch
import huggingface_hub

In [37]:
huggingface_hub.login(token='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')

In [19]:
dataset = load_dataset("Intel/orca_dpo_pairs")
dataset = dataset['train']
dataset = dataset.shuffle(seed=42).select(range(1000))
split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = split["train"]
eval_dataset  = split["test"]

In [20]:
train_dataset

Dataset({
    features: ['system', 'question', 'chosen', 'rejected'],
    num_rows: 800
})

In [21]:
eval_dataset

Dataset({
    features: ['system', 'question', 'chosen', 'rejected'],
    num_rows: 200
})

In [22]:
def chatml_format(example):
  system_prompt = f"<|im_start|>system\n{example['system']}\n<|im_end|>\n"
  user_prompt = f"<|im_start|>user\n{example['question']}\n<|im_end|>\n"
  assistant_prompt = "<|im_start|>assistant\n"
  return {
        "prompt": system_prompt + user_prompt + assistant_prompt,
        "chosen": example['chosen'],
        "rejected": example['rejected'],
    }

In [23]:
train_dataset = train_dataset.map(
    chatml_format,
    remove_columns=dataset.column_names
)

eval_dataset = eval_dataset.map(
    chatml_format,
    remove_columns=dataset.column_names
)

In [24]:
train_dataset[1]

{'chosen': 'Mexico and the United States have signed an agreement to strengthen security at their shared border, according to the Mexican Public Security Ministry.',
 'rejected': " Sure! Here's a very short summary of the text:\n\nMexico and the United States have signed an agreement to strengthen security at their shared border.",
 'prompt': '<|im_start|>system\nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer.\n<|im_end|>\n<|im_start|>user\nmexico and the united states signed an agreement thursday to reinforce the security at the common border , the mexican public security ministry said .\n\nWhat is a very short summary of the above text?\n<|im_end|>\n<|im_start|>assistant\n'}

In [25]:
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

In [26]:
train_dataset[1]['prompt']

'<|im_start|>system\nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer.\n<|im_end|>\n<|im_start|>user\nmexico and the united states signed an agreement thursday to reinforce the security at the common border , the mexican public security ministry said .\n\nWhat is a very short summary of the above text?\n<|im_end|>\n<|im_start|>assistant\n'

In [31]:
print('rejected is :',train_dataset[1]['chosen'])
print('chosen is :',train_dataset[1]['rejected'])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
prompt = train_dataset[1]['prompt']
inputs = tokenizer(prompt, return_tensors="pt").to(device)
input_token_length = inputs["input_ids"].shape[1]
output = tokenizer.decode(
    model.generate(
        **inputs,
        max_new_tokens=100,
    )[0][input_token_length:],
    skip_special_tokens=True
)
print('model generated output is :', output)

rejected is : Mexico and the United States have signed an agreement to strengthen security at their shared border, according to the Mexican Public Security Ministry.
chosen is :  Sure! Here's a very short summary of the text:

Mexico and the United States have signed an agreement to strengthen security at their shared border.
model generated output is : The U. S. and Mexico agreed on Thursday to strengthen border security, according to Mexican officials.
This extremely concise summary captures the essence of the original statement in just 12 words, providing a quick overview of the key points without any extraneous information. The summary effectively conveys that two countries have reached an agreement regarding border safety, which appears to involve cooperation between law enforcement agencies or other relevant organizations to enhance protection along their shared borders.


In [32]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear"
)

In [33]:
dpo_config = DPOConfig(
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    remove_unused_columns=False,
    learning_rate=5.0e-06,
    eval_strategy="epoch",
    logging_strategy="epoch",
    lr_scheduler_type="cosine",
    num_train_epochs=6,
    save_strategy="epoch",
    logging_steps=1,
    output_dir="./qwen2-dpo-orca",
    optim="paged_adamw_32bit",
    warmup_steps=2,
    bf16=True,
    report_to="none",
    beta=0.1,
    max_prompt_length=2048,
    max_length=2048,
)

In [39]:
trainer = DPOTrainer(
    model,
    args=dpo_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
    peft_config=peft_config,
)

In [40]:
trainer.train()

Epoch,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
1,0.2877,0.128795,-1.019058,-5.474181,0.955,4.455123,-174.196411,-286.691895,-0.842329,-0.978002
2,0.0532,0.075451,-1.825165,-9.913341,0.97,8.088176,-182.257462,-331.083527,-0.858395,-0.961803
3,0.0232,0.056552,-2.118336,-11.838317,0.97,9.719979,-185.189178,-350.333252,-0.878102,-0.978861
4,0.0128,0.050093,-2.255131,-12.749587,0.97,10.494455,-186.557129,-359.445923,-0.886178,-0.988147
5,0.0095,0.048643,-2.37697,-13.282022,0.97,10.90505,-187.775513,-364.770325,-0.904641,-1.006998
6,0.0087,0.049035,-2.394111,-13.352375,0.97,10.958265,-187.946915,-365.473816,-0.905259,-1.006544




TrainOutput(global_step=600, training_loss=0.06585770606994629, metrics={'train_runtime': 1724.3484, 'train_samples_per_second': 2.784, 'train_steps_per_second': 0.348, 'total_flos': 0.0, 'train_loss': 0.06585770606994629, 'epoch': 6.0})

In [55]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    return_dict=True,
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

`torch_dtype` is deprecated! Use `dtype` instead!


In [41]:
PATH_MODEL="final_checkpoint_Qwen2.5_dpo"

In [42]:
trainer.model.save_pretrained(PATH_MODEL)
tokenizer.save_pretrained(PATH_MODEL)

('final_checkpoint_Qwen2.5_dpo/tokenizer_config.json',
 'final_checkpoint_Qwen2.5_dpo/special_tokens_map.json',
 'final_checkpoint_Qwen2.5_dpo/chat_template.jinja',
 'final_checkpoint_Qwen2.5_dpo/vocab.json',
 'final_checkpoint_Qwen2.5_dpo/merges.txt',
 'final_checkpoint_Qwen2.5_dpo/added_tokens.json',
 'final_checkpoint_Qwen2.5_dpo/tokenizer.json')

In [56]:
ft_model = PeftModel.from_pretrained(base_model, PATH_MODEL)
ft_model = ft_model.merge_and_unload()

In [61]:
hf_token = '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'
model_name_reop = 'malihoseini/Qwen2.5_dpo'
ft_model.push_to_hub(model_name_reop,
                  private=True,
                  use_temp_dir=True,
                  token=hf_token)
tokenizer.push_to_hub(model_name_reop,
                      private=True,
                      use_temp_dir=True,
                      token=hf_token)

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

CommitInfo(commit_url='https://huggingface.co/malihoseini/Qwen2.5_dpo/commit/2b8d99540dba61a102609c17cc1b05c067743235', commit_message='Upload tokenizer', commit_description='', oid='2b8d99540dba61a102609c17cc1b05c067743235', pr_url=None, repo_url=RepoUrl('https://huggingface.co/malihoseini/Qwen2.5_dpo', endpoint='https://huggingface.co', repo_type='model', repo_id='malihoseini/Qwen2.5_dpo'), pr_revision=None, pr_num=None)

In [65]:
new_model="malihoseini/Qwen2.5_dpo"
tokenizer_new_model = AutoTokenizer.from_pretrained(new_model)
new_model = AutoModelForCausalLM.from_pretrained(new_model)

config.json:   0%|          | 0.00/1.32k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

In [69]:
print('rejected is :',train_dataset[1]['chosen'])
print('chosen is :',train_dataset[1]['rejected'])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model.to(device)
prompt = train_dataset[1]['prompt']
inputs = tokenizer_new_model(prompt, return_tensors="pt").to(device)
input_token_length = inputs["input_ids"].shape[1]
output = tokenizer_new_model.decode(
    new_model.generate(
        **inputs,
        max_new_tokens=100,
    )[0][input_token_length:],
    skip_special_tokens=True
)
print('model generated output is :', output)

rejected is : Mexico and the United States have signed an agreement to strengthen security at their shared border, according to the Mexican Public Security Ministry.
chosen is :  Sure! Here's a very short summary of the text:

Mexico and the United States have signed an agreement to strengthen security at their shared border.
model generated output is : A very short summary of the given text is:
Mexico-US border security pact announced.
This concise statement captures the key elements of the original longer passage, which details Mexico's Mexican Public Security Ministry confirming that the two countries have reached an agreement to strengthen their joint border protection efforts on Thursday.
