In [None]:
!pip install -qU datasets trl peft bitsandbytes sentencepiece wandb

# Fine-tune a Mistral-7B model with DPO

In [None]:
import os
import gc
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer
import bitsandbytes as bnb
import wandb

from google.colab import userdata

# Secrets are fetched through google.colab's `userdata` helper, never hard-coded here
hf_token, wb_token = (userdata.get(secret) for secret in ('HF_TOKEN', 'WB_TOKEN'))

# Authenticate with Weights & Biases using the fetched key
wandb.login(key=wb_token)

# Base checkpoint to start from, and the name given to the fine-tuned result
model_name = 'teknium/OpenHermes-2.5-Mistral-7B'
new_model = 'NeuralHermes-2.5-Mistral-7B'

In [None]:
# Tokenizer: reuse the end-of-sequence token for padding, and pad on the left
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'

# Model to fine-tune, loaded with 4-bit quantization.
# The request goes through `quantization_config`; a misspelled keyword such as
# `load_in_4_bit` is not a recognised option and would not quantize the model.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True)
)

## Format dataset

In [None]:
def chatml_format(example):
    """Convert one dataset record into the prompt/chosen/rejected layout.

    Relies on the module-level ``tokenizer`` and its chat template.

    Args:
        example: Mapping with 'system', 'question', 'chosen' and 'rejected'
            entries. 'system' may be an empty string or None, in which case
            no system turn is emitted.

    Returns:
        dict with three string entries:
            'prompt': the optional system turn followed by the user turn,
                both rendered with the chat template, ending with the
                template's generation prompt.
            'chosen': example['chosen'] with "<|im_end|>\n" appended.
            'rejected': example['rejected'] with "<|im_end|>\n" appended.
    """
    # Optional system turn. Truthiness (instead of len()) also covers a None value.
    system_text = example['system']
    if system_text:
        system = tokenizer.apply_chat_template(
            [{'role': 'system', 'content': system_text}],
            tokenize=False
        )
    else:
        system = ""

    # User turn. add_generation_prompt=True appends the template's marker that
    # opens the assistant turn, so the answer text continues directly from it.
    prompt = tokenizer.apply_chat_template(
        [{'role': 'user', 'content': example['question']}],
        tokenize=False,
        add_generation_prompt=True
    )

    # Both answers are closed with the end-of-turn marker.
    return {
        'prompt': system + prompt,
        'chosen': example['chosen'] + "<|im_end|>\n",
        'rejected': example['rejected'] + "<|im_end|>\n"
    }


# Fetch the preference-pair corpus and keep its 'train' split
orca_pairs = load_dataset('Intel/orca_dpo_pairs')
dataset = orca_pairs['train']

# Remember the raw column names so they can be dropped after reformatting
original_columns = dataset.column_names

# Run every record through chatml_format, replacing the raw columns with its output
dataset = dataset.map(function=chatml_format, remove_columns=original_columns)

In [None]:
# Display the first formatted record to check the prompt/chosen/rejected fields
dataset[0]

## Train model with DPO

In [None]:
# LoRA adapter settings for the causal-LM fine-tune
peft_config = LoraConfig(
    task_type='CAUSAL_LM',
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias='none',
    # Module names that receive adapters
    target_modules=[
        'k_proj', 'gate_proj', 'v_proj', 'up_proj',
        'q_proj', 'o_proj', 'down_proj',
    ]
)

# Hyper-parameters for the training run
training_args = TrainingArguments(
    # where outputs go and where metrics are reported
    output_dir=new_model,
    report_to='wandb',
    logging_steps=1,
    save_strategy='no',
    # batch of 4 per device, accumulated over 4 steps
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    # optimizer and learning-rate schedule
    optim='paged_adamw_32bit',
    learning_rate=5e-5,
    lr_scheduler_type='cosine',
    warmup_steps=100,
    max_steps=200,
    # numeric precision
    bf16=True
)

# Create the DPO trainer: wraps `model` with the LoRA settings from `peft_config`
# and trains on the formatted prompt/chosen/rejected dataset.
# NOTE(review): `tokenizer`, `beta`, `max_prompt_length` and `max_length` are passed
# as constructor keywords here; confirm the installed TRL release still accepts them.
dpo_trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1,
    max_prompt_length=1024,
    max_length=1536
)

In [None]:
# Run the fine-tuning loop on the trainer built in the previous cell
dpo_trainer.train()

## Save model

In [None]:
# Persist the trained model (loaded again below as the adapter) and the tokenizer
dpo_trainer.model.save_pretrained('final_checkpoint')
tokenizer.save_pretrained('final_checkpoint')

# Flush memory: drop the references, run the garbage collector so the objects are
# actually destroyed, and only then ask PyTorch to release its cached CUDA blocks.
del dpo_trainer, model
gc.collect()
torch.cuda.empty_cache()

# Load the base checkpoint again, this time in plain fp16 without quantization
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    return_dict=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Attach the adapter saved in 'final_checkpoint' and fold it into the base weights
model = PeftModel.from_pretrained(base_model, 'final_checkpoint').merge_and_unload()

# Write the merged model and its tokenizer to the local `new_model` directory
for artifact in (model, tokenizer):
    artifact.save_pretrained(new_model)

# Publish both to the Hub under the same name, sharing the upload options
hub_options = {'use_temp_dir': False, 'token': hf_token}
model.push_to_hub(new_model, **hub_options)
tokenizer.push_to_hub(new_model, **hub_options)

## Inference

In [None]:
# Build a text-generation pipeline from the model and tokenizer saved under `new_model`
tokenizer = AutoTokenizer.from_pretrained(new_model)
pipeline = transformers.pipeline(
    task='text-generation',
    tokenizer=tokenizer,
    model=new_model
)

In [None]:
# Conversation to send: a system instruction followed by one user question
message = [
    {'role': 'system', 'content': "You are a helpful assistant chatbot."},
    {'role': 'user', 'content': 'What is a Large Language Model?'}
]

# Render the conversation with the chat template, ending with the generation prompt
prompt = tokenizer.apply_chat_template(
    message,
    tokenize=False,
    add_generation_prompt=True
)

# Generate one sampled completion; `do_sample=True` enables sampling so that
# `temperature` and `top_p` take effect.
sequences = pipeline(
    prompt,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    num_return_sequences=1,
    max_length=200
)
print(sequences[0]['generated_text'])