### DPO Finetune the `Open-Hermes-2.5` model to `Neural-Hermes` 
Use the `distilablled_orca_dpo_pairs` to finetune the  `SFT` Open Hermes Model.
- Follows an excellent blogpost from [Maxime Labonne](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&cad=rja&uact=8&ved=2ahUKEwjAvYS-ismEAxXdwjgGHfrfAeIQFnoECA4QAQ&url=https%3A%2F%2Ftowardsdatascience.com%2Ffine-tune-a-mistral-7b-model-with-direct-preference-optimization-708042745aac&usg=AOvVaw04Cuzrpb0fcRrxWcV_5Nox&opi=89978449)

In [1]:
# Install dependencies
# ! pip install -q datasets transformers bitsandbytes sentencepiece wandb

In [2]:
import os
import gc
import json
import torch

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer
import bitsandbytes as bnb
import wandb

In [3]:
torch.cuda.empty_cache()
gc.collect()

20

In [4]:
secrets_path = "./secrets/secrets.json"
# load the tokens
with open(secrets_path, "r") as f:
    secrets = json.load(f)

hf_token = secrets["HF_TOKEN"]
wandb_token = secrets["WANDB_TOKEN"]

# login to wandb
wandb.login(key=wandb_token)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mparth-shastri[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ostrich/.netrc


True

In [5]:
# ChatML template  - Template used by models like ChatGPT for a chat interface
# <|im_start|>system
# You are a helpful chatbot assistant.<|im_end|>
# <|im_start|>user
# Hi<|im_end|>
# <|im_start|>assistant
# Hi, how can I help you?<|im_end|> 

In [6]:
def make_chatml_format(example, tokenizer: AutoTokenizer):
    """
    Convert the example to chatml format.
    # <|im_start|>system
    # You are a helpful chatbot assistant.<|im_end|>
    # <|im_start|>user
    # Hi<|im_end|>
    # <|im_start|>assistant
    # Hi, how can I help you?<|im_end|> 
    """

    # Format system prompt
    if len(example["system"]) > 0:
        message = {'role': 'system', 'content': example['system']}
        # the add_generation_prompt = True arg appends the <|im_start|>assistant str at the end
        system = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=False)
    else:
        system = ''

    # Format instruction input prompt
    message = {'role': 'user', 'content': example['input']}
    prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)

    # Format chosen prompt
    chosen = example["chosen"] + "<|im_end|>"  # add the EOS token

    # Format rejected prompt
    rejected = example['rejected'] + "<|im_end|>"

    return {
        "prompt": prompt,
        "chosen": chosen,
        "rejected": rejected
    }

#### Load the dataset and Tokenizer

In [7]:
# Dataset
model_name = "teknium/OpenHermes-2.5-Mistral-7B"   # requires around 13 GiGs
new_model = "NeuralHermes-2.5-Mistral-7B"
dataset = load_dataset("argilla/distilabel-intel-orca-dpo-pairs", split='train')

original_columns = dataset.column_names

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [8]:
dataset

Dataset({
    features: ['system', 'input', 'chosen', 'rejected', 'generations', 'order', 'labelling_model', 'labelling_prompt', 'raw_labelling_response', 'rating', 'rationale', 'status', 'original_chosen', 'original_rejected', 'chosen_score', 'in_gsm8k_train'],
    num_rows: 12859
})

In [9]:
# Format the dataset
dataset = dataset.map(
    lambda x: make_chatml_format(x, tokenizer),
    remove_columns = original_columns,
)

In [10]:
# View one example of the dataset
dataset[0]

{'chosen': '[\n  ["AFC Ajax (amateurs)", "has ground", "Sportpark De Toekomst"],\n  ["Ajax Youth Academy", "plays at", "Sportpark De Toekomst"]\n]<|im_end|>',
 'rejected': " Sure, I'd be happy to help! Here are the RDF triplets for the input sentence:\n\n[AFC Ajax (amateurs), hasGround, Sportpark De Toekomst]\n[Ajax Youth Academy, playsAt, Sportpark De Toekomst]\n\nExplanation:\n\n* AFC Ajax (amateurs) is the subject of the first triplet, and hasGround is the predicate that describes the relationship between AFC Ajax (amateurs) and Sportpark De Toekomst.\n* Ajax Youth Academy is the subject of the second triplet, and playsAt is the predicate that describes the relationship between Ajax Youth Academy and Sportpark De Toekomst.\n\nNote that there may be other possible RDF triplets that could be derived from the input sentence, but the above triplets capture the main relationships present in the sentence.<|im_end|>",
 'prompt': "<|im_start|>user\nYou will be given a definition of a task f

#### Load the model with less precision

In [11]:
# quantization config
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
model.config.use_cache = False

# reference model for DPO
reference_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#### Define the `LoRA` parameters

In [12]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj"]
)

#### Trainer initialization


In [13]:
# Training Arguments

training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    max_steps=200,
    save_strategy='no',
    logging_steps=1,
    output_dir=f'models/{new_model}',
    optim='paged_adamw_32bit',
    warmup_steps=100,
    bf16=True,
    report_to="wandb"
)

# DPO Trainer 
dpo_trainer = DPOTrainer(
    model=model,
    ref_model=reference_model,
    args=training_args,
    loss_type='sigmoid',
    train_dataset = dataset,
    tokenizer=tokenizer,
    peft_config=lora_config,
    beta=0.1,
    max_prompt_length=1024,
    max_length=1536
)



Map:   0%|          | 0/12859 [00:00<?, ? examples/s]

In [14]:
# Finetune with dpo
dpo_trainer.train()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/200 [00:00<?, ?it/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed


OutOfMemoryError: CUDA out of memory. Tried to allocate 486.00 MiB. GPU 0 has a total capacty of 15.70 GiB of which 474.94 MiB is free. Including non-PyTorch memory, this process has 14.52 GiB memory in use. Of the allocated memory 12.76 GiB is allocated by PyTorch, and 1.46 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF