In [None]:
import torch
from trl import DPOTrainer

In [None]:
from unsloth import FastLanguageModel
from transformers import  AutoTokenizer ,TrainingArguments
import torch
# --- Loading configuration ---------------------------------------------------
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Load the checkpoint saved by the earlier training run. The second value
# returned by `from_pretrained` is discarded because a separate tokenizer is
# loaded right below.
# NOTE(review): binding `_` shadows IPython's "last output" shortcut for the
# rest of the session — consider a differently named throwaway variable.
model, _ = FastLanguageModel.from_pretrained(
    model_name = 'gemma_unsloth_dpo',#"lora_model", # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# Tokenizer whose chat template is used to format the preference data.
tokenizer = AutoTokenizer.from_pretrained("philschmid/gemma-tokenizer-chatml", use_fast=True)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: Your Flash Attention 2 installation seems to be broken?
A possible explanation is you have a new CUDA version which isn't
yet compatible with FA2? Please file a ticket to Unsloth or FA2.
We shall now use Xformers instead, which does not have any performance hits!
We found this negligible impact by benchmarking on 1x A100.
🦥 Unsloth Zoo will now patch everything to make training faster!
Unsloth: If you want to finetune Gemma 2, install flash-attn to make it faster!
To install flash-attn, do the below:

pip install --no-deps --upgrade "flash-attn>=2.6.3"
==((====))==  Unsloth 2024.12.12: Fast Gemma2 patching. Transformers: 4.47.1.
   \\   /|    GPU: NVIDIA A100-SXM4-80GB MIG 7g.80gb. Max memory: 79.151 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: 

Unsloth 2024.12.12 patched 42 layers with 42 QKV layers, 42 O layers and 42 MLP layers.


In [None]:
import json

from datasets import Dataset, DatasetDict


def load_preference_split(path):
    """Parse one JSON preference file and return its content.

    The file is opened as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def build_preference_pairs(split):
    """Turn one raw split into conversational ``chosen`` / ``rejected`` columns.

    Args:
        split: mapping holding parallel lists under the keys 'ids', 'prompt',
            'rank 1' and 'rank 2'. 'rank 1' supplies the chosen reply and
            'rank 2' the rejected one.

    Returns:
        A dict with 'chosen' and 'rejected' lists. Every entry is a
        two-message conversation: the user prompt followed by the assistant
        reply.
    """
    pairs = {'chosen': [], 'rejected': []}
    # The length of 'ids' decides how many examples are read from the split.
    for k in range(len(split['ids'])):
        prompt = split['prompt'][k]
        pairs['chosen'].append([
            {'content': prompt, 'role': 'user'},
            {'content': split['rank 1'][k], 'role': 'assistant'},
        ])
        pairs['rejected'].append([
            {'content': prompt, 'role': 'user'},
            {'content': split['rank 2'][k], 'role': 'assistant'},
        ])
    return pairs


# sample dataset creation
train = load_preference_split('curr_dpo_dataset_train.json')
test = load_preference_split('curr_dpo_dataset_eval.json')

# Same conversion for both splits, so it lives in one helper instead of two
# copy-pasted loops.
train_dict = build_preference_pairs(train)
eval_dict = build_preference_pairs(test)

dataset_train = Dataset.from_dict(train_dict)
dataset_test = Dataset.from_dict(eval_dict)
dataset = DatasetDict({
    'train': dataset_train,
    'test': dataset_test
})

In [None]:
# Display the DatasetDict as the cell output to inspect the splits built above.
dataset

In [None]:
# Names of the raw training columns; handed to `map` as `remove_columns` so
# they are dropped once the templated text columns have been produced.
column_names = [name for name in dataset["train"].features]
def apply_dpo_template(example):
  """Render one preference pair into prompt / chosen / rejected strings.

  Uses the module-level `tokenizer` and its chat template.

  Args:
    example: a row whose "chosen" and "rejected" entries are message lists.
      The prompt is taken from "chosen" only (every turn except the last).

  Returns:
    The same mapping with "text_prompt", "text_chosen" and "text_rejected"
    added.

  Raises:
    ValueError: if "chosen" or "rejected" is missing. Without this check the
      row would pass through untouched and the failure would only show up
      later, when the templated columns are renamed.
  """
  missing = [key for key in ("chosen", "rejected") if key not in example.keys()]
  if missing:
    raise ValueError(
        "apply_dpo_template needs 'chosen' and 'rejected' message lists; "
        f"missing {missing}, found keys {list(example.keys())}"
    )

  # For DPO, the inputs are triples of (prompt, chosen, rejected), where `chosen` and `rejected` are the final turn of a dialogue
  # We therefore need to extract the N-1 turns to form the prompt
  prompt_messages = example["chosen"][:-1]

  # Now we extract the final turn to define chosen/rejected responses
  chosen_messages = example["chosen"][-1:]
  rejected_messages = example["rejected"][-1:]

  # tokenize=False keeps the rendered template as text instead of token ids.
  example["text_chosen"] = tokenizer.apply_chat_template(chosen_messages, tokenize=False)
  example["text_rejected"] = tokenizer.apply_chat_template(rejected_messages, tokenize=False)
  example["text_prompt"] = tokenizer.apply_chat_template(prompt_messages, tokenize=False)
  return example

# Swap the raw message lists for their templated text counterparts; the
# original columns are dropped by `remove_columns`.
dataset = dataset.map(
    apply_dpo_template,
    remove_columns=column_names,
    desc="Formatting comparisons with prompt template",
)

# Rename the templated columns to the names used from here on.
templated_to_final = {"text_prompt": "prompt", "text_chosen": "chosen", "text_rejected": "rejected"}
for split in ("train", "test"):
    dataset[split] = dataset[split].rename_columns(templated_to_final)

Formatting comparisons with prompt template:   0%|          | 0/283 [00:00<?, ? examples/s]

Formatting comparisons with prompt template:   0%|          | 0/83 [00:00<?, ? examples/s]

In [None]:
# Peek at the first training example. Fetching row 0 once avoids indexing
# three whole columns just to read their first element.
first_train_example = dataset['train'][0]
first_train_example['chosen'], first_train_example['rejected'], first_train_example['prompt']

In [None]:
# Pick the mixed-precision mode once: bf16 when the GPU supports it, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
        do_eval=True,
        # `evaluation_strategy` is the deprecated spelling of this argument.
        eval_strategy = "epoch",
        #eval_steps = 100,
        #save_strategy = "epoch",
        per_device_train_batch_size = 1, #Zephyr
        gradient_accumulation_steps = 16, #Zephyr
        per_device_eval_batch_size = 2,
        warmup_ratio = 0.1, #Zephyr
        num_train_epochs = 5, #Zephyr
        learning_rate = 5.0e-07, #Zephyr
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 10,
        optim = "paged_adamw_8bit",
        lr_scheduler_type = "cosine", #Zephyr
        seed = 3407,
        output_dir = "./gemma9b_DPO_curr/",
)


from unsloth import PatchDPOTrainer
PatchDPOTrainer()
# Import DPOTrainer again now that the patch has run, so the name refers to
# whatever class Unsloth leaves in `trl` (a no-op when the class is patched in place).
from trl import DPOTrainer

# NOTE(review): recent TRL releases take `beta` through `DPOConfig` and the
# tokenizer through `processing_class`; the call below keeps the keyword
# arguments this notebook was run with — confirm against the installed TRL.
trainer = DPOTrainer(
    model,
    ref_model=None,
    args=training_args,
    beta=0.05, #Zephyr
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    tokenizer=tokenizer
)




Extracting prompt from train dataset:   0%|          | 0/283 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/283 [00:00<?, ? examples/s]

Extracting prompt from eval dataset:   0%|          | 0/83 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/83 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/283 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/83 [00:00<?, ? examples/s]

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [None]:
# Start training with the DPOTrainer configured above; the call's return value
# is shown as the cell output.
trainer.train()

In [None]:
# model.save_pretrained("lora_model") # Local saving
# tokenizer.save_pretrained("lora_model")
import os
from getpass import getpass

from huggingface_hub import login

# Never paste an access token into the notebook: read it from the HF_TOKEN
# environment variable and fall back to an interactive prompt that does not
# echo the value.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")

# Placeholder repository id — replace with your own "<user>/<repo>".
hub_repo_id = "your_name/lora_model"

login(token=hf_token)

model.push_to_hub(hub_repo_id, token = hf_token) # Online saving
tokenizer.push_to_hub(hub_repo_id, token = hf_token) # Online saving