In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.37.2 trl==0.7.11 datasets==2.16.1 wandb

In [None]:
import os
from huggingface_hub import login
import wandb
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import DPOTrainer


# Never embed the API key in the notebook: read it from the environment.
# When WANDB_API_KEY is unset, `key=None` lets wandb fall back to its stored
# credentials or an interactive prompt.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

[34m[1mwandb[0m: Currently logged in as: [33mmishra4[0m ([33mmishra4-deeplogic-ai[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Fetch the preference-pair dataset and keep only its "train" split.
orca_dpo_splits = load_dataset("Intel/orca_dpo_pairs")
train_ds = orca_dpo_splits["train"]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Using the latest cached version of the dataset since Intel/orca_dpo_pairs couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /root/.cache/huggingface/datasets/Intel___orca_dpo_pairs/default/0.0.0/624952e3f420ae18d88b31977ee2ea436c833abb (last modified on Mon Jul 22 14:02:11 2024).


In [None]:
# Hub identifier of the base checkpoint; reused below for both the tokenizer and the model.
model_name="microsoft/Phi-3-mini-4k-instruct"

In [None]:
def chatml_format(example):
    """Turn one raw preference record into the prompt/chosen/rejected strings used for DPO.

    Args:
        example: Mapping with the keys ``system``, ``question``, ``chosen``
            and ``rejected``.

    Returns:
        dict with three string fields:
        ``prompt`` - the optional system turn followed by the user turn,
        rendered with the module-level ``tokenizer``'s chat template and
        ending with the generation prompt;
        ``chosen`` / ``rejected`` - the two answers, each closed with the
        end-of-turn marker.
    """
    # Phi-3's chat format closes every turn with "<|end|>". The previous
    # literal "<im_end>" is not a special token for this tokenizer, so it was
    # split into ordinary text tokens that the model would learn to emit.
    end_of_turn = "<|end|>\n"

    # Build the conversation: an optional system turn, then the user question.
    messages = []
    if example['system']:
        messages.append({"role": "system", "content": example['system']})
    messages.append({"role": "user", "content": example['question']})

    # Render the whole conversation in ONE call. Rendering the system turn on
    # its own lets the template add its per-conversation prefix/suffix (BOS,
    # or EOS when no generation prompt is requested) in the middle of the prompt.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    return {
        "prompt": prompt,
        "chosen": example['chosen'] + end_of_turn,
        "rejected": example['rejected'] + end_of_turn,
    }

# The dataset itself was loaded in an earlier cell (train_ds).

# Tokenizer of the base model: pad with the EOS token, padding on the left.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

# Remember the raw column names so they can be dropped after formatting.
original_columns = train_ds.column_names

# Rewrite every record into prompt / chosen / rejected strings.
train_dataset = train_ds.map(chatml_format, remove_columns=original_columns)

# Inspect a single formatted example.
train_dataset[0]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


{'chosen': '[\n  ["AFC Ajax (amateurs)", "has ground", "Sportpark De Toekomst"],\n  ["Ajax Youth Academy", "plays at", "Sportpark De Toekomst"]\n]<im_end>\n',
 'rejected': " Sure, I'd be happy to help! Here are the RDF triplets for the input sentence:\n\n[AFC Ajax (amateurs), hasGround, Sportpark De Toekomst]\n[Ajax Youth Academy, playsAt, Sportpark De Toekomst]\n\nExplanation:\n\n* AFC Ajax (amateurs) is the subject of the first triplet, and hasGround is the predicate that describes the relationship between AFC Ajax (amateurs) and Sportpark De Toekomst.\n* Ajax Youth Academy is the subject of the second triplet, and playsAt is the predicate that describes the relationship between Ajax Youth Academy and Sportpark De Toekomst.\n\nNote that there may be other possible RDF triplets that could be derived from the input sentence, but the above triplets capture the main relationships present in the sentence.<im_end>\n",
 'prompt': "<|user|>\nYou will be given a definition of a task first, th

In [None]:
# dtype used for computation on top of the 4-bit weights. float16 agrees with
# `fp16=True` in the training arguments further down.
bnb_4bit_compute_dtype = "float16"
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# 4-bit NF4 quantization with double quantization (QLoRA-style loading).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    # Use the dtype selected above; this was hard-coded to torch.bfloat16,
    # which left `compute_dtype` unused and disagreed with fp16 training.
    bnb_4bit_compute_dtype=compute_dtype,
)

# Place the whole model on the first GPU when one is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device,
    quantization_config=bnb_config,
    trust_remote_code=True,
)
# The generation KV cache is not needed while training.
model.config.use_cache = False
model



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Phi3ForCausalLM(
  (model): Phi3Model(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x Phi3DecoderLayer(
        (self_attn): Phi3Attention(
          (o_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (qkv_proj): Linear4bit(in_features=3072, out_features=9216, bias=False)
          (rotary_emb): Phi3RotaryEmbedding()
        )
        (mlp): Phi3MLP(
          (gate_up_proj): Linear4bit(in_features=3072, out_features=16384, bias=False)
          (down_proj): Linear4bit(in_features=8192, out_features=3072, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): Phi3RMSNorm()
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): Phi3RMSNorm()
      )
    )
    (norm): Phi3RMSNorm()
  )
  (lm_head): Linear(in_features=3072, o

Phi-3 does not ship with an `adapter_config.json`, so the LoRA adapter has to be configured with PEFT. Additionally, we will use `get_peft_model` to check the number of trainable parameters.

In [None]:
# LoRA adapter settings for causal-LM fine-tuning: rank-16 adapters on the
# fused attention projection and on the output head, no bias training.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["qkv_proj", "lm_head"],
    r=16,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

In [None]:
from peft import get_peft_model

# Wrap the quantized base model with the LoRA adapter described by peft_config,
# so the number of trainable parameters can be inspected.
lora_model = get_peft_model(model=model, peft_config=peft_config)

In [None]:
# Report how many parameters are trainable versus the total parameter count.
lora_model.print_trainable_parameters()

trainable params: 6,853,632 || all params: 2,015,993,856 || trainable%: 0.33996294083943873


In [None]:
# Only the last expression of a cell is displayed, so the memory footprint has
# to be printed explicitly; otherwise its value is silently discarded.
print(f"Memory footprint: {lora_model.get_memory_footprint() / 1024**2:,.1f} MiB")
lora_model.config

Phi3Config {
  "_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,
  "pad_token_id": 32000,
  "quantization_config": {
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_sk

In [None]:
# Training hyper-parameters for a short DPO run (50 optimizer steps with an
# effective batch size of 2 * 4 = 8 sequences per device).
args = TrainingArguments(
    output_dir="saturo/dpo",
    per_device_train_batch_size=2,
    max_steps=50,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    optim="adamw_torch_fused",
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    logging_steps=1,
    # No checkpoints are written and there is no evaluation set, so
    # `load_best_model_at_end` had nothing to load and has been dropped.
    save_strategy="no",
    fp16=True,
    warmup_steps=5,
    report_to="wandb",
)

# DPO-specific settings: `beta` is the DPO temperature and "sigmoid" selects
# the standard DPO loss.
dpo_args = {
    "beta": 0.1,
    "loss_type": "sigmoid"
}
# Token budgets: whole sequence (prompt + answer) and prompt alone.
max_seq_length = 1512
prompt_length = 1024

# NOTE(review): `model` was already passed to get_peft_model in an earlier
# cell; confirm that applying `peft_config` to it again here is intended.
trainer = DPOTrainer(
    model,
    ref_model=None, # set to none since we use peft
    peft_config=peft_config,
    args=args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    max_length=max_seq_length,
    max_prompt_length=prompt_length,
    beta=dpo_args["beta"],
    loss_type=dpo_args["loss_type"],
)



In [None]:
# Run DPO training; the returned TrainOutput is displayed as the cell result.
trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.6931
2,0.6931
3,0.6904
4,0.6775
5,0.6676
6,0.665
7,0.6332
8,0.6223
9,0.5453
10,0.5586


TrainOutput(global_step=50, training_loss=0.35566621139645577, metrics={'train_runtime': 1601.6636, 'train_samples_per_second': 0.25, 'train_steps_per_second': 0.031, 'total_flos': 0.0, 'train_loss': 0.35566621139645577, 'epoch': 0.03})