In [1]:
# # Install necessary libraries
# !pip install -q datasets trl bitsandbytes sentencepiece
# !pip install -q -U git+https://github.com/huggingface/transformers.git
# !pip install -q -U git+https://github.com/huggingface/peft.git
# # Importing packages

In [2]:

import os
import gc
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training, AutoPeftModelForCausalLM
from trl import DPOTrainer, DPOConfig
import bitsandbytes as bnb
from datasets import load_from_disk

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Define model names and tokens
peft_model_name = "Ronal999/phi2_finance_SFT" # The model obtained after the SFT step
new_model = "phi2_DPO" #the name of the DPO trained model

# Tokenizer setup
tokenizer = AutoTokenizer.from_pretrained(peft_model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
tokenizer.chat_template = "chat_template_function"

In [4]:
def chatml_format(example):

    # Formatting user instruction
    message = {"role": "user", "content": example['input']}
    prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)

    # Formatting the chosen answer
    chosen = example['preferred'] + "\n"

    # Formatting the rejected answer
    rejected = example['dispreferred'] + "\n"

    return {
        "prompt": prompt,
        "chosen": chosen,
        "rejected": rejected,
    }

# Loading the dataset
dataset = load_from_disk("dataset")
# Saving original columns for removal
original_columns = dataset.column_names

# Applying formatting to the dataset
dataset = dataset.map(
    chatml_format,
    remove_columns=original_columns
)

# Displaying a sample from the dataset
print(dataset[1])

{'prompt': 'chat_template_function', 'chosen': '{"person_details": {"phone": "+91 7374091655", "email": "rk220217@gmail.com", "github": "rohan220217", "linkedin": "rohan-kumar-a65a87175/", "name": "Rohan Kumar"}, "education": {"B.Tech + M.Tech in Computer Science": {"university": "Jawaharlal Nehru University", "duration": "2018 \\u2014 2023", "cgpa": "6.9/ 9.0"}, "Higher Secondary": {"school": "Satyadeo College (BSEB Board)", "duration": "2016 \\u2014 2018", "percentage": "73.4%"}, "Secondary": {"school": "RamaKrishna Mission Vidya Mandir (CBSE Board)", "duration": "2015 \\u2014 2016", "cgpa": "9.6/ 10"}}, "skills": {"Languages": ["C++", "Javascript", "Python", "Dart"], "Frontend Development": ["Html", "Css", "Sass", "Tailwind", "Bootstrap", "Vuejs", "Reactjs", "Redux", "Vuetify", "QT"], "Backend Development": ["NodeJs", "ExpressJs", "Django", "Flask"], "App Development": ["Flutter", "Vue Native", "Nativescript Vue"], "Tools": ["NuxtJs", "Postman", "Git", "Canva", "Figma", "Vim"]}, "wo

In [5]:
peft_config = LoraConfig(
    r=8,
    lora_alpha=8,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'v_proj', 'q_proj', 'dense']
)

# Load the base model with BitsAndBytes configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_name,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
    is_trainable=True,
)

model.config.use_cache = False
model.load_adapter(peft_model_name, adapter_name="training2")
model.load_adapter(peft_model_name, adapter_name="reference")

Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.01s/it]


_IncompatibleKeys(missing_keys=['base_model.model.model.embed_tokens.weight', 'base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight', 'base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias', 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.training2.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.training2.weight', 'base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight', 'base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias', 'base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.k_proj.lora_A.training2.weight', 'base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.0.self_attn.k_proj.lora_B.training2.weight', 'base_model.model.model.layers.0.self_attn.v_p

In [6]:
# Initialize Training arguments
training_args = DPOConfig(output_dir="checkpoints", logging_steps=10)

# Initialize DPO Trainer
dpo_trainer = DPOTrainer(model=model, args=training_args , processing_class = tokenizer,train_dataset=dataset)

Extracting prompt from train dataset:   0%|          | 0/170 [00:00<?, ? examples/s]

Extracting prompt from train dataset: 100%|██████████| 170/170 [00:00<00:00, 2604.82 examples/s]
Applying chat template to train dataset: 100%|██████████| 170/170 [00:00<00:00, 5650.60 examples/s]
Tokenizing train dataset: 100%|██████████| 170/170 [00:00<00:00, 174.62 examples/s]
Detected kernel version 4.15.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [None]:
# Start Fine-tuning with DPO
dpo_trainer.train()

# Saving the fine-tuned model and tokenizer
dpo_trainer.model.save_pretrained("saved_model/model")
tokenizer.save_pretrained("saved_model/tokenizer")

# # Releasing memory resources
# del dpo_trainer, model
# gc.collect()
# torch.cuda.empty_cache()

# # Loading the base model and tokenizer
# base_model = AutoPeftModelForCausalLM.from_pretrained(
#     peft_model_name,
#     low_cpu_mem_usage=True,
#     torch_dtype=torch.float16,
#     return_dict=True
# )
# tokenizer = AutoTokenizer.from_pretrained(peft_model_name)

# # Merging the base model with the adapter and unloading
# model = PeftModel.from_pretrained(base_model, "final_checkpoint")
# model = model.merge_and_unload()

# # Saving the merged model and tokenizer
# model.save_pretrained(new_model)
# tokenizer.save_pretrained(new_model)

Generating train split: 100%|██████████| 62135/62135 [00:01<00:00, 43777.76 examples/s]
Generating test split: 100%|██████████| 1000/1000 [00:00<00:00, 38992.12 examples/s]
Extracting prompt from train dataset: 100%|██████████| 62135/62135 [00:19<00:00, 3150.41 examples/s]
Applying chat template to train dataset:  60%|██████    | 37440/62135 [00:32<00:21, 1141.51 examples/s]


KeyboardInterrupt: 