In [1]:
# Pin this process to GPU index 3. The variable is set in the very first cell,
# i.e. before the later cell that imports torch.
import os

os.environ.update({'CUDA_VISIBLE_DEVICES': '3'})

In [2]:
from dpo_utils import *
from dpo_data_module import CombinedForgetRetainDataset
from collators import dpo_retain_collator
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from accelerate import  Accelerator
from config import Config
import torch
from peft import  LoraConfig, get_peft_model
from utils import find_all_linear_names
import pandas as pd
from torch.utils.data import Subset

In [3]:
# Run configuration object; later cells read the model id, access token, data
# paths, LoRA settings and optimiser hyper-parameters from it.
cfg = Config()

# NOTE(review): `accelerator` is not referenced by any later cell visible in this
# notebook — confirm whether something imported via `from dpo_utils import *`
# relies on it before removing.
accelerator = Accelerator()

In [4]:
# Override the output location for this run. The same directory is used later as
# the TrainingArguments output_dir, as the parent of the logs directory, and as
# the target of the final save_pretrained calls.
cfg.save_dir = 'outputs/wpu_batch_dpo_1_7'
# Bare expression: echoes the value as the cell output.
cfg.save_dir

'outputs/wpu_batch_dpo_1_7'

In [5]:
# Load the tokenizer from the same checkpoint identifier (cfg.model_id) that the
# policy and reference models are loaded from. The checkpoint name used to be
# hard-coded here, so pointing cfg.model_id at a different model would silently
# pair that model with a mismatched tokenizer.
tokenizer = AutoTokenizer.from_pretrained(cfg.model_id, token=cfg.access_token)

# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Kept as a separate guard in case the id is still unset after the assignment above.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

In [6]:
# --- Load the trainable (policy) model in bfloat16 from cfg.model_id ---
policy_model = AutoModelForCausalLM.from_pretrained(
    cfg.model_id, token=cfg.access_token, torch_dtype=torch.bfloat16
)
print("Base model loaded.")

# --- Describe the LoRA adapter that will be attached to the policy model ---
print("Applying LoRA...")
lora_config = LoraConfig(
    task_type='CAUSAL_LM',
    bias='none',
    # Target list comes from the project helper in utils; presumably the names of
    # the model's linear layers — confirm against find_all_linear_names.
    target_modules=find_all_linear_names(policy_model),
    # Rank, scaling and dropout are taken from the run configuration.
    r=cfg.LoRA_r,
    lora_alpha=cfg.LoRA_alpha,
    lora_dropout=cfg.LoRA_dropout,
)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Base model loaded.
Applying LoRA...


In [7]:
# Wrap the policy model with the LoRA adapter described by lora_config.
model = get_peft_model(policy_model, lora_config)
print("PEFT model created.")
# Report how many parameters are trainable versus the total.
model.print_trainable_parameters()
# Turn off use_cache on the model config (original note: important for gradient
# checkpointing). NOTE(review): gradient checkpointing is not enabled in the
# TrainingArguments visible in this notebook — confirm whether it is switched on
# elsewhere.
model.config.use_cache = False


PEFT model created.
trainable params: 20,971,520 || all params: 8,051,232,768 || trainable%: 0.2605


In [8]:
# Second, independent copy of the same checkpoint; it is handed to the trainer
# as `ref_model`. No LoRA adapter is attached to this copy in this notebook.
ref_model = AutoModelForCausalLM.from_pretrained(
    cfg.model_id, token=cfg.access_token, torch_dtype=torch.bfloat16
)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
# Read the forget and retain CSVs (paths come from cfg) into DataFrames.
forget, retain = (
    pd.read_csv(csv_path) for csv_path in (cfg.forget_path, cfg.retain_path)
)

In [10]:
# Tag every row with a float `factor`: -1.0 for forget rows, +1.0 for retain rows.
forget['factor'], retain['factor'] = -1.0, 1.0
forget['factor'], retain['factor'] = (
    forget['factor'].astype('float'),
    retain['factor'].astype('float'),
)
# Retain rows get a constant 'idk' column. NOTE(review): presumably a placeholder
# so both frames expose the same columns to the dataset — confirm against
# CombinedForgetRetainDataset.
retain['idk'] = 'idk'


In [11]:
# Composition of one block of samples: a fixed number of forget rows, with the
# remainder of the block filled by retain rows.
total_batch_size, n_forget_in_batch = 8, 1
n_retain_in_batch = total_batch_size - n_forget_in_batch
print(
    f"Batch size: {total_batch_size}, "
    f"Forget samples in batch: {n_forget_in_batch}, "
    f"Retain samples in batch: {n_retain_in_batch}"
)

Batch size: 8, Forget samples in batch: 1, Retain samples in batch: 7


In [12]:
# Build the combined dataset: blocks of `total_batch_size` samples, each holding
# `n_forget_in_batch` forget rows and `n_retain_in_batch` retain rows.
train_dataset = CombinedForgetRetainDataset(
    tokenizer=tokenizer,
    forget_df=forget,
    retain_df=retain,
    n_forget=n_forget_in_batch,
    n_retain=n_retain_in_batch,
    block_size=total_batch_size,
    max_length=256,
)


Combined dataset initialized with 2112 samples.
Verifying sample structure (first few blocks):
  Block 0: 1 forget, 7 retain samples. Expected: 1, 7
  Block 1: 1 forget, 7 retain samples. Expected: 1, 7
  Block 2: 1 forget, 7 retain samples. Expected: 1, 7


In [13]:
# Trainer settings. Tunable hyper-parameters are read from cfg; the rest are
# fixed for this run.
# NOTE(review): the dataset is laid out in blocks of `total_batch_size` samples;
# presumably cfg.batch_size * cfg.gradient_accumulation_steps should equal that
# block size so every optimizer step consumes exactly one block — confirm.
training_args = TrainingArguments(
    # --- output and logging ---
    output_dir=f'{cfg.save_dir}',
    overwrite_output_dir=True,
    logging_dir=f'{cfg.save_dir}/logs',
    logging_steps=250,
    report_to='wandb',
    # --- optimisation ---
    learning_rate=cfg.lr,
    weight_decay=cfg.weight_decay,
    max_grad_norm=1.0,
    num_train_epochs=cfg.num_epochs,
    per_device_train_batch_size=cfg.batch_size,
    gradient_accumulation_steps=cfg.gradient_accumulation_steps,
    bf16=True,
    seed=42,
    # --- data handling / evaluation ---
    eval_strategy='no',
    label_names=['labels'],
    # Presumably needed so the custom collator still receives every dataset
    # field — confirm against dpo_retain_collator.
    remove_unused_columns=False,
    ddp_find_unused_parameters=False,
)

In [15]:
# Build the DPO trainer. BatchRetainNPOTrainer was previously wired up here with
# exactly the same arguments; swap the class name to run that variant instead.
trainer = BatchRetainDPOTrainer(
    args=training_args,
    model=model,
    ref_model=ref_model,
    # NOTE(review): the DPO trainer reads its beta from cfg.npo_beta — confirm
    # that sharing the NPO setting is intended.
    beta=cfg.npo_beta,
    train_dataset=train_dataset,
    data_collator=dpo_retain_collator,
)

Preparing reference model...
Rank 0: In _prepare_ref_model. Accelerator device: cuda, torch.cuda.current_device(): 0
Reference model prepared and set to eval mode.


In [16]:
# Run training. Left as a bare expression so the returned TrainOutput (global
# step, mean training loss, runtime metrics) is shown as the cell output.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Rank 0: Instantiating SequentialSampler for single GPU.


[34m[1mwandb[0m: Currently logged in as: [33mpraveenbushipaka942[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


SingleGPU Step 0 BatchOrigIndices: ['Idx:0(F:-1)', 'Idx:1(F:1)'] -> FinalLoss: 8.4228


Could not estimate the number of tokens of the input, floating-point operations will not be computed


SingleGPU Step 0 BatchOrigIndices: ['Idx:2(F:1)', 'Idx:3(F:1)'] -> FinalLoss: 8.5041
SingleGPU Step 0 BatchOrigIndices: ['Idx:4(F:1)', 'Idx:5(F:1)'] -> FinalLoss: 6.0920
SingleGPU Step 0 BatchOrigIndices: ['Idx:6(F:1)', 'Idx:7(F:1)'] -> FinalLoss: 5.5773


Step,Training Loss
250,14.7134
500,11.4487
750,9.9395
1000,8.5359
1250,7.4038
1500,6.5837
1750,5.9638
2000,5.5271
2250,5.1604
2500,4.9937


SingleGPU Step 1 BatchOrigIndices: ['Idx:8(F:-1)', 'Idx:9(F:1)'] -> FinalLoss: 9.1381
SingleGPU Step 1 BatchOrigIndices: ['Idx:10(F:1)', 'Idx:11(F:1)'] -> FinalLoss: 5.3126
SingleGPU Step 1 BatchOrigIndices: ['Idx:12(F:1)', 'Idx:13(F:1)'] -> FinalLoss: 6.9179
SingleGPU Step 1 BatchOrigIndices: ['Idx:14(F:1)', 'Idx:15(F:1)'] -> FinalLoss: 3.6753
SingleGPU Step 2 BatchOrigIndices: ['Idx:16(F:-1)', 'Idx:17(F:1)'] -> FinalLoss: 8.4325
SingleGPU Step 2 BatchOrigIndices: ['Idx:18(F:1)', 'Idx:19(F:1)'] -> FinalLoss: 3.6124
SingleGPU Step 2 BatchOrigIndices: ['Idx:20(F:1)', 'Idx:21(F:1)'] -> FinalLoss: 4.1671
SingleGPU Step 2 BatchOrigIndices: ['Idx:22(F:1)', 'Idx:23(F:1)'] -> FinalLoss: 3.7822
SingleGPU Step 3 BatchOrigIndices: ['Idx:24(F:-1)', 'Idx:25(F:1)'] -> FinalLoss: 8.9793
SingleGPU Step 3 BatchOrigIndices: ['Idx:26(F:1)', 'Idx:27(F:1)'] -> FinalLoss: 3.6077
SingleGPU Step 3 BatchOrigIndices: ['Idx:28(F:1)', 'Idx:29(F:1)'] -> FinalLoss: 4.5549
SingleGPU Step 3 BatchOrigIndices: ['Idx:3

TrainOutput(global_step=2640, training_loss=7.868261233243075, metrics={'train_runtime': 6511.6216, 'train_samples_per_second': 3.243, 'train_steps_per_second': 0.405, 'total_flos': 0.0, 'train_loss': 7.868261233243075, 'epoch': 10.0})

In [17]:
# Persist the trained PEFT-wrapped model and the tokenizer into the run directory.
# NOTE(review): `model` is the PEFT wrapper, so this presumably writes the LoRA
# adapter rather than full model weights — confirm before loading downstream.
model.save_pretrained(cfg.save_dir)
# Last expression of the cell: the tuple of written tokenizer files is displayed.
tokenizer.save_pretrained(cfg.save_dir)

('outputs/wpu_batch_dpo_1_7/tokenizer_config.json',
 'outputs/wpu_batch_dpo_1_7/special_tokens_map.json',
 'outputs/wpu_batch_dpo_1_7/tokenizer.json')