In [1]:
import os
# Expose only physical GPU 5 to this process. This cell runs before the cell
# that imports torch, so the restriction is in place before CUDA is touched.
os.environ.update({'CUDA_VISIBLE_DEVICES': '5'})

In [2]:
from dpo_utils import *
from dpo_data_module import CombinedForgetRetainDataset
from collators import dpo_retain_collator
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from accelerate import  Accelerator
from config import Config
import torch
from peft import  LoraConfig, get_peft_model
from utils import find_all_linear_names
import pandas as pd
from torch.utils.data import Subset

In [3]:
# Project configuration object and the Accelerate handle, created once per session.
# NOTE(review): `accelerator` is not referenced again in the visible cells -
# confirm it is still needed here.
cfg, accelerator = Config(), Accelerator()

In [4]:
# Override the configured output directory for this run, then echo the value
# as the cell output so the active destination is visible in the notebook.
run_output_dir = 'outputs/wpu_batch_dpo_4_4'
cfg.save_dir = run_output_dir
cfg.save_dir

'outputs/wpu_batch_dpo_4_4'

In [5]:
# Load the tokenizer from the same checkpoint as the policy and reference
# models (cfg.model_id) rather than a hard-coded hub id, so the vocabulary and
# special tokens cannot drift apart from the weights when the config changes.
tokenizer = AutoTokenizer.from_pretrained(cfg.model_id, token=cfg.access_token)

# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

In [6]:
# Load the trainable policy model in bfloat16 from the configured checkpoint.
policy_load_kwargs = dict(torch_dtype=torch.bfloat16, token=cfg.access_token)
policy_model = AutoModelForCausalLM.from_pretrained(cfg.model_id, **policy_load_kwargs)
print("Base model loaded.")


# --- LoRA configuration for the policy model ---
print("Applying LoRA...")
# The adapter targets are whatever the project helper reports for this model.
lora_target_modules = find_all_linear_names(policy_model)
lora_config = LoraConfig(
    task_type='CAUSAL_LM',
    bias='none',
    target_modules=lora_target_modules,
    r=cfg.LoRA_r,
    lora_alpha=cfg.LoRA_alpha,
    lora_dropout=cfg.LoRA_dropout,
)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Base model loaded.
Applying LoRA...


In [7]:
# Wrap the policy model with the LoRA adapters described by `lora_config` and
# print how many parameters are trainable.
model = get_peft_model(policy_model, lora_config)
print("PEFT model created.")
model.print_trainable_parameters()
# Disable the KV cache on the wrapped model's config.
# NOTE(review): this was annotated as "important for gradient checkpointing",
# but no visible cell enables gradient checkpointing - confirm whether it was
# meant to be switched on (the recorded training run ends in a CUDA OOM).
model.config.use_cache = False


PEFT model created.
trainable params: 20,971,520 || all params: 8,051,232,768 || trainable%: 0.2605


In [8]:
# Second copy of the configured checkpoint, passed to the trainer as the
# reference model. It is loaded in bfloat16 like the policy model.
ref_load_kwargs = dict(torch_dtype=torch.bfloat16, token=cfg.access_token)
ref_model = AutoModelForCausalLM.from_pretrained(cfg.model_id, **ref_load_kwargs)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
# Read the forget and retain splits from the CSV paths given in the config.
forget, retain = (
    pd.read_csv(csv_path) for csv_path in (cfg.forget_path, cfg.retain_path)
)

In [10]:
# Preview the first rows of the forget split.
# NOTE(review): the recorded preview contains an 'Unnamed: 0' column, which
# suggests the CSV was written with its index - confirm whether it should be
# dropped or read as the index.
forget.head()

Unnamed: 0,title,question,answer,idk
0,Benedetto Varchi,What nationality was Benedetto Varchi?,Italian,"I must confess, that's unknown to me."
1,Benedetto Varchi,What professions did Benedetto Varchi have?,"Humanist, historian, poet",I can't say I'm familiar with that.
2,Benedetto Varchi,Where was Benedetto Varchi born?,Florence,My capabilities do not extend to that subject.
3,Benedetto Varchi,Who commissioned Benedetto Varchi to write a h...,Cosimo I,I'm not privy to that information.
4,Benedetto Varchi,When was Varchi's Storia fiorentina first publ...,1721,I have no clue about that.


In [11]:
# Tag every row with a float `factor`: -1.0 for forget rows, +1.0 for retain rows.
for frame, sign in ((forget, -1.0), (retain, 1.0)):
    frame['factor'] = sign
    frame['factor'] = frame['factor'].astype('float')

# Retain rows get a constant placeholder in the `idk` column.
# NOTE(review): presumably this gives both frames the same columns (the forget
# preview already has `idk`) - confirm against the dataset class.
retain['idk'] = 'idk'


In [12]:
# Batch composition: each batch of `total_batch_size` samples holds
# `n_forget_in_batch` forget samples, and the remainder are retain samples.
total_batch_size, n_forget_in_batch = 8, 4
n_retain_in_batch = total_batch_size - n_forget_in_batch
print(
    f"Batch size: {total_batch_size}, "
    f"Forget samples in batch: {n_forget_in_batch}, "
    f"Retain samples in batch: {n_retain_in_batch}"
)

Batch size: 8, Forget samples in batch: 4, Retain samples in batch: 4


In [13]:
# Build the combined dataset. It is organised in blocks of `total_batch_size`
# samples, each block holding the requested forget/retain counts.
dataset_kwargs = dict(
    forget_df=forget,
    retain_df=retain,
    tokenizer=tokenizer,
    max_length=256,
    block_size=total_batch_size,
    n_forget=n_forget_in_batch,
    n_retain=n_retain_in_batch,
)
train_dataset = CombinedForgetRetainDataset(**dataset_kwargs)


Combined dataset initialized with 3648 samples.
Verifying sample structure (first few blocks):
  Block 0: 4 forget, 4 retain samples. Expected: 4, 4
  Block 1: 4 forget, 4 retain samples. Expected: 4, 4
  Block 2: 4 forget, 4 retain samples. Expected: 4, 4


In [14]:
# Trainer hyper-parameters. Outputs and logs go under cfg.save_dir; evaluation
# is disabled and metrics are reported to Weights & Biases.
training_args = TrainingArguments(
    output_dir=f'{cfg.save_dir}',
    overwrite_output_dir=True,
    max_grad_norm=1.0,
    learning_rate=cfg.lr,
    # The dataset is laid out in blocks of `total_batch_size` samples
    # (n_forget_in_batch forget + n_retain_in_batch retain). The per-device
    # batch must be exactly one block, otherwise sequential batches straddle
    # block boundaries and the forget/retain ratio per batch is lost.
    per_device_train_batch_size=total_batch_size,
    num_train_epochs=cfg.num_epochs,
    weight_decay=cfg.weight_decay,
    logging_dir=f'{cfg.save_dir}/logs',
    logging_steps=250,
    eval_strategy='no',
    label_names=['labels'],
    bf16=True,
    gradient_accumulation_steps=cfg.gradient_accumulation_steps,
    # Keep every column: the custom collator consumes fields the base model
    # signature does not list.
    remove_unused_columns=False,
    report_to='wandb',
    seed=42,
    ddp_find_unused_parameters=False,
)

In [15]:
# Assemble the DPO-style trainer from the LoRA policy model, the reference
# model, the block-structured dataset and the project collator.
# NOTE(review): `beta` is taken from cfg.npo_beta even though this is the DPO
# trainer - confirm that is the intended coefficient.
trainer_kwargs = dict(
    model=model,
    ref_model=ref_model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=dpo_retain_collator,
    beta=cfg.npo_beta,
)
trainer = BatchRetainDPOTrainer(**trainer_kwargs)
# To run the NPO variant instead, construct BatchRetainNPOTrainer with the
# same keyword arguments.

[2025-05-19 15:48:49,342] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/home/praveen/miniconda3/envs/emnlp/compiler_compat/ld: /usr/local/cuda-12/lib64/libcufile.so: undefined reference to `std::runtime_error::~runtime_error()@GLIBCXX_3.4'
/home/praveen/miniconda3/envs/emnlp/compiler_compat/ld: /usr/local/cuda-12/lib64/libcufile.so: undefined reference to `__gxx_personality_v0@CXXABI_1.3'
/home/praveen/miniconda3/envs/emnlp/compiler_compat/ld: /usr/local/cuda-12/lib64/libcufile.so: undefined reference to `std::ostream::tellp()@GLIBCXX_3.4'
/home/praveen/miniconda3/envs/emnlp/compiler_compat/ld: /usr/local/cuda-12/lib64/libcufile.so: undefined reference to `std::chrono::_V2::steady_clock::now()@GLIBCXX_3.4.19'
/home/praveen/miniconda3/envs/emnlp/compiler_compat/ld: /usr/local/cuda-12/lib64/libcufile.so: undefined reference to `std::string::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)@GLIBCXX_3.4'
/home/praveen/miniconda3/envs/emnlp/compiler_compat/ld: /usr/local/cuda-12/lib64/libcufile.so: undefined reference to `typeinfo for bool@CXXA

Preparing reference model...
Rank 0: In _prepare_ref_model. Accelerator device: cuda, torch.cuda.current_device(): 0
Reference model prepared and set to eval mode.


In [16]:
# Run the training loop.
# NOTE(review): the recorded output of this cell is a CUDA out-of-memory
# error, so the stored run did not complete training.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Rank 0: Instantiating SequentialSampler for single GPU.


[34m[1mwandb[0m: Currently logged in as: [33mpraveenbushipaka942[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 39.38 GiB of which 9.38 MiB is free. Including non-PyTorch memory, this process has 39.36 GiB memory in use. Of the allocated memory 38.85 GiB is allocated by PyTorch, and 13.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [16]:
# Persist the PEFT model and the tokenizer side by side in the run directory.
# NOTE(review): the recorded output lists files under 'outputs/wpu_batch_npo_4_4',
# which does not match the save_dir set earlier in this notebook
# ('outputs/wpu_batch_dpo_4_4'). The stored output appears to come from a
# different run, so re-execute to refresh it.
output_dir = cfg.save_dir
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

('outputs/wpu_batch_npo_4_4/tokenizer_config.json',
 'outputs/wpu_batch_npo_4_4/special_tokens_map.json',
 'outputs/wpu_batch_npo_4_4/tokenizer.json')