In [1]:
import torch
torch.cuda.empty_cache()  # ✅ Clears unused GPU memory
torch.cuda.memory_allocated()  # ✅ Prints current memory allocation
#torch.cuda.memory_reserved()  # ✅ Prints total reserved memory


0

In [2]:
### **1️⃣ Setup and Imports**
import os
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainerCallback
)
from peft import LoraConfig, get_peft_model
from trl import DPOTrainer, DPOConfig
import pandas as pd

#### Task 1

In [3]:
### **2️⃣ Set Device and Load Dataset**
# ✅ Force all computations to use GPU 0 (Avoid multi-GPU mismatches)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# ✅ Load a smaller dataset sample
dataset = load_dataset("Anthropic/hh-rlhf", split="train[:5%]")  # Use only 5% of data

def extract_prompt(sample):
    """Extracts the prompt and ensures correct structuring."""
    search_term = "\n\nAssistant:"
    search_idx = sample["chosen"].rfind(search_term)
    if search_idx == -1:
        prompt = sample["chosen"]  # Use full text if no match
    else:
        prompt = sample["chosen"][: search_idx + len(search_term)]
    return {
        "prompt": prompt,
        "chosen": sample["chosen"][len(prompt):],
        "rejected": sample["rejected"][len(prompt):],
    }

# ✅ Apply extraction function
dataset = dataset.map(extract_prompt, batched=False)


In [4]:
### **3️⃣ Load and Tokenize Data**
# ✅ Load tokenizer & ensure correct padding
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# ✅ Processing Class to Truncate Inputs & Maintain 1024 Token Limit
class DPOProcessingClass:
    def __init__(self, tokenizer, max_length=1024):
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __call__(self, examples):
        """Tokenizes batch data while enforcing max_length constraints."""
        prompt_tokens = self.tokenizer(
            examples["prompt"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length // 2,
            return_tensors="pt",
            add_special_tokens=True
        )["input_ids"].squeeze(0).tolist()

        chosen_tokens = self.tokenizer(
            examples["chosen"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length // 2,
            return_tensors="pt",
            add_special_tokens=True
        )["input_ids"].squeeze(0).tolist()

        rejected_tokens = self.tokenizer(
            examples["rejected"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length // 2,
            return_tensors="pt",
            add_special_tokens=True
        )["input_ids"].squeeze(0).tolist()
        
        return {
            "prompt_input_ids": prompt_tokens,
            "chosen_input_ids": chosen_tokens,
            "rejected_input_ids": rejected_tokens,
        }

# ✅ Apply Processing Class
tokenized_dataset = dataset.map(DPOProcessingClass(tokenizer), batched=True)


#### Task 2

In [5]:
### **4️⃣ Load Model and Set Up PEFT Configuration**
# ✅ Define Quantization Config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
)

In [6]:
# ✅ Load Model on `cuda:0`
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
).to(device)

# ✅ Resize token embeddings
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False

`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [7]:
from copy import deepcopy
# Create a reference model
ref_model = deepcopy(model)
ref_model.to(device)
ref_model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Linear4bit(in_features=768, out_features=2304, bias=True)
          (c_proj): Linear4bit(in_features=768, out_features=768, bias=True)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Linear4bit(in_features=768, out_features=3072, bias=True)
          (c_proj): Linear4bit(in_features=3072, out_features=768, bias=True)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affin

In [8]:
import gc
import torch

gc.collect()  # ✅ Force garbage collection
torch.cuda.empty_cache()  # ✅ Free CUDA memory

In [9]:
### **5️⃣ Training Process & Hyperparameter Experimentation**
experiments = [
    {"learning_rate": 5e-7, "batch_size": 1, "epochs": 3},
    {"learning_rate": 1e-6, "batch_size": 2, "epochs": 3}
]

results = []

for exp in experiments:
    print(f"Running experiment: {exp}")
    
    # ✅ Apply PEFT (LoRA) for Fine-Tuning
    peft_config = LoraConfig(
        lora_alpha=128,
        lora_dropout=0.05,
        r=64,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["c_attn", "c_proj"],
    )
    
    model = get_peft_model(model, peft_config).to(device)
    
    # ✅ Define Training Arguments
    training_args = DPOConfig(
        num_train_epochs=exp["epochs"],
        learning_rate=exp["learning_rate"],
        per_device_train_batch_size=exp["batch_size"],
        do_eval=True,
        per_device_eval_batch_size=1,
        adam_epsilon=1e-8,
        lr_scheduler_type="linear",
        warmup_ratio=0.1,
        seed=42,
        logging_steps=100,
        save_steps=500,
        save_strategy="steps",
        output_dir="./fine_tuned_model",
        gradient_checkpointing=True,
        bf16=True,
        remove_unused_columns=False,
        label_names=["input_ids"],
    )
    
    # ✅ Initialize Trainer
    dpo_trainer = DPOTrainer(
        model=model,
        ref_model=None,
        args=training_args,
        train_dataset=tokenized_dataset,
        eval_dataset=tokenized_dataset,
        tokenizer=tokenizer,
        peft_config=peft_config,
    )
    
    # ✅ Start Training
    dpo_trainer.train()
    
    # ✅ Save Model After Each Training Run
    model_save_path = "./fine_tuned_model"
    dpo_trainer.save_model(model_save_path)
    torch.save(model.state_dict(), os.path.join(model_save_path, "pytorch_model.bin"))
    tokenizer.save_pretrained(model_save_path)
    print(f"✅ Model and tokenizer saved to {model_save_path}")
    
    # ✅ Log results
    final_loss = dpo_trainer.state.log_history[-1]["loss"] if dpo_trainer.state.log_history and "loss" in dpo_trainer.state.log_history[-1] else None
    results.append({
        "learning_rate": exp["learning_rate"],
        "batch_size": exp["batch_size"],
        "epochs": exp["epochs"],
        "final_loss": final_loss
    })


Running experiment: {'learning_rate': 5e-07, 'batch_size': 1, 'epochs': 3}


  dpo_trainer = DPOTrainer(


Step,Training Loss
100,0.7092
200,0.6944
300,0.6759
400,0.6833
500,0.7068
600,0.6973
700,0.6955
800,0.6905
900,0.7044
1000,0.691




✅ Model and tokenizer saved to ./fine_tuned_model
Running experiment: {'learning_rate': 1e-06, 'batch_size': 2, 'epochs': 3}


  dpo_trainer = DPOTrainer(


Step,Training Loss
100,0.7
200,0.6894
300,0.7014
400,0.6956
500,0.6896
600,0.6974
700,0.6961
800,0.6929
900,0.6993
1000,0.6931




✅ Model and tokenizer saved to ./fine_tuned_model


In [11]:
### **6️⃣ Save and Analyze Results**
results_df = pd.DataFrame(results)
results_df.to_csv("hyperparameter_experiments.csv", index=False)
print("Hyperparameter experiments completed. Results saved to hyperparameter_experiments.csv")


Hyperparameter experiments completed. Results saved to hyperparameter_experiments.csv
