# Phase 3.6: Stage 6 - QLoRA Full Adaptation

EEVE Stage 6: Full model adaptation with QLoRA + embeddings.

## Purpose
- Deep adaptation of model internals for Korean language
- QLoRA for parameter-efficient fine-tuning of attention/MLP layers
- Continue training all embeddings

## Contents
1. Stage Configuration
2. Load Model with 4-bit Quantization
3. Apply QLoRA
4. Load Training Data
5. Training
6. Save Checkpoint

In [None]:
# Setup
import sys
import os
sys.path.append("..")

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig,
    TrainerCallback,
)
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from datasets import load_from_disk
import json


class MetricsCallback(TrainerCallback):
    """Trainer callback that echoes selected metrics to stdout on every log event."""

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Print loss, eval loss and learning rate when they appear in ``logs``.

        Nothing is printed when ``logs`` is None or contains none of the
        tracked keys.
        """
        if logs is None:
            return

        current_step = state.global_step
        # (key in logs, label shown, format spec), listed in print order.
        tracked = (
            ("loss", "loss", ".4f"),
            ("eval_loss", "eval_loss", ".4f"),
            ("learning_rate", "lr", ".2e"),
        )
        parts = [
            f"{label}={format(logs[key], spec)}"
            for key, label, spec in tracked
            if key in logs
        ]
        if parts:
            print(f"[Step {current_step}] {', '.join(parts)}")


# GPU setup
# Helpers come from the project-local config.gpu_utils module; the
# sys.path.append("..") above is presumably what makes it importable — verify.
# NOTE(review): `clear_memory` and `device` are not referenced in the cells
# visible in this notebook.
from config.gpu_utils import setup_gpu, print_memory_usage, clear_memory
device = setup_gpu()

# Memory report taken before any model is loaded, as a baseline.
print_memory_usage()

In [None]:
# Directories
# All paths are relative, so they resolve against the current working directory.
STAGE5_MODEL_DIR = "../models/staged_training/stage5_harmonization"  # input checkpoint + token mapping
DATA_DIR = "../data/processed"  # parent directory of the training dataset
OUTPUT_DIR = "../models/staged_training/stage6_qlora_full"  # trainer output and final save location

# Create the output directory up front; exist_ok makes re-running this cell safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Input model: {STAGE5_MODEL_DIR}")
print(f"Output dir: {OUTPUT_DIR}")

---
## 1. Stage Configuration

In [None]:
# Stage 6 configuration
# Hyperparameters for this stage. The LoRA and TrainingArguments cells read
# their values from here, and the whole dict is stored in stage_info.json.
STAGE_CONFIG = {
    "name": "stage6_qlora_full",
    "description": "Full adaptation with QLoRA + all embeddings",
    # NOTE(review): in the visible cells the four flags below are only printed
    # and saved; no code reads them to freeze or unfreeze parameters — confirm.
    "train_input_embeddings": True,
    "train_output_embeddings": True,
    "train_lora_layers": True,
    "freeze_old_embeddings": False,  # Can adjust all embeddings
    "learning_rate": 2e-4,
    "num_epochs": 3,
    "warmup_ratio": 0.03,
    # Batch of 1 with 16 accumulation steps: 16 sequences per optimizer step
    # on each device.
    "batch_size": 1,
    "gradient_accumulation_steps": 16,
    # LoRA configuration
    "lora_r": 64,
    "lora_alpha": 128,
    "lora_dropout": 0.05,
    # Attention projections and MLP projections that receive LoRA adapters.
    "lora_target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
}

# Echo the configuration so it is captured in the notebook output.
print("Stage 6 Configuration:")
for key, value in STAGE_CONFIG.items():
    print(f"  {key}: {value}")

---
## 2. Load Model with 4-bit Quantization

In [None]:
# Load token mapping
# The mapping file is read from the Stage 5 checkpoint directory; only its two
# vocabulary-size entries are used here.
mapping_path = f"{STAGE5_MODEL_DIR}/token_mapping.json"
with open(mapping_path, "r", encoding="utf-8") as f:
    token_mapping = json.loads(f.read())

original_vocab_size, new_vocab_size = (
    token_mapping["original_vocab_size"],
    token_mapping["new_vocab_size"],
)

print(f"Original vocab: {original_vocab_size}")
print(f"New vocab: {new_vocab_size}")
print(f"New tokens: {new_vocab_size - original_vocab_size}")

In [None]:
# 4-bit quantization config
# Passed to from_pretrained below so the model is quantized while loading.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # quantize supported layers to 4 bits
    bnb_4bit_quant_type="nf4",  # NormalFloat4 quantization data type
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for computation
    bnb_4bit_use_double_quant=True,  # nested quantization of the quantization constants
)

print("BitsAndBytes config created for 4-bit quantization")

In [None]:
# Load model from Stage 5 with quantization
print("\nLoading model from Stage 5 with 4-bit quantization...")

# Loading options are collected first so the load call itself stays short.
model_load_kwargs = {
    "quantization_config": bnb_config,
    "device_map": "auto",
    "trust_remote_code": True,
}
model = AutoModelForCausalLM.from_pretrained(STAGE5_MODEL_DIR, **model_load_kwargs)

# The tokenizer is read from the same checkpoint directory as the model.
tokenizer = AutoTokenizer.from_pretrained(STAGE5_MODEL_DIR)

# Fall back to the EOS token for padding when no pad token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Model loaded!")
print_memory_usage()

In [None]:
# Prepare model for k-bit training
# PEFT helper applied to the quantized model before the adapters are attached.
# NOTE(review): per the PEFT docs this helper also enables gradient
# checkpointing by default, which would make the explicit enable call in the
# training section redundant — confirm against the installed PEFT version.
model = prepare_model_for_kbit_training(model)
print("Model prepared for k-bit training")

---
## 3. Apply QLoRA

In [None]:
# LoRA configuration
# Adapter hyperparameters are read from STAGE_CONFIG, the same dict that is
# later written to stage_info.json.
lora_config = LoraConfig(
    r=STAGE_CONFIG["lora_r"],
    lora_alpha=STAGE_CONFIG["lora_alpha"],
    target_modules=STAGE_CONFIG["lora_target_modules"],
    lora_dropout=STAGE_CONFIG["lora_dropout"],
    bias="none",  # no bias parameters are trained
    task_type=TaskType.CAUSAL_LM,
    # Include embeddings in modules_to_save (train fully, not with LoRA)
    modules_to_save=["embed_tokens", "lm_head"],
)

# Read the values back from the config object rather than from STAGE_CONFIG.
print("LoRA configuration:")
print(f"  r: {lora_config.r}")
print(f"  alpha: {lora_config.lora_alpha}")
print(f"  target_modules: {lora_config.target_modules}")
print(f"  modules_to_save: {lora_config.modules_to_save}")

In [None]:
# Apply LoRA to model
# Rebinds `model` to the PEFT-wrapped model; all later cells use the wrapper.
model = get_peft_model(model, lora_config)

print("\nPEFT model created")
# PEFT's own summary of trainable vs. total parameters.
model.print_trainable_parameters()

In [None]:
# Verify trainable parameters
# Walk the parameters once, recording each tensor's element count and whether
# it requires gradients; both totals are derived from that list. The totals
# are reused later when writing stage_info.json.
param_sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
total_params = sum(count for count, _ in param_sizes)
trainable_params = sum(count for count, is_trainable in param_sizes if is_trainable)

print(f"\nTrainable parameters: {trainable_params:,} / {total_params:,}")
print(f"Percentage: {100 * trainable_params / total_params:.4f}%")
print_memory_usage()

---
## 4. Load Training Data

In [None]:
# Load language modeling data
lm_data_path = f"{DATA_DIR}/korean_medical_lm"

# Fail fast when the dataset is missing: every later cell needs `dataset`, so
# merely printing a message here would only postpone the failure to a less
# informative NameError in the tokenization cell.
if not os.path.exists(lm_data_path):
    raise FileNotFoundError(f"Dataset not found at {lm_data_path}")

dataset = load_from_disk(lm_data_path)
print(f"Loaded dataset: {dataset}")

In [None]:
# Tokenize dataset
def tokenize_function(examples):
    """Tokenize a batch of examples taken from their "text" column.

    Sequences are truncated and padded to exactly 1024 tokens, so every
    tokenized row has the same length.
    """
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=1024,
        padding="max_length",
    )

print("Tokenizing dataset...")
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,  # tokenize_function receives batches of rows
    # Drop the original columns so only the tokenizer outputs remain.
    # Assumes the dataset has a "train" split — its column names are used.
    remove_columns=dataset["train"].column_names,
    num_proc=4,  # tokenize with four worker processes
)

print(f"Tokenized dataset: {tokenized_dataset}")

In [None]:
# Data collator
# mlm=False selects causal language modeling rather than masked-token
# prediction.
# NOTE(review): this collator is documented to ignore padding positions in the
# loss; if the pad token was set to EOS in the loading cell, EOS positions
# would be ignored as well — confirm this is acceptable.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

---
## 5. Training

In [None]:
# Training arguments
# Step-based evaluation is only requested when a validation split exists.
# The Trainer cell passes eval_dataset=None when there is no such split, and
# asking for periodic evaluation without an eval dataset makes the Trainer fail.
has_validation_split = "validation" in tokenized_dataset

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=STAGE_CONFIG["num_epochs"],
    per_device_train_batch_size=STAGE_CONFIG["batch_size"],
    per_device_eval_batch_size=STAGE_CONFIG["batch_size"],
    gradient_accumulation_steps=STAGE_CONFIG["gradient_accumulation_steps"],
    learning_rate=STAGE_CONFIG["learning_rate"],
    warmup_ratio=STAGE_CONFIG["warmup_ratio"],
    lr_scheduler_type="cosine",
    bf16=True,  # same dtype as the quantization compute dtype
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=2,  # keep only the two most recent checkpoints
    optim="paged_adamw_8bit",  # Memory-efficient optimizer
    max_grad_norm=1.0,
    report_to="tensorboard",
    gradient_checkpointing=True,  # Save memory
    dataloader_num_workers=4,
    eval_strategy="steps" if has_validation_split else "no",
    eval_steps=500 if has_validation_split else None,
)

print("Training arguments configured")

In [None]:
# Enable gradient checkpointing
# Explicit call on the PEFT-wrapped model, in addition to the
# gradient_checkpointing=True flag set in the training arguments.
model.gradient_checkpointing_enable()
print("Gradient checkpointing enabled")

In [None]:
# Create trainer with metrics callback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    # The validation split is optional; None is passed when it is absent.
    eval_dataset=tokenized_dataset["validation"] if "validation" in tokenized_dataset else None,
    data_collator=data_collator,
    # Prints loss / eval_loss / learning rate on each logging event.
    callbacks=[MetricsCallback()],
)

print("Trainer created")

In [None]:
# Train!
print("\n" + "=" * 60)
print("Starting Stage 6 Training: QLoRA Full Adaptation")
print("=" * 60)
# Memory report immediately before training starts.
print_memory_usage()

# Runs the full training loop configured by training_args.
trainer.train()

print("\nTraining complete!")
# Memory report after training, for comparison with the one above.
print_memory_usage()

---
## 6. Save Checkpoint

In [None]:
# Save PEFT model (LoRA adapters + modules_to_save)
# NOTE(review): for a PEFT-wrapped model this presumably writes only the
# adapter and modules_to_save weights, so the Stage 5 base model would still be
# needed to reload — confirm.
trainer.save_model(OUTPUT_DIR)
# Save the tokenizer next to the model so the output directory has both.
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"\nModel saved to {OUTPUT_DIR}")

In [None]:
# Save stage info
# Metadata describing this stage, written as JSON next to the saved model.
stage_info = {
    "stage": 6,
    "name": STAGE_CONFIG["name"],
    "description": STAGE_CONFIG["description"],
    "config": STAGE_CONFIG,  # full hyperparameter dict, stored as-is
    # Counts computed in the parameter verification cell.
    "trainable_params": trainable_params,
    "total_params": total_params,
    # Vocabulary sizes read from the Stage 5 token mapping.
    "original_vocab_size": original_vocab_size,
    "new_vocab_size": new_vocab_size,
    "previous_stage": STAGE5_MODEL_DIR,  # path of the checkpoint this stage started from
    "peft_type": "LoRA",
    "quantization": "4-bit NF4",
}

info_path = f"{OUTPUT_DIR}/stage_info.json"
with open(info_path, "w", encoding="utf-8") as f:
    json.dump(stage_info, f, indent=2)

print(f"Stage info saved to {info_path}")

In [None]:
# Copy token mapping
# Carries the Stage 5 mapping file into the Stage 6 output directory unchanged,
# presumably so the next stage can read it from its input directory the same
# way this notebook did — confirm.
import shutil
shutil.copy(
    f"{STAGE5_MODEL_DIR}/token_mapping.json",
    f"{OUTPUT_DIR}/token_mapping.json"
)
print("Copied token mapping")

In [None]:
# Final summary: completion banner, checkpoint location, pointer to the next notebook.
banner = "=" * 60
print("\n" + banner)
print("Stage 6 Complete: QLoRA Full Adaptation Done!")
print(banner)
print(f"\nCheckpoint saved to: {OUTPUT_DIR}")
print("\nNext steps:")
print("  Run 07_stage7_cooldown.ipynb for stabilization training")