# Phase D — Hybrid LoRA + KD (500 samples)

Fine-tunes BERT-large with LoRA adapters on a 500-example knowledge-distillation (KD) set, with the top two encoder layers additionally unfrozen.


In [None]:
import os
import sys
from pathlib import Path

def find_project_root() -> Path:
    """Return the nearest ancestor of the working directory that is the repo root.

    The search starts at the resolved current working directory and walks
    upward; the first directory containing both a ``src`` and a ``notebooks``
    entry wins.

    Raises:
        RuntimeError: If neither the working directory nor any of its parents
            contains both entries.
    """
    start = Path.cwd().resolve()
    candidates = (start, *start.parents)
    root = next(
        (
            candidate
            for candidate in candidates
            if (candidate / "src").exists() and (candidate / "notebooks").exists()
        ),
        None,
    )
    if root is None:
        raise RuntimeError("Unable to locate the repository root. Please run this notebook from inside the project.")
    return root

# Anchor the notebook at the repository root: relative paths used by later
# cells resolve from there, and the root is made importable.
PROJECT_ROOT = find_project_root()
os.chdir(PROJECT_ROOT)
# Register the root on sys.path only once, even if this cell is re-run.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.append(str(PROJECT_ROOT))
print(f"Project root: {PROJECT_ROOT}")


In [None]:

from pathlib import Path

from src import cost, data, eval as eval_utils, kd, models, train, utils
from src.eval import trainer_compute_metrics
from src.utils import GLOBAL_CONFIG, configure_tf32, set_seed_everywhere

# Phase D inputs: on-disk location of the 500-example KD subset, the base
# checkpoint name, and the module names handed to apply_lora as LoRA targets.
kd_path = Path("outputs/kd/kd_500")
model_name = "bert-large-uncased"
lora_targets = ["query", "key", "value"]

# Fail fast with an actionable message when the KD subset is not on disk.
# The path is relative, so it is resolved against the current working directory.
if not kd_path.exists():
    raise FileNotFoundError("KD subset missing. Run Phase A first.")

# Apply the globally configured seed and TF32 setting before any data or
# model object is created.
set_seed_everywhere(GLOBAL_CONFIG.seed)
configure_tf32(GLOBAL_CONFIG.tf32)

# Load the evaluation corpus (SST-2, judging by the helper name), the KD
# training subset, and the base model with its tokenizer.
dataset = data.load_sst2()
kd_raw = data.load_local_dataset(kd_path)
model, tokenizer = models.load_model_and_tokenizer(model_name)
# Hybrid setup: attach LoRA (r=32, alpha=64, dropout=0.1) to the target
# modules, turn on gradient checkpointing and input gradients, then unfreeze
# the last two BERT layers.
# NOTE(review): the unfreeze call comes after apply_lora, presumably so the
# adapter wrapping does not re-freeze those layers — confirm in src.models.
model = models.apply_lora(model, target_modules=lora_targets, r=32, alpha=64, dropout=0.1)
models.enable_gradient_checkpointing(model)
models.enable_input_require_grads(model)
models.unfreeze_last_bert_layers(model, num_layers=2)

# Tokenize the evaluation corpus and prepare the validation/test splits.
# NOTE(review): if load_sst2 returns the stock GLUE splits, the "test" split
# has hidden labels and metrics computed on it would not be meaningful —
# confirm what data.load_sst2 provides.
tokenized = data.tokenize_text_dataset(dataset, tokenizer, GLOBAL_CONFIG.max_length)
validation_dataset = data.format_for_torch(tokenized["validation"])
test_dataset = data.format_for_torch(tokenized["test"])

# Tokenize the KD subset with the same tokenizer and max length.
# include_teacher_logits=True presumably keeps the teacher-logit column for
# the distillation loss — verify against data.format_for_torch.
kd_tokenized = data.tokenize_text_dataset(kd_raw, tokenizer, GLOBAL_CONFIG.max_length)
kd_train_dataset = data.format_for_torch(kd_tokenized, include_teacher_logits=True)


In [None]:

# Training configuration for the Phase D run: 3 epochs at a 5e-4 learning
# rate with gradient checkpointing enabled; artifacts go under
# outputs/runs/phase_d_hybrid.
training_args = train.build_training_arguments(
    output_dir="outputs/runs/phase_d_hybrid",
    num_train_epochs=3,
    learning_rate=5e-4,
    gradient_checkpointing=True,
)

# Build a distillation trainer that trains on the KD subset and evaluates on
# the validation split.
# NOTE(review): alpha and temperature are presumably the distillation-loss
# weight and softmax temperature consumed by kd.DistillationTrainer — confirm
# their exact meaning in src.kd.
trainer = train.create_trainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=kd_train_dataset,
    eval_dataset=validation_dataset,
    training_args=training_args,
    compute_metrics=trainer_compute_metrics,
    trainer_cls=kd.DistillationTrainer,
    alpha=0.7,
    temperature=2.0,
)

# Train and collect train/validation metrics, then score the test split
# separately and show both metric sets in the notebook output.
# `display` is not imported here; it is expected to be supplied by the
# notebook (IPython) environment.
train_metrics, eval_metrics = train.train_and_evaluate(trainer)
test_metrics = trainer.evaluate(eval_dataset=test_dataset)
display(eval_metrics)
display(test_metrics)


In [None]:

# Gather cost and efficiency figures for the Phase D run and persist them
# as a JSON report under outputs/reports.
reports_dir = utils.ensure_dir("outputs/reports")
param_counts = cost.count_parameters(model)
# Fall back to 0.0 when the training metrics carry no runtime entry.
train_seconds = float(train_metrics.get("train_runtime", 0.0))
efficiency = cost.efficiency_metrics(
    train_seconds=train_seconds,
    trainable_params=param_counts["trainable"],
    accuracy=eval_metrics.get("eval_accuracy", 0.0),
)

# One record per phase; key order matches the written report.
metrics = dict(
    phase="D",
    model=model_name,
    train_runtime_seconds=train_seconds,
    dev=eval_metrics,
    test=test_metrics,
    parameter_counts=param_counts,
    efficiency=efficiency,
    kd_path=str(kd_path),
)
utils.write_json(metrics, reports_dir / "phase_d_metrics.json")
# Trailing expression so the notebook renders the record as the cell output.
metrics
