From a5b963370c01d3ff0d0ed028c3fe261efa5267bd Mon Sep 17 00:00:00 2001
From: Ishan Aryendu
Date: Fri, 6 Jun 2025 16:53:16 -0700
Subject: [PATCH] Starter Task 1: Get learning rate for llm_pte_finetuning
 example from config file (#11445)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/11445

1. **Cloned Repository**: Cloned the relevant repository to the local machine.
2. **Uploaded Files to Server**: Uploaded the cloned files to the server at `/tmp/Qwen2-0.5B-Instruct`.
3. **Created a New Directory for Model**: Created a new directory named `model` under `fbcode/executorch/examples/llm_pte_finetuning`.
4. **Configured Learning Rate**: Updated the example configuration files to add a top-level `learning_rate: 5e-3` entry.
5. **Updated Runner Script**: Modified `runner.py` to read the learning rate from the loaded configuration (`cfg.learning_rate`) instead of a hard-coded value.

Reviewed By: mcr229, silverguo

Differential Revision: D75807517
---
 examples/llm_pte_finetuning/llama3_config.yaml           | 2 ++
 examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml | 2 ++
 examples/llm_pte_finetuning/phi3_config.yaml             | 2 ++
 examples/llm_pte_finetuning/qwen_05b_config.yaml         | 4 ++++
 examples/llm_pte_finetuning/runner.py                    | 2 +-
 examples/llm_pte_finetuning/training_lib.py              | 4 ----
 6 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/examples/llm_pte_finetuning/llama3_config.yaml b/examples/llm_pte_finetuning/llama3_config.yaml
index e4428ff6e01..fec41383163 100644
--- a/examples/llm_pte_finetuning/llama3_config.yaml
+++ b/examples/llm_pte_finetuning/llama3_config.yaml
@@ -24,6 +24,8 @@ dataset:
   seed: null
   shuffle: True
 
+learning_rate: 5e-3
+
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-3.2-1B-Instruct/
diff --git a/examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml b/examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml
index 4ca3804f086..6d3ebc90a1a 100644
--- a/examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml
+++ b/examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml
@@ -13,6 +13,8 @@ batch_size: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
 
+learning_rate: 5e-3
+
 model:
   _component_: torchtune.models.phi3.lora_phi3_mini
   lora_attn_modules: ['q_proj', 'v_proj']
diff --git a/examples/llm_pte_finetuning/phi3_config.yaml b/examples/llm_pte_finetuning/phi3_config.yaml
index 7417ece79bd..aeb495a7d1c 100644
--- a/examples/llm_pte_finetuning/phi3_config.yaml
+++ b/examples/llm_pte_finetuning/phi3_config.yaml
@@ -12,6 +12,8 @@ batch_size: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
 
+learning_rate: 5e-3
+
 model:
   _component_: torchtune.models.phi3.lora_phi3_mini
   lora_attn_modules: ['q_proj', 'v_proj']
diff --git a/examples/llm_pte_finetuning/qwen_05b_config.yaml b/examples/llm_pte_finetuning/qwen_05b_config.yaml
index f5ab2dbad68..8a0f3458a03 100644
--- a/examples/llm_pte_finetuning/qwen_05b_config.yaml
+++ b/examples/llm_pte_finetuning/qwen_05b_config.yaml
@@ -13,12 +13,16 @@ batch_size: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
 
+learning_rate: 5e-3
+
 model:
   _component_: torchtune.models.qwen2.lora_qwen2_0_5b
   lora_attn_modules: ['q_proj', 'k_proj', 'v_proj']
   apply_lora_to_mlp: False
   lora_rank: 32
   lora_alpha: 64
+  # lr parameter is not supported by lora_qwen2_0_5b function
+  # lr: 5e-3
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
diff --git a/examples/llm_pte_finetuning/runner.py b/examples/llm_pte_finetuning/runner.py
index 0baf160a56b..c126aa93067 100644
--- a/examples/llm_pte_finetuning/runner.py
+++ b/examples/llm_pte_finetuning/runner.py
@@ -84,7 +84,7 @@ def main() -> None:
     # params run from [param_start, outputs_end]
     grad_start = et_mod.run_method("__et_training_gradients_index_forward", [])[0]
     param_start = et_mod.run_method("__et_training_parameters_index_forward", [])[0]
-    learning_rate = 5e-3
+    learning_rate = cfg.learning_rate
     f.seek(0)
     losses = []
     for i, batch in tqdm(enumerate(train_dataloader), total=num_training_steps):
diff --git a/examples/llm_pte_finetuning/training_lib.py b/examples/llm_pte_finetuning/training_lib.py
index edd5e04d291..f0b407c68d6 100644
--- a/examples/llm_pte_finetuning/training_lib.py
+++ b/examples/llm_pte_finetuning/training_lib.py
@@ -106,10 +106,6 @@ def eval_model(
     token_size = tokens.shape[1]
     labels_size = labels.shape[1]
 
-    tokens, labels = batch["tokens"], batch["labels"]
-    token_size = tokens.shape[1]
-    labels_size = labels.shape[1]
-
     # Fixed length for now. We need to resize as the input shapes
     # should be the same passed as examples to the export function.
     if token_size > max_seq_len:
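The sketch below shows how the new top-level `learning_rate` key can be read back out of one of these YAML files. It is a minimal illustration, assuming the config is loaded with `OmegaConf` (as torchtune-style configs typically are); the file path, the `.get()` fallback, and the `float()` coercion are assumptions for the example, not taken from the patched `runner.py`.

```python
# Minimal sketch: load an example config and read the top-level learning_rate
# added by this patch. The path below is an assumption about the checkout layout.
from omegaconf import OmegaConf

cfg = OmegaConf.load("examples/llm_pte_finetuning/qwen_05b_config.yaml")

# Some YAML parsers treat bare scientific notation such as 5e-3 as a string,
# so coerce to float defensively; fall back to the patch's default of 5e-3.
learning_rate = float(cfg.get("learning_rate", 5e-3))
print(f"learning_rate = {learning_rate}")
```

The key sits at the top level of each config rather than under `model:` because, as the comment added to `qwen_05b_config.yaml` notes, the `lora_qwen2_0_5b` builder does not accept an `lr` parameter; the runner instead reads it directly via `cfg.learning_rate`.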