From a5b963370c01d3ff0d0ed028c3fe261efa5267bd Mon Sep 17 00:00:00 2001
From: Ishan Aryendu
Date: Fri, 6 Jun 2025 16:53:16 -0700
Subject: [PATCH] Starter Task 1: Get learning rate for llm_pte_finetuning
 example from config file (#11445)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/11445

1. **Cloned Repository**: Cloned the relevant repository to the local machine.
2. **Uploaded Files to Server**: Uploaded the cloned files to the server at `/tmp/Qwen2-0.5B-Instruct`.
3. **Created a New Directory for Model**: Created a new directory named `model` under `fbcode/executorch/examples/llm_pte_finetuning`.
4. **Configured Learning Rate**: Updated the example configuration files to add a top-level `learning_rate: 5e-3` entry.
5. **Updated Runner Script**: Modified `runner.py` to read the learning rate from the loaded configuration (`cfg.learning_rate`) instead of a hard-coded value.

Reviewed By: mcr229, silverguo

Differential Revision: D75807517
---
 examples/llm_pte_finetuning/llama3_config.yaml           | 2 ++
 examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml | 2 ++
 examples/llm_pte_finetuning/phi3_config.yaml             | 2 ++
 examples/llm_pte_finetuning/qwen_05b_config.yaml         | 4 ++++
 examples/llm_pte_finetuning/runner.py                    | 2 +-
 examples/llm_pte_finetuning/training_lib.py              | 4 ----
 6 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/examples/llm_pte_finetuning/llama3_config.yaml b/examples/llm_pte_finetuning/llama3_config.yaml
index e4428ff6e01..fec41383163 100644
--- a/examples/llm_pte_finetuning/llama3_config.yaml
+++ b/examples/llm_pte_finetuning/llama3_config.yaml
@@ -24,6 +24,8 @@ dataset:
   seed: null
   shuffle: True
 
+learning_rate: 5e-3
+
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-3.2-1B-Instruct/
diff --git a/examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml b/examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml
index 4ca3804f086..6d3ebc90a1a 100644
--- a/examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml
+++ b/examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml
@@ -13,6 +13,8 @@ batch_size: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
 
+learning_rate: 5e-3
+
 model:
   _component_: torchtune.models.phi3.lora_phi3_mini
   lora_attn_modules: ['q_proj', 'v_proj']
diff --git a/examples/llm_pte_finetuning/phi3_config.yaml b/examples/llm_pte_finetuning/phi3_config.yaml
index 7417ece79bd..aeb495a7d1c 100644
--- a/examples/llm_pte_finetuning/phi3_config.yaml
+++ b/examples/llm_pte_finetuning/phi3_config.yaml
@@ -12,6 +12,8 @@ batch_size: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
 
+learning_rate: 5e-3
+
 model:
   _component_: torchtune.models.phi3.lora_phi3_mini
   lora_attn_modules: ['q_proj', 'v_proj']
diff --git a/examples/llm_pte_finetuning/qwen_05b_config.yaml b/examples/llm_pte_finetuning/qwen_05b_config.yaml
index f5ab2dbad68..8a0f3458a03 100644
--- a/examples/llm_pte_finetuning/qwen_05b_config.yaml
+++ b/examples/llm_pte_finetuning/qwen_05b_config.yaml
@@ -13,12 +13,16 @@ batch_size: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
 
+learning_rate: 5e-3
+
 model:
   _component_: torchtune.models.qwen2.lora_qwen2_0_5b
   lora_attn_modules: ['q_proj', 'k_proj', 'v_proj']
   apply_lora_to_mlp: False
   lora_rank: 32
   lora_alpha: 64
+  # lr parameter is not supported by lora_qwen2_0_5b function
+  # lr: 5e-3
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
diff --git a/examples/llm_pte_finetuning/runner.py b/examples/llm_pte_finetuning/runner.py
index 0baf160a56b..c126aa93067 100644
--- a/examples/llm_pte_finetuning/runner.py
+++ b/examples/llm_pte_finetuning/runner.py
@@ -84,7 +84,7 @@ def main() -> None:
     # params run from [param_start, outputs_end]
     grad_start = et_mod.run_method("__et_training_gradients_index_forward", [])[0]
     param_start = et_mod.run_method("__et_training_parameters_index_forward", [])[0]
-    learning_rate = 5e-3
+    learning_rate = cfg.learning_rate
     f.seek(0)
     losses = []
     for i, batch in tqdm(enumerate(train_dataloader), total=num_training_steps):
diff --git a/examples/llm_pte_finetuning/training_lib.py b/examples/llm_pte_finetuning/training_lib.py
index edd5e04d291..f0b407c68d6 100644
--- a/examples/llm_pte_finetuning/training_lib.py
+++ b/examples/llm_pte_finetuning/training_lib.py
@@ -106,10 +106,6 @@ def eval_model(
     token_size = tokens.shape[1]
     labels_size = labels.shape[1]
 
-    tokens, labels = batch["tokens"], batch["labels"]
-    token_size = tokens.shape[1]
-    labels_size = labels.shape[1]
-
     # Fixed length for now. We need to resize as the input shapes
     # should be the same passed as examples to the export function.
     if token_size > max_seq_len:
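The sketch below shows how the new top-level `learning_rate` key can be read back out of one of these YAML files. It is a minimal illustration, assuming the config is loaded with `OmegaConf` (as torchtune-style configs typically are); the file path, the `.get()` fallback, and the `float()` coercion are assumptions for the example, not taken from the patched `runner.py`.

```python
# Minimal sketch: load an example config and read the top-level learning_rate
# added by this patch. The path below is an assumption about the checkout layout.
from omegaconf import OmegaConf

cfg = OmegaConf.load("examples/llm_pte_finetuning/qwen_05b_config.yaml")

# Some YAML parsers treat bare scientific notation such as 5e-3 as a string,
# so coerce to float defensively; fall back to the patch's default of 5e-3.
learning_rate = float(cfg.get("learning_rate", 5e-3))
print(f"learning_rate = {learning_rate}")
```

The key sits at the top level of each config rather than under `model:` because, as the comment added to `qwen_05b_config.yaml` notes, the `lora_qwen2_0_5b` builder does not accept an `lr` parameter; the runner instead reads it directly via `cfg.learning_rate`.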