From 06894da9f367ad0220c632f47552d051810b0df1 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Wed, 13 May 2026 17:58:17 +0800 Subject: [PATCH 1/3] support hf_grouped lora --- src/mcore_bridge/bridge/gpt_bridge.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py index 34961d0..6d6fe5b 100644 --- a/src/mcore_bridge/bridge/gpt_bridge.py +++ b/src/mcore_bridge/bridge/gpt_bridge.py @@ -46,6 +46,7 @@ class GPTBridge: hf_expert_bias_key = 'gate.e_score_correction_bias' additional_dim0_keys = set() additional_dim1_keys = set() + _support_hf_grouped_lora = True def __init__(self, config: ModelConfig): self.config = config @@ -938,9 +939,10 @@ def _set_mlp_state( dist.all_reduce(is_lora, group=self.pp_group) if is_lora: if hf_grouped: - raise ValueError('Since this model\'s transformers and megatron have different expert ' - 'weight organization methods, LoRA weight conversion is not supported. ' - 'You can solve this issue by setting `--merge_lora true`.') + logger.warning( + 'Since this model\'s transformers and megatron have different expert weight organization ' + 'methods, LoRA weights may not be available for inference. It is recommended to set `--merge_lora true.`' + 'You can also manually merge LoRA weights using the `megatron export` command.') if mg_mlp is None: lora_A = None lora_B = None @@ -1166,9 +1168,10 @@ def _set_mlp_state( dist.all_reduce(is_lora, group=self.pp_group) if is_lora: if hf_grouped: - raise ValueError('Since this model\'s transformers and megatron have different expert ' - 'weight organization methods, LoRA weight conversion is not supported. ' - 'You can solve this issue by setting `--merge_lora true`.') + logger.warning( + 'Since this model\'s transformers and megatron have different expert weight organization ' + 'methods, LoRA weights may not be available for inference. It is recommended to set `--merge_lora true.`' + 'You can also manually merge LoRA weights using the `megatron export` command.') if mg_mlp is None: lora_A = None lora_B = None From ed1f3cdce1c4a7b4b927c1629a474755d99a8616 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Wed, 13 May 2026 19:03:11 +0800 Subject: [PATCH 2/3] fix --- src/mcore_bridge/bridge/gpt_bridge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py index 6d6fe5b..201f0dd 100644 --- a/src/mcore_bridge/bridge/gpt_bridge.py +++ b/src/mcore_bridge/bridge/gpt_bridge.py @@ -939,7 +939,7 @@ def _set_mlp_state( dist.all_reduce(is_lora, group=self.pp_group) if is_lora: if hf_grouped: - logger.warning( + logger.warning_once( 'Since this model\'s transformers and megatron have different expert weight organization ' 'methods, LoRA weights may not be available for inference. It is recommended to set `--merge_lora true.`' 'You can also manually merge LoRA weights using the `megatron export` command.') @@ -1168,7 +1168,7 @@ def _set_mlp_state( dist.all_reduce(is_lora, group=self.pp_group) if is_lora: if hf_grouped: - logger.warning( + logger.warning_once( 'Since this model\'s transformers and megatron have different expert weight organization ' 'methods, LoRA weights may not be available for inference. It is recommended to set `--merge_lora true.`' 'You can also manually merge LoRA weights using the `megatron export` command.') From 674a48b2d766527c1adcb00b6942788ff38e916c Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Wed, 13 May 2026 20:08:21 +0800 Subject: [PATCH 3/3] update --- src/mcore_bridge/bridge/gpt_bridge.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py index 201f0dd..0eceb23 100644 --- a/src/mcore_bridge/bridge/gpt_bridge.py +++ b/src/mcore_bridge/bridge/gpt_bridge.py @@ -941,8 +941,9 @@ def _set_mlp_state( if hf_grouped: logger.warning_once( 'Since this model\'s transformers and megatron have different expert weight organization ' - 'methods, LoRA weights may not be available for inference. It is recommended to set `--merge_lora true.`' - 'You can also manually merge LoRA weights using the `megatron export` command.') + 'methods, LoRA weights may not be available for inference. It is recommended to set ' + '`--merge_lora true`. You can also manually merge LoRA weights using the ' + '`megatron export` command.') if mg_mlp is None: lora_A = None lora_B = None @@ -1170,8 +1171,9 @@ def _set_mlp_state( if hf_grouped: logger.warning_once( 'Since this model\'s transformers and megatron have different expert weight organization ' - 'methods, LoRA weights may not be available for inference. It is recommended to set `--merge_lora true.`' - 'You can also manually merge LoRA weights using the `megatron export` command.') + 'methods, LoRA weights may not be available for inference. It is recommended to set ' + '`--merge_lora true`. You can also manually merge LoRA weights using the ' + '`megatron export` command.') if mg_mlp is None: lora_A = None lora_B = None