From 7112464db5ec11825d706703e38e6aa73b5af0cc Mon Sep 17 00:00:00 2001
From: Lucy Qiu
Date: Tue, 2 Dec 2025 15:46:50 -0800
Subject: [PATCH] Remove fairseq from export_llama (#16052)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/16052

This is not being used anymore; remove it and simplify export_llama.
Also remove language_llama, the only user of fairseq.

Reviewed By: larryliu0820

Differential Revision: D87831086
---
 examples/models/llama/export_llama_lib.py   |  1 -
 examples/models/llama/model.py              | 34 -------------------
 .../llama/source_transformation/quantize.py | 17 +---------
 extension/llm/export/config/llm_config.py   |  4 ---
 4 files changed, 1 insertion(+), 55 deletions(-)

diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index c981ac4a239..219cc71ded1 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -396,7 +396,6 @@ def build_args_parser() -> argparse.ArgumentParser:
         " [16] pattern specifies all layers have sliding window of 16.",
     )
 
-    parser.add_argument("-2", "--fairseq2", action="store_true")
     parser.add_argument("-v", "--verbose", action="store_true")
     parser.add_argument(
         "-X",
diff --git a/examples/models/llama/model.py b/examples/models/llama/model.py
index ca6858517be..bc019444a7d 100644
--- a/examples/models/llama/model.py
+++ b/examples/models/llama/model.py
@@ -21,17 +21,6 @@
 from executorch.extension.llm.export.config.llm_config import LlmConfig
 from torchao.utils import TorchAOBaseTensor
 
-try:
-    from .fairseq2 import convert_to_llama_checkpoint
-
-except ImportError:
-
-    def convert_to_llama_checkpoint(**kwargs):
-        raise NotImplementedError(
-            "Please install fairseq2 with `pip install fairseq2`."
-        )
-
-
 from ..model_base import EagerModelBase
 
 
@@ -70,33 +59,10 @@ def __init__(self, llm_config: Optional[LlmConfig] = None):
         checkpoint = {}
         if checkpoint_path:
             checkpoint = torch.load(checkpoint_path, map_location=device, mmap=True)
-
-        # If given checkpoint is fairseq, convert to llama checkpoint.
-        fairseq2_checkpoint = self.llm_config.base.fairseq2
-        if fairseq2_checkpoint:
-            print("Using fairseq2 checkpoint")
-            checkpoint = convert_to_llama_checkpoint(checkpoint=checkpoint)
         if "model" in checkpoint:
             # NB: some checkpoint contains a "model" field, which is the actual weights dict
             checkpoint = checkpoint["model"]
 
-        # Check if user gave a fairseq2 checkpoint unknowingly without specifying --fairseq2.
-        if (not fairseq2_checkpoint) and checkpoint.get(
-            "final_proj.weight", None
-        ) is not None:
-            raise ValueError(
-                """
-************************************************************
-This looks like a Fairseq2 checkpoint (based on the presence
-of `final_proj.weight`.
-
-You can import Fairseq2 checkpoints using the --fairseq2
-option, but --fairseq2 was not specified. Please verify
-the checkpoint format to avoid generating faulty models.
-************************************************************
-"""
-            )
-
         # Get optional params.
         params = {}
         if params_path:
diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py
index a9412d513c7..b9076f90795 100644
--- a/examples/models/llama/source_transformation/quantize.py
+++ b/examples/models/llama/source_transformation/quantize.py
@@ -17,21 +17,6 @@
 from executorch.extension.llm.export.builder import DType
 
 
-try:
-    from fairseq2.nn.embedding import (
-        Embedding as fsEmbedding,
-        StandardEmbedding as fsStandardEmbedding,
-    )
-
-    from fairseq2.nn.projection import Linear as fsLinear
-
-    print("Using fairseq2 modules.")
-except:
-    fsEmbedding = nn.Embedding
-    fsStandardEmbedding = nn.Embedding
-    fsLinear = nn.Linear
-
-
 def quantize(  # noqa C901
     model: torch.nn.Module,
     qmode: str,
@@ -400,7 +385,7 @@ def create_quantized_state_dict(self) -> Dict:
 
         for fqn, mod in self.mod.named_modules():
             # print(f"maybe? quantize {fqn}...{type(mod)}")
-            if isinstance(mod, torch.nn.Linear) or isinstance(mod, fsLinear):
+            if isinstance(mod, torch.nn.Linear):
                 # print(f"candidate {fqn}, nodetype {self.node_type}")
                 if (
                     (self.node_type == "*")
diff --git a/extension/llm/export/config/llm_config.py b/extension/llm/export/config/llm_config.py
index 0dd8162a42f..b40fad88a9c 100644
--- a/extension/llm/export/config/llm_config.py
+++ b/extension/llm/export/config/llm_config.py
@@ -86,7 +86,6 @@ class BaseConfig:
             e.g. '"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"'
         use_lora: Only for use with QAT. Rank of the LoRA adapter, disabled
             if set to 0.
-        fairseq2: For legacy internal use cases, this is safe to ignore.
         preq_mode: Legacy option to specify how prequantized weights are loaded.
             Going forward, ExecuTorch supports loading weights prequantized through
             TorchAo as-is, without any special handling.
@@ -103,7 +102,6 @@ class BaseConfig:
     tokenizer_path: Optional[str] = None
    metadata: Optional[str] = None
     use_lora: int = 0
-    fairseq2: bool = False
     preq_mode: Optional[PreqMode] = None
     preq_group_size: int = 32
     preq_embedding_quantize: str = "8,0"
@@ -535,8 +533,6 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig":  # noqa: C901
             llm_config.base.metadata = args.metadata
         if hasattr(args, "use_lora"):
             llm_config.base.use_lora = args.use_lora
-        if hasattr(args, "fairseq2"):
-            llm_config.base.fairseq2 = args.fairseq2
 
         # PreqMode settings
         if hasattr(args, "preq_mode") and args.preq_mode: