From 7112464db5ec11825d706703e38e6aa73b5af0cc Mon Sep 17 00:00:00 2001
From: Lucy Qiu
Date: Tue, 2 Dec 2025 15:46:50 -0800
Subject: [PATCH] Remove fairseq from export_llama (#16052)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/16052

This is not being used anymore; remove it and simplify export_llama.
Also remove language_llama, the only user of fairseq.

Reviewed By: larryliu0820

Differential Revision: D87831086
---
 examples/models/llama/export_llama_lib.py   |  1 -
 examples/models/llama/model.py              | 34 -------------------
 .../llama/source_transformation/quantize.py | 17 +---------
 extension/llm/export/config/llm_config.py   |  4 ---
 4 files changed, 1 insertion(+), 55 deletions(-)

diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index c981ac4a239..219cc71ded1 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -396,7 +396,6 @@ def build_args_parser() -> argparse.ArgumentParser:
         " [16] pattern specifies all layers have sliding window of 16.",
     )
 
-    parser.add_argument("-2", "--fairseq2", action="store_true")
     parser.add_argument("-v", "--verbose", action="store_true")
     parser.add_argument(
         "-X",
diff --git a/examples/models/llama/model.py b/examples/models/llama/model.py
index ca6858517be..bc019444a7d 100644
--- a/examples/models/llama/model.py
+++ b/examples/models/llama/model.py
@@ -21,17 +21,6 @@
 from executorch.extension.llm.export.config.llm_config import LlmConfig
 from torchao.utils import TorchAOBaseTensor
 
-try:
-    from .fairseq2 import convert_to_llama_checkpoint
-
-except ImportError:
-
-    def convert_to_llama_checkpoint(**kwargs):
-        raise NotImplementedError(
-            "Please install fairseq2 with `pip install fairseq2`."
-        )
-
-
 from ..model_base import EagerModelBase
 
 
@@ -70,33 +59,10 @@ def __init__(self, llm_config: Optional[LlmConfig] = None):
         checkpoint = {}
         if checkpoint_path:
             checkpoint = torch.load(checkpoint_path, map_location=device, mmap=True)
-
-        # If given checkpoint is fairseq, convert to llama checkpoint.
-        fairseq2_checkpoint = self.llm_config.base.fairseq2
-        if fairseq2_checkpoint:
-            print("Using fairseq2 checkpoint")
-            checkpoint = convert_to_llama_checkpoint(checkpoint=checkpoint)
         if "model" in checkpoint:
             # NB: some checkpoint contains a "model" field, which is the actual weights dict
             checkpoint = checkpoint["model"]
 
-        # Check if user gave a fairseq2 checkpoint unknowingly without specifying --fairseq2.
-        if (not fairseq2_checkpoint) and checkpoint.get(
-            "final_proj.weight", None
-        ) is not None:
-            raise ValueError(
-                """
-************************************************************
-This looks like a Fairseq2 checkpoint (based on the presence
-of `final_proj.weight`.
-
-You can import Fairseq2 checkpoints using the --fairseq2
-option, but --fairseq2 was not specified. Please verify
-the checkpoint format to avoid generating faulty models.
-************************************************************
-"""
-            )
-
         # Get optional params.
         params = {}
         if params_path:
diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py
index a9412d513c7..b9076f90795 100644
--- a/examples/models/llama/source_transformation/quantize.py
+++ b/examples/models/llama/source_transformation/quantize.py
@@ -17,21 +17,6 @@
 from executorch.extension.llm.export.builder import DType
 
 
-try:
-    from fairseq2.nn.embedding import (
-        Embedding as fsEmbedding,
-        StandardEmbedding as fsStandardEmbedding,
-    )
-
-    from fairseq2.nn.projection import Linear as fsLinear
-
-    print("Using fairseq2 modules.")
-except:
-    fsEmbedding = nn.Embedding
-    fsStandardEmbedding = nn.Embedding
-    fsLinear = nn.Linear
-
-
 def quantize(  # noqa C901
     model: torch.nn.Module,
     qmode: str,
@@ -400,7 +385,7 @@ def create_quantized_state_dict(self) -> Dict:
 
         for fqn, mod in self.mod.named_modules():
             # print(f"maybe? quantize {fqn}...{type(mod)}")
-            if isinstance(mod, torch.nn.Linear) or isinstance(mod, fsLinear):
+            if isinstance(mod, torch.nn.Linear):
                 # print(f"candidate {fqn}, nodetype {self.node_type}")
                 if (
                     (self.node_type == "*")
diff --git a/extension/llm/export/config/llm_config.py b/extension/llm/export/config/llm_config.py
index 0dd8162a42f..b40fad88a9c 100644
--- a/extension/llm/export/config/llm_config.py
+++ b/extension/llm/export/config/llm_config.py
@@ -86,7 +86,6 @@ class BaseConfig:
             e.g. '"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"'
         use_lora: Only for use with QAT. Rank of the LoRA adapter, disabled
             if set to 0.
-        fairseq2: For legacy internal use cases, this is safe to ignore.
         preq_mode: Legacy option to specify how prequantized weights are loaded.
             Going forward, ExecuTorch supports loading weights prequantized through
             TorchAo as-is, without any special handling.
@@ -103,7 +102,6 @@ class BaseConfig:
     tokenizer_path: Optional[str] = None
    metadata: Optional[str] = None
     use_lora: int = 0
-    fairseq2: bool = False
     preq_mode: Optional[PreqMode] = None
     preq_group_size: int = 32
     preq_embedding_quantize: str = "8,0"
@@ -535,8 +533,6 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig":  # noqa: C901
             llm_config.base.metadata = args.metadata
         if hasattr(args, "use_lora"):
             llm_config.base.use_lora = args.use_lora
-        if hasattr(args, "fairseq2"):
-            llm_config.base.fairseq2 = args.fairseq2
 
         # PreqMode settings
         if hasattr(args, "preq_mode") and args.preq_mode: