diff --git a/backends/apple/coreml/test/test_coreml_quantizer.py b/backends/apple/coreml/test/test_coreml_quantizer.py
index 461044f4d53..8dfb46cbbdc 100644
--- a/backends/apple/coreml/test/test_coreml_quantizer.py
+++ b/backends/apple/coreml/test/test_coreml_quantizer.py
@@ -15,12 +15,12 @@
 )
 
 from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
-from torch._export import capture_pre_autograd_graph
 from torch.ao.quantization.quantize_pt2e import (
     convert_pt2e,
     prepare_pt2e,
     prepare_qat_pt2e,
 )
+from torch.export import export_for_training
 
 
 class TestCoreMLQuantizer:
@@ -32,7 +32,7 @@ def quantize_and_compare(
     ) -> None:
         assert quantization_type in {"PTQ", "QAT"}
 
-        pre_autograd_aten_dialect = capture_pre_autograd_graph(model, example_inputs)
+        pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()
 
         quantization_config = LinearQuantizerConfig.from_dict(
             {
diff --git a/backends/apple/mps/test/test_mps_utils.py b/backends/apple/mps/test/test_mps_utils.py
index 6f7d00d7b09..39ce5df5115 100644
--- a/backends/apple/mps/test/test_mps_utils.py
+++ b/backends/apple/mps/test/test_mps_utils.py
@@ -209,9 +209,9 @@ def lower_module_and_test_output(
 
         expected_output = model(*sample_inputs)
 
-        model = torch._export.capture_pre_autograd_graph(
+        model = torch.export.export_for_training(
             model, sample_inputs, dynamic_shapes=dynamic_shapes
-        )
+        ).module()
 
         edge_program = export_to_edge(
             model,
diff --git a/backends/mediatek/quantizer/annotator.py b/backends/mediatek/quantizer/annotator.py
index dcbaf58833a..e315599cf7f 100644
--- a/backends/mediatek/quantizer/annotator.py
+++ b/backends/mediatek/quantizer/annotator.py
@@ -7,8 +7,6 @@
 from typing import Callable, List
 
 import torch
-
-from torch._export import capture_pre_autograd_graph
 from torch._ops import OpOverload
 from torch._subclasses import FakeTensor
 
@@ -17,6 +15,8 @@
     _annotate_input_qspec_map,
     _annotate_output_qspec,
 )
+
+from torch.export import export_for_training
 from torch.fx import Graph, Node
 from torch.fx.passes.utils.matcher_with_name_node_map_utils import (
     SubgraphMatcherWithNameNodeMap,
@@ -159,7 +159,7 @@ def forward(self, x):
             return norm, {}
 
     for pattern_cls in (ExecuTorchPattern, MTKPattern):
-        pattern_gm = capture_pre_autograd_graph(pattern_cls(), (torch.randn(3, 3),))
+        pattern_gm = export_for_training(pattern_cls(), (torch.randn(3, 3),)).module()
         matcher = SubgraphMatcherWithNameNodeMap(
             pattern_gm, ignore_literals=True, remove_overlapping_matches=False
         )
diff --git a/backends/transforms/test/test_duplicate_dynamic_quant_chain.py b/backends/transforms/test/test_duplicate_dynamic_quant_chain.py
index 7d1ef169c8c..637ce807c18 100644
--- a/backends/transforms/test/test_duplicate_dynamic_quant_chain.py
+++ b/backends/transforms/test/test_duplicate_dynamic_quant_chain.py
@@ -8,7 +8,6 @@
 import unittest
 
 import torch
-import torch._export as export
 from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
     DuplicateDynamicQuantChainPass,
 )
@@ -59,10 +58,10 @@ def _test_duplicate_chain(
 
         # program capture
         m = copy.deepcopy(m_eager)
-        m = export.capture_pre_autograd_graph(
+        m = torch.export.export_for_training(
             m,
             example_inputs,
-        )
+        ).module()
 
         m = prepare_pt2e(m, quantizer)
         # Calibrate
diff --git a/examples/llm_manual/export_nanogpt.py b/examples/llm_manual/export_nanogpt.py
index cf29a69c080..2d69c50ec99 100644
--- a/examples/llm_manual/export_nanogpt.py
+++ b/examples/llm_manual/export_nanogpt.py
@@ -15,8 +15,7 @@
 from executorch.exir import to_edge
 
 from model import GPT
-from torch._export import capture_pre_autograd_graph
-from torch.export import export
+from torch.export import export, export_for_training
 from torch.nn.attention import sdpa_kernel, SDPBackend
 
 model = GPT.from_pretrained("gpt2")  # use gpt2 weight as pretrained weight
@@ -28,7 +27,9 @@
 # Trace the model, converting it to a portable intermediate representation.
 # The torch.no_grad() call tells PyTorch to exclude training-specific logic.
 with sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-    m = capture_pre_autograd_graph(model, example_inputs, dynamic_shapes=dynamic_shape)
+    m = export_for_training(
+        model, example_inputs, dynamic_shapes=dynamic_shape
+    ).module()
     traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape)
 
 # Convert the model into a runnable ExecuTorch program.
diff --git a/examples/mediatek/aot_utils/oss_utils/utils.py b/examples/mediatek/aot_utils/oss_utils/utils.py
index 8b4de4aac3a..cb55822b9de 100755
--- a/examples/mediatek/aot_utils/oss_utils/utils.py
+++ b/examples/mediatek/aot_utils/oss_utils/utils.py
@@ -30,7 +30,7 @@ def build_executorch_binary(
         if quant_dtype not in Precision:
            raise AssertionError(f"No support for Precision {quant_dtype}.")
 
-        captured_model = torch._export.capture_pre_autograd_graph(model, inputs)
+        captured_model = torch.export.export_for_training(model, inputs).module()
         annotated_model = prepare_pt2e(captured_model, quantizer)
         print("Quantizing the model...")
         # calibration
diff --git a/examples/mediatek/model_export_scripts/llama.py b/examples/mediatek/model_export_scripts/llama.py
index b2fef26a4cf..180195ee2c7 100644
--- a/examples/mediatek/model_export_scripts/llama.py
+++ b/examples/mediatek/model_export_scripts/llama.py
@@ -318,9 +318,9 @@ def export_to_et_ir(
         max_num_token, max_cache_size, True
     )
     print("Getting pre autograd ATen Dialect Graph")
-    pre_autograd_aten_dialect = torch._export.capture_pre_autograd_graph(
+    pre_autograd_aten_dialect = torch.export.export_for_training(
         model, example_inputs, dynamic_shapes=dynamic_shapes
-    )  # NOTE: Will be replaced with export
+    ).module()  # NOTE: Will be replaced with export
     quantizer = NeuropilotQuantizer()
     quantizer.setup_precision(getattr(Precision, precision))
     prepared_graph = prepare_pt2e(pre_autograd_aten_dialect, quantizer)
diff --git a/exir/tests/test_quantization.py b/exir/tests/test_quantization.py
index ebe94775221..269a9ee11bc 100644
--- a/exir/tests/test_quantization.py
+++ b/exir/tests/test_quantization.py
@@ -51,9 +51,9 @@ def test_resnet(self) -> None:
         m = torchvision.models.resnet18().eval()
         m_copy = copy.deepcopy(m)
         # program capture
-        m = torch._export.capture_pre_autograd_graph(
+        m = torch.export.export_for_training(
             m, copy.deepcopy(example_inputs)
-        )
+        ).module()
 
         quantizer = XNNPACKQuantizer()
         operator_config = get_symmetric_quantization_config(is_per_channel=True)
diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py
index ee54fe3660d..71588f44ac9 100644
--- a/extension/llm/export/builder.py
+++ b/extension/llm/export/builder.py
@@ -29,10 +29,10 @@
 from executorch.extension.export_util.utils import export_to_edge, save_pte_program
 from executorch.extension.llm.tokenizer.utils import get_tokenizer
 
-from torch._export import capture_pre_autograd_graph
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torch.ao.quantization.quantizer import Quantizer
 from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
+from torch.export import export_for_training
 from torch.nn.attention import SDPBackend
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
@@ -190,9 +190,9 @@ def capture_pre_autograd_graph(self) -> "LLMEdgeManager":
                     strict=True,
                 ).module()
             else:
-                self.pre_autograd_graph_module = capture_pre_autograd_graph(
+                self.pre_autograd_graph_module = export_for_training(
                     self.model, self.example_inputs, dynamic_shapes=dynamic_shape
-                )
+                ).module()
 
         return self
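
For context (not part of the diff): a minimal sketch of the capture pattern these changes migrate to. The TinyModel module and its example inputs below are hypothetical; the point is that torch.export.export_for_training returns an ExportedProgram, and calling .module() on it yields the pre-autograd GraphModule that prepare_pt2e/convert_pt2e expect, which capture_pre_autograd_graph used to return directly.

# Minimal sketch (assumed toy model, not taken from any file in this diff).
import torch
from torch.export import export_for_training


class TinyModel(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(8, 4)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.linear(x))


model = TinyModel().eval()
example_inputs = (torch.randn(2, 8),)

# Before: m = torch._export.capture_pre_autograd_graph(model, example_inputs)
# After: capture an ExportedProgram, then unwrap the pre-autograd graph module.
captured = export_for_training(model, example_inputs).module()
print(captured.graph)  # a GraphModule graph suitable for prepare_pt2e/convert_pt2e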