From b88478ef054a7a2b156311a08b6d2874660dcec6 Mon Sep 17 00:00:00 2001 From: andrewor14 Date: Thu, 11 Sep 2025 11:14:05 -0700 Subject: [PATCH] [pt2e] Make prepare and convert faster by caching **Summary:** This is the torchao version of https://github.com/pytorch/pytorch/pull/162550 by @navsud. Including the PR description here again: D79674759 tried to fix the expensive prepare and convert steps, as assert_and_get_unique_device was called multiple times. This change fixes that issue by using the functools.cache decorator. **Test Plan:** Verified on LLM export to QNN. LLM quantization prepare time was reduced from ~20min to ~3min. --- torchao/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torchao/utils.py b/torchao/utils.py index 652e7f33f1..daf7eab83c 100644 --- a/torchao/utils.py +++ b/torchao/utils.py @@ -49,6 +49,7 @@ # Referenced from: https://github.com/pytorch/pytorch/blob/9105d54c6b37099575c0059ef274c86c4dc80c57/torch/ao/quantization/utils.py#L711 +@functools.cache def _assert_and_get_unique_device(module: torch.nn.Module) -> Any: """ Returns the unique device for a module, or None if no device is found.