From b88478ef054a7a2b156311a08b6d2874660dcec6 Mon Sep 17 00:00:00 2001 From: andrewor14 Date: Thu, 11 Sep 2025 11:14:05 -0700 Subject: [PATCH] [pt2e] Make prepare and convert faster by caching **Summary:** This is the torchao version of https://github.com/pytorch/pytorch/pull/162550 by @navsud. Including the PR description here again: D79674759 tried to fix the expensive prepare and convert steps, as assert_and_get_unique_device was called multiple times. This change fixes that issue by using the functools.cache decorator. **Test Plan:** Verified on LLM export to QNN. LLM quantization prepare time was reduced from ~20min to ~3min. --- torchao/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torchao/utils.py b/torchao/utils.py index 652e7f33f1..daf7eab83c 100644 --- a/torchao/utils.py +++ b/torchao/utils.py @@ -49,6 +49,7 @@ # Referenced from: https://github.com/pytorch/pytorch/blob/9105d54c6b37099575c0059ef274c86c4dc80c57/torch/ao/quantization/utils.py#L711 +@functools.cache def _assert_and_get_unique_device(module: torch.nn.Module) -> Any: """ Returns the unique device for a module, or None if no device is found.