From 9a886d2dc4ccc395bea95f35dc27d2ca2061ecde Mon Sep 17 00:00:00 2001
From: Naveen Suda
Date: Thu, 25 Sep 2025 15:17:11 -0700
Subject: [PATCH] Enable quantization for bf16 model (#14558)

Summary:
To save GPU memory, the `bfloat16` dtype is commonly used when training
LLMs. Currently, the quantizer skips quantization for any node whose
dtype is not `float32`. This change enables quantization of `bfloat16`
nodes as well.

Reviewed By: billmguo

Differential Revision: D82866443
---
 backends/qualcomm/quantizer/annotators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/qualcomm/quantizer/annotators.py b/backends/qualcomm/quantizer/annotators.py
index d584cd128ec..2649ed5b154 100644
--- a/backends/qualcomm/quantizer/annotators.py
+++ b/backends/qualcomm/quantizer/annotators.py
@@ -68,7 +68,7 @@ def _is_float_tensor(node: Node):
         or not isinstance(node.meta["val"], FakeTensor)
     ):
         return False
-    return node.meta["val"].dtype == torch.float32
+    return node.meta["val"].dtype in (torch.bfloat16, torch.float32)
 
 
 def _mark_nodes_as_annotated(nodes: List[Node]):
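
Note (not part of the patch): below is a minimal, self-contained sketch of how the patched `_is_float_tensor` predicate behaves. The toy FX graph, the tensor shape, and the `FakeTensorMode` usage are illustrative assumptions for demonstration, not code from the repository.

```python
import torch
from torch._subclasses import FakeTensor
from torch._subclasses.fake_tensor import FakeTensorMode
from torch.fx import Graph, Node


def _is_float_tensor(node: Node):
    # Mirrors the patched predicate: a node qualifies for quantization
    # annotation if its FakeTensor value is bf16 or fp32.
    if "val" not in node.meta or not isinstance(node.meta["val"], FakeTensor):
        return False
    return node.meta["val"].dtype in (torch.bfloat16, torch.float32)


# Toy check (illustrative): a placeholder node carrying a bf16 FakeTensor.
graph = Graph()
node = graph.placeholder("x")
mode = FakeTensorMode()
node.meta["val"] = mode.from_tensor(torch.empty(2, 2, dtype=torch.bfloat16))

assert _is_float_tensor(node)  # True with this change; False before it
```

With the previous `== torch.float32` check, bf16 nodes produced by memory-saving training runs fell through this predicate unannotated, so the quantizer silently left them in floating point.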