From 5465132e84bfaf0f5658b27fc3339ea673f339ae Mon Sep 17 00:00:00 2001
From: Jack <32371937+jackzhxng@users.noreply.github.com>
Date: Fri, 21 Feb 2025 12:58:19 -0800
Subject: [PATCH 1/2] [DEMO] See dequantized q_proj. value

---
Notes (ignored by `git am`):
  Debug-only change inside linear_forward_8da4w. Right after w_dq is
  computed it prints w_dq, loads reference tensors from a hard-coded local
  path, checks w_dq against the 'q_after_quant_dequant' entry with
  torch.testing.assert_close, and then calls exit(), so the
  torch.nn.functional.linear call below is never reached.

 torchao/quantization/GPTQ.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/torchao/quantization/GPTQ.py b/torchao/quantization/GPTQ.py
index cb7c8d0481..640db27697 100644
--- a/torchao/quantization/GPTQ.py
+++ b/torchao/quantization/GPTQ.py
@@ -955,6 +955,13 @@ def linear_forward_8da4w(
         precision,
     )
 
+    print(f"w_dq dequantized: {w_dq}")
+    q_tensors = torch.load("/home/jackzhxng/torchrepos/executorch/fake_quantized_and_original_weights.pt")
+    correct_dequantized = q_tensors['q_after_quant_dequant']
+    torch.testing.assert_close(correct_dequantized, w_dq)
+    print("Weights quantized properly")
+    exit()
+
     # x = x.to(torch.float16)
     # w_dq = w_dq.to(torch.float16)
     c = torch.nn.functional.linear(x, w_dq)

From a1cd570be2ff8d4f2bc5d7b6520a48d105f5c083 Mon Sep 17 00:00:00 2001
From: Jack <32371937+jackzhxng@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:07:47 -0800
Subject: [PATCH 2/2] Update GPTQ.py

---
Notes (ignored by `git am`):
  Casts the reference tensor to float32 before the comparison and adds a
  signal-to-noise ratio (in dB) between w_dq and the reference.
  The `snr = ...` line closes the torch.log10( call; without that
  parenthesis the patched file does not parse.
  NOTE(review): `assert snr.item() == 0` looks unlikely to hold whenever
  assert_close passes (a small difference gives a large SNR, and a zero
  difference gives a division by zero norm) -- confirm the intended
  threshold before relying on it.

 torchao/quantization/GPTQ.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/torchao/quantization/GPTQ.py b/torchao/quantization/GPTQ.py
index 640db27697..ed782a7350 100644
--- a/torchao/quantization/GPTQ.py
+++ b/torchao/quantization/GPTQ.py
@@ -957,8 +957,10 @@ def linear_forward_8da4w(
 
     print(f"w_dq dequantized: {w_dq}")
     q_tensors = torch.load("/home/jackzhxng/torchrepos/executorch/fake_quantized_and_original_weights.pt")
-    correct_dequantized = q_tensors['q_after_quant_dequant']
+    correct_dequantized = q_tensors['q_after_quant_dequant'].to(torch.float32)
     torch.testing.assert_close(correct_dequantized, w_dq)
+    snr = 20 * torch.log10(torch.norm(w_dq, p=2) / torch.norm(w_dq - correct_dequantized, p=2))
+    assert snr.item() == 0
     print("Weights quantized properly")
     exit()
 