[AMDGPU] Only try DecoderTables for the current subtarget. NFCI. #82992

jayfoad · 2024-02-26T12:09:02Z

Speed up disassembly by only calling tryDecodeInst for DecoderTables
that make sense for the current subtarget.

This gives a 1.3x speed-up on check-llvm-mc-disassembler-amdgpu in my
Release+Asserts build.

Speed up disassembly by only calling tryDecodeInst for DecoderTables that make sense for the current subtarget. This gives a 1.3x speed-up on check-llvm-mc-disassembler-amdgpu in my Release+Asserts build.

llvmbot · 2024-02-26T12:09:36Z

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

Speed up disassembly by only calling tryDecodeInst for DecoderTables
that make sense for the current subtarget.

This gives a 1.3x speed-up on check-llvm-mc-disassembler-amdgpu in my
Release+Asserts build.

Full diff: https://github.com/llvm/llvm-project/pull/82992.diff

3 Files Affected:

(modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+30-15)
(modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h (+1)
(modified) llvm/lib/Target/AMDGPU/MIMGInstructions.td (+3-3)

diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index e1cca17bdbf432..8c42304ce0bee5 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -466,15 +466,18 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     if (isGFX11Plus() && Bytes.size() >= 12 ) {
       DecoderUInt128 DecW = eat12Bytes(Bytes);
 
-      if (tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
+      if (isGFX11() &&
+          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                         DecW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                         DecW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
         break;
     }
 
@@ -507,27 +510,32 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
           tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
+      if ((isVI() || isGFX9()) &&
+          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
+      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
+      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                         Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
+      if (isGFX11() &&
+          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                         Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
+      if (isGFX11() &&
+          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
         break;
     }
 
@@ -538,13 +546,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     if (Bytes.size() >= 4) {
       const uint32_t DW = eatBytes<uint32_t>(Bytes);
 
-      if (tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
+      if ((isVI() || isGFX9()) &&
+          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
         break;
 
       if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
+      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
         break;
 
       if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
@@ -555,14 +564,16 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
           tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
+      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
+      if (isGFX11() &&
+          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                         Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                         Address, CS))
         break;
     }
@@ -1750,6 +1761,10 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
   return AMDGPU::isGFX11Plus(STI);
 }
 
+bool AMDGPUDisassembler::isGFX12() const {
+  return STI.hasFeature(AMDGPU::FeatureGFX12);
+}
+
 bool AMDGPUDisassembler::isGFX12Plus() const {
   return AMDGPU::isGFX12Plus(STI);
 }
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 2e1b6fb1c740b7..6a4cb120872089 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -275,6 +275,7 @@ class AMDGPUDisassembler : public MCDisassembler {
   bool isGFX10Plus() const;
   bool isGFX11() const;
   bool isGFX11Plus() const;
+  bool isGFX12() const;
   bool isGFX12Plus() const;
 
   bool hasArchitectedFlatScratch() const;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index fe4db0ebb0262d..cc374fbae7cc56 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -527,7 +527,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
     let ssamp = 0 in {
       if op.HAS_GFX10M then {
         def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
-                                         !if(enableDisasm, "GFX10", "")>;
+                                         !if(enableDisasm, "GFX8", "")>;
         if !not(ExtendedImageInst) then
         def _V1_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VGPR_32,
                                        !if(enableDisasm, "GFX90A", "")>;
@@ -754,7 +754,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
       let ssamp = 0 in {
         if op.HAS_GFX10M then {
           def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
-                                      !if(enableDisasm, "GFX10", "")>;
+                                      !if(enableDisasm, "GFX8", "")>;
           let hasPostISelHook = 1 in
           def _V1_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VGPR_32,
                                       !if(enableDisasm, "GFX90A", "")>;
@@ -1298,7 +1298,7 @@ multiclass MIMG_Sampler_Src_Helper <mimgopc op, string asm,
       if op.HAS_GFX10M then {
         def _V # addr.NumWords
           : MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
-                                 !if(!and(enableDisasm, addr.Disassemble), "GFX10", "")>;
+                                 !if(!and(enableDisasm, addr.Disassemble), "GFX8", "")>;
         if !not(ExtendedImageInst) then
         def _V # addr.NumWords # _gfx90a
           : MIMG_Sampler_gfx90a <op, asm, dst_rc, addr.RegClass,

jayfoad · 2024-02-26T12:10:30Z

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

@@ -507,27 +510,32 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

-      if (tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
+      if ((isVI() || isGFX9()) &&


Historical wart: namespace "GFX8" is used for lots of instructions that are shared with GFX9. Perhaps it should be renamed to "GFX8GFX9" or split into a shared part and a GFX8-only part. But I didn't want to touch that in this patch.

I think it would be better to have a common table for all sub targets with a shared encoding. That looks more like normal cpu targets, where you can emit instructions that will just fault on unsupported CPUs. Really we probably should be using different top level targets for each major encoding change

jayfoad · 2024-02-26T12:11:01Z

llvm/lib/Target/AMDGPU/MIMGInstructions.td

@@ -527,7 +527,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
    let ssamp = 0 in {
      if op.HAS_GFX10M then {
        def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
-                                         !if(enableDisasm, "GFX10", "")>;
+                                         !if(enableDisasm, "GFX8", "")>;


This is fixing a mistake that was previously harmless because we tried all namespaces on all subtargets.

kosarev

LGTM. We would not need to do the checks manually if the disassembler TableGen backend a) tests predicates as early as possible and b) allows differently predicated / mutually exclusive instructions in the same decoding table. This would also greatly reduce the size of our decoding tables, which currently contain a predicate node at the end of every single decoding path.

jayfoad · 2024-02-26T12:34:35Z

We would not need to do the checks manually if the disassembler TableGen backend a) tests predicates as early as possible and b) allows differently predicated / mutually exclusive instructions in the same decoding table.

If DisassemblerEmitter worked like that then I don't think we would need to use different DecoderNamespaces at all.

kosarev · 2024-02-26T12:43:46Z

If DisassemblerEmitter worked like that then I don't think we would need to use different DecoderNamespaces at all.

Yes, that's the point, wouldn't need multiple decoding attempts at all. I can imagine DecoderNamespace would still have its use cases where instructions to decode are completely unrelated.

[AMDGPU] Only try DecoderTables for the current subtarget. NFCI.

9d4151d

Speed up disassembly by only calling tryDecodeInst for DecoderTables that make sense for the current subtarget. This gives a 1.3x speed-up on check-llvm-mc-disassembler-amdgpu in my Release+Asserts build.

jayfoad requested review from kosarev and rampitec February 26, 2024 12:09

llvmbot added the backend:AMDGPU label Feb 26, 2024

jayfoad commented Feb 26, 2024

View reviewed changes

kosarev approved these changes Feb 26, 2024

View reviewed changes

arsenm approved these changes Feb 26, 2024

View reviewed changes

jayfoad merged commit 60e7ae3 into llvm:main Feb 26, 2024
5 of 6 checks passed

jayfoad deleted the conditional-decodertables branch February 26, 2024 13:02

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMDGPU] Only try DecoderTables for the current subtarget. NFCI. #82992

[AMDGPU] Only try DecoderTables for the current subtarget. NFCI. #82992

jayfoad commented Feb 26, 2024

llvmbot commented Feb 26, 2024

jayfoad Feb 26, 2024

arsenm Feb 26, 2024

jayfoad Feb 26, 2024

kosarev left a comment

jayfoad commented Feb 26, 2024

kosarev commented Feb 26, 2024

[AMDGPU] Only try DecoderTables for the current subtarget. NFCI. #82992

[AMDGPU] Only try DecoderTables for the current subtarget. NFCI. #82992

Conversation

jayfoad commented Feb 26, 2024

llvmbot commented Feb 26, 2024

jayfoad Feb 26, 2024

Choose a reason for hiding this comment

arsenm Feb 26, 2024

Choose a reason for hiding this comment

jayfoad Feb 26, 2024

Choose a reason for hiding this comment

kosarev left a comment

Choose a reason for hiding this comment

jayfoad commented Feb 26, 2024

kosarev commented Feb 26, 2024