[AMDGPU][True16][MC] Support v_swap_b16. #100442

broxigarchen · 2024-07-24T18:14:06Z

support V_SWAP_B16 true16 encoding in asm/disasm for GFX11/12

github-actions · 2024-07-24T18:14:23Z

Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be
notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write
permissions for the repository. In which case you can instead tag reviewers by
name in a comment by using @ followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review
by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate
is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the LLVM GitHub User Guide.

You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums.

llvmbot · 2024-07-24T19:19:32Z

@llvm/pr-subscribers-mc

Author: Brox Chen (broxigarchen)

Changes

Added v_swap_b16 support in AMDGPU codeGen

Full diff: https://github.com/llvm/llvm-project/pull/100442.diff

7 Files Affected:

(modified) llvm/lib/Target/AMDGPU/SIInstrFormats.td (+6)
(modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+17-1)
(modified) llvm/test/MC/AMDGPU/gfx10_unsupported.s (+3)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1.s (+9)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s (+12)
(modified) llvm/test/MC/Disassembler/AMDGPU/decode-err.txt (+9)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt (+6)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 1fe8beafd5e5d..9b506eb0a711a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -321,6 +321,12 @@ def VOPDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
   let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
 }
 
+// Source-encoded destination operand for instructions like v_swap_b16.
+def VOPSrcEncodedDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
+  let EncoderMethod = VSrcT_b16_Lo128.EncoderMethod;
+  let DecoderMethod = VSrcT_b16_Lo128.DecoderMethod;
+}
+
 class VINTRPe <bits<2> op> : Enc32 {
   bits<8> vdst;
   bits<8> vsrc;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 2c0d61ee4afa1..c08903d03939c 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -729,7 +729,22 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1
   let isAsCheapAsAMove = 1;
 }
 
+def VOP_SWAP_I16 : VOPProfile_True16<VOP_I16_I16> {
+  let Outs32 = (outs VOPDstOperand_t16Lo128:$vdst,
+                     VOPSrcEncodedDstOperand_t16Lo128:$vdst1);
+  let Ins32 = (ins VOPSrcEncodedDstOperand_t16Lo128:$src0,
+                   VOPDstOperand_t16Lo128:$src1);
+  let Asm32 = " $vdst, $src0";
+}
+
 let SubtargetPredicate = isGFX11Plus in {
+  def V_SWAP_B16 : VOP1_Pseudo<"v_swap_b16", VOP_SWAP_I16, [], /* VOP1Only= */ 1> {
+    let Constraints = "$vdst = $src1, $vdst1 = $src0";
+    let DisableEncoding = "$vdst1, $src1";
+    let SchedRW = [Write64Bit, Write64Bit];
+  }
+  // TODO-GFX11 select new insts
+  defm V_MOV_B16_t16        : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
   // Restrict src0 to be VGPR
   def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
                                       [], /*VOP1Only=*/ 1> {
@@ -952,7 +967,8 @@ defm V_CTZ_I32_B32         : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a,
   "V_FFBL_B32", "v_ctz_i32_b32">;
 defm V_CLS_I32             : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
   "V_FFBH_I32", "v_cls_i32">;
-defm V_PERMLANE64_B32      : VOP1Only_Real_gfx11_gfx12<0x067>;
+defm V_SWAP_B16              : VOP1Only_Real_gfx11_gfx12<0x066>;
+defm V_PERMLANE64_B32        : VOP1Only_Real_gfx11_gfx12<0x067>;
 defm V_MOV_B16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
 defm V_NOT_B16_fake16        : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
 defm V_CVT_I32_I16_fake16    : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
diff --git a/llvm/test/MC/AMDGPU/gfx10_unsupported.s b/llvm/test/MC/AMDGPU/gfx10_unsupported.s
index 46b4e6ffb4037..1374417ac354b 100644
--- a/llvm/test/MC/AMDGPU/gfx10_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx10_unsupported.s
@@ -3287,6 +3287,9 @@ v_subrev_u32_e64 v255, s[12:13], v1, v2
 v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
+v_swap_b16 v0.l, v0.l
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
 v_wmma_bf16_16x16x16_bf16 v[16:19], 1.0, v[8:15], v[16:19]
 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
index d95ef6f15e48d..90d5ca7f72751 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
@@ -3448,6 +3448,15 @@ v_sqrt_f64 v[5:6], src_scc
 v_sqrt_f64 v[254:255], 0xaf123456
 // GFX11: encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf]
 
+v_swap_b16 v5.l, v1.h
+// GFX11: encoding: [0x81,0xcd,0x0a,0x7e]
+
+v_swap_b16 v5.h, v1.l
+// GFX11: encoding: [0x01,0xcd,0x0a,0x7f]
+
+v_swap_b16 v127.l, v127.l
+// GFX11: encoding: [0x7f,0xcd,0xfe,0x7e]
+
 v_swap_b32 v5, v1
 // GFX11: encoding: [0x01,0xcb,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
index 5b5381b752feb..ab587a524fc6e 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
@@ -211,6 +211,18 @@ v_sqrt_f16_e32 v255.l, v1.l
 v_sqrt_f16_e32 v5.l, v199.l
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
+v_swap_b16_e32 v128.l, v0.l
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swap_b16_e32 v0.l, s0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swap_b16_e32 v0.l, 0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swap_b16_e32 v0.l, 0xfe0b
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
 v_trunc_f16_e32 v128, 0xfe0b
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
index f6d2a19326e1d..c15b65728b247 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
@@ -19,6 +19,15 @@
 # W64: [[@LINE+1]]:1: warning: invalid instruction encoding
 0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf
 
+# GFX11: v_swap_b16 v5.h, s1/*Invalid register, operand has 'VGPR_16_Lo128' register class*/ ; encoding: [0x01,0xcc,0x0a,0x7f]
+0x01,0xcc,0x0a,0x7f
+
+# GFX11: v_swap_b16 v5.h, 0x3c00/*Invalid immediate*/ ; encoding: [0x00,0xcc,0x0a,0x7f]
+0xf2,0xcc,0x0a,0x7f
+
+# GFX11: v_swap_b16 v5.h, 0x78563412/*Invalid immediate*/ ; encoding: [0x12,0xcc,0x0a,0x7f]
+0xff,0xcc,0x0a,0x7f,0x12,0x34,0x56,0x78
+
 # W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
 # W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
 0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
index b176a57d70f86..778f7deb4ec1a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
@@ -3344,6 +3344,12 @@
 # GFX11: v_sqrt_f64_e32 v[254:255], 0xaf123456   ; encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf]
 0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf
 
+# GFX11: v_swap_b16 v5.l, v1.h                   ; encoding: [0x81,0xcd,0x0a,0x7e]
+0x81,0xcd,0x0a,0x7e
+
+# GFX11: v_swap_b16 v5.h, v1.l                   ; encoding: [0x01,0xcd,0x0a,0x7f]
+0x01,0xcd,0x0a,0x7f
+
 # GFX11: v_swap_b32 v5, v1                       ; encoding: [0x01,0xcb,0x0a,0x7e]
 0x01,0xcb,0x0a,0x7e

llvmbot · 2024-07-24T19:19:32Z

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

Changes

Added v_swap_b16 support in AMDGPU codeGen

Full diff: https://github.com/llvm/llvm-project/pull/100442.diff

7 Files Affected:

(modified) llvm/lib/Target/AMDGPU/SIInstrFormats.td (+6)
(modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+17-1)
(modified) llvm/test/MC/AMDGPU/gfx10_unsupported.s (+3)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1.s (+9)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s (+12)
(modified) llvm/test/MC/Disassembler/AMDGPU/decode-err.txt (+9)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt (+6)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 1fe8beafd5e5d..9b506eb0a711a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -321,6 +321,12 @@ def VOPDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
   let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
 }
 
+// Source-encoded destination operand for instructions like v_swap_b16.
+def VOPSrcEncodedDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
+  let EncoderMethod = VSrcT_b16_Lo128.EncoderMethod;
+  let DecoderMethod = VSrcT_b16_Lo128.DecoderMethod;
+}
+
 class VINTRPe <bits<2> op> : Enc32 {
   bits<8> vdst;
   bits<8> vsrc;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 2c0d61ee4afa1..c08903d03939c 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -729,7 +729,22 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1
   let isAsCheapAsAMove = 1;
 }
 
+def VOP_SWAP_I16 : VOPProfile_True16<VOP_I16_I16> {
+  let Outs32 = (outs VOPDstOperand_t16Lo128:$vdst,
+                     VOPSrcEncodedDstOperand_t16Lo128:$vdst1);
+  let Ins32 = (ins VOPSrcEncodedDstOperand_t16Lo128:$src0,
+                   VOPDstOperand_t16Lo128:$src1);
+  let Asm32 = " $vdst, $src0";
+}
+
 let SubtargetPredicate = isGFX11Plus in {
+  def V_SWAP_B16 : VOP1_Pseudo<"v_swap_b16", VOP_SWAP_I16, [], /* VOP1Only= */ 1> {
+    let Constraints = "$vdst = $src1, $vdst1 = $src0";
+    let DisableEncoding = "$vdst1, $src1";
+    let SchedRW = [Write64Bit, Write64Bit];
+  }
+  // TODO-GFX11 select new insts
+  defm V_MOV_B16_t16        : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
   // Restrict src0 to be VGPR
   def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
                                       [], /*VOP1Only=*/ 1> {
@@ -952,7 +967,8 @@ defm V_CTZ_I32_B32         : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a,
   "V_FFBL_B32", "v_ctz_i32_b32">;
 defm V_CLS_I32             : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
   "V_FFBH_I32", "v_cls_i32">;
-defm V_PERMLANE64_B32      : VOP1Only_Real_gfx11_gfx12<0x067>;
+defm V_SWAP_B16              : VOP1Only_Real_gfx11_gfx12<0x066>;
+defm V_PERMLANE64_B32        : VOP1Only_Real_gfx11_gfx12<0x067>;
 defm V_MOV_B16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
 defm V_NOT_B16_fake16        : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
 defm V_CVT_I32_I16_fake16    : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
diff --git a/llvm/test/MC/AMDGPU/gfx10_unsupported.s b/llvm/test/MC/AMDGPU/gfx10_unsupported.s
index 46b4e6ffb4037..1374417ac354b 100644
--- a/llvm/test/MC/AMDGPU/gfx10_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx10_unsupported.s
@@ -3287,6 +3287,9 @@ v_subrev_u32_e64 v255, s[12:13], v1, v2
 v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
+v_swap_b16 v0.l, v0.l
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
 v_wmma_bf16_16x16x16_bf16 v[16:19], 1.0, v[8:15], v[16:19]
 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
index d95ef6f15e48d..90d5ca7f72751 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
@@ -3448,6 +3448,15 @@ v_sqrt_f64 v[5:6], src_scc
 v_sqrt_f64 v[254:255], 0xaf123456
 // GFX11: encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf]
 
+v_swap_b16 v5.l, v1.h
+// GFX11: encoding: [0x81,0xcd,0x0a,0x7e]
+
+v_swap_b16 v5.h, v1.l
+// GFX11: encoding: [0x01,0xcd,0x0a,0x7f]
+
+v_swap_b16 v127.l, v127.l
+// GFX11: encoding: [0x7f,0xcd,0xfe,0x7e]
+
 v_swap_b32 v5, v1
 // GFX11: encoding: [0x01,0xcb,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
index 5b5381b752feb..ab587a524fc6e 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
@@ -211,6 +211,18 @@ v_sqrt_f16_e32 v255.l, v1.l
 v_sqrt_f16_e32 v5.l, v199.l
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
+v_swap_b16_e32 v128.l, v0.l
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swap_b16_e32 v0.l, s0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swap_b16_e32 v0.l, 0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swap_b16_e32 v0.l, 0xfe0b
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
 v_trunc_f16_e32 v128, 0xfe0b
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
index f6d2a19326e1d..c15b65728b247 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
@@ -19,6 +19,15 @@
 # W64: [[@LINE+1]]:1: warning: invalid instruction encoding
 0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf
 
+# GFX11: v_swap_b16 v5.h, s1/*Invalid register, operand has 'VGPR_16_Lo128' register class*/ ; encoding: [0x01,0xcc,0x0a,0x7f]
+0x01,0xcc,0x0a,0x7f
+
+# GFX11: v_swap_b16 v5.h, 0x3c00/*Invalid immediate*/ ; encoding: [0x00,0xcc,0x0a,0x7f]
+0xf2,0xcc,0x0a,0x7f
+
+# GFX11: v_swap_b16 v5.h, 0x78563412/*Invalid immediate*/ ; encoding: [0x12,0xcc,0x0a,0x7f]
+0xff,0xcc,0x0a,0x7f,0x12,0x34,0x56,0x78
+
 # W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
 # W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
 0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
index b176a57d70f86..778f7deb4ec1a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
@@ -3344,6 +3344,12 @@
 # GFX11: v_sqrt_f64_e32 v[254:255], 0xaf123456   ; encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf]
 0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf
 
+# GFX11: v_swap_b16 v5.l, v1.h                   ; encoding: [0x81,0xcd,0x0a,0x7e]
+0x81,0xcd,0x0a,0x7e
+
+# GFX11: v_swap_b16 v5.h, v1.l                   ; encoding: [0x01,0xcd,0x0a,0x7f]
+0x01,0xcd,0x0a,0x7f
+
 # GFX11: v_swap_b32 v5, v1                       ; encoding: [0x01,0xcb,0x0a,0x7e]
 0x01,0xcb,0x0a,0x7e

arsenm

The description is slightly misleading: this does not emit the instruction from codegen and only handles the encoding/decoding.

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Sisyph

Needs gfx12 assembler and disassembler tests.

Should resolve
#61622

llvm/test/CodeGen/AMDGPU/v_swap_b16.ll

jayfoad · 2024-07-25T10:24:47Z

Can you please split this into one patch which just adds MC (assembler/disassembler) support and tests, and then a second patch which adds CodeGen support and tests?

DadSchoorse · 2024-07-25T17:41:37Z

llvm/lib/Target/AMDGPU/VOP1Instructions.td

 let SubtargetPredicate = isGFX11Plus in {
+  def V_SWAP_B16 : VOP1_Pseudo<"v_swap_b16", VOP_SWAP_I16, [], /* VOP1Only= */ 1> {


Are you sure this opcode is VOP1 only? When I implemented support for v_swap_b16 in our compiler, I tested it with VOP3 encoding and it worked fine. VOP3 v_swap_b16 is necessary to address v128-v255.

According to the docs this should be VOP1 only for GFX11/12. I am not sure if such usage is supported when VOP3 is not listed in supported format. Hi @Sisyph @kosarev Do you know anything about this?

It's VOP1 only in GFX11 and GFX12, and it does support v0-v255 for both operands. UPDATE: As a VOP1 instruction it only supports registers up to v127.l/.h, of course, which is very unfortunate.

That can't be true, VOP1 v_swap_b16 definitely operates on v0-v127.lo/.hi. There is no way to encode v128-v255 without VOP3, just like for other true16 instructions.

Right, the lo/hi selector goes to the MSB of the operand fields, so can't encode registers beyond v127.

From what I see, officially there's no VOP3 form, but I could ask ISA people to be sure. Which targets did you try this on, exactly?

I did all my gfx11 testing on navi31.

OK, I did inquire on this and will update this thread as soon as I have a response. Thanks for reporting.

The response from our HW guys reads that the VOP3 codes are not guaranteed to work correctly under all possible circumstances. Meaning, they should not be considered supported regardless of observed side effects.

Thanks for investigating, even if the answer is a bit disappointing.

broxigarchen · 2024-07-26T03:43:50Z

Can you please split this into one patch which just adds MC (assembler/disassembler) support and tests, and then a second patch which adds CodeGen support and tests?

Thanks for the comment! I just trimmed the patch to contain only the MC changes.

Sisyph · 2024-07-26T20:52:37Z

Can you please commit a separate patch first that contains the changes to llvm/test/MC/AMDGPU/gfx12_asm_vop1-fake16.s and llvm/test/MC/AMDGPU/gfx12_asm_vop1.s minus v_swap_b16? The files look fine, just adding that bulk test update should be separate from the functional change to v_swap_b16.

broxigarchen · 2024-07-29T15:21:37Z

Can you please commit a separate patch first that contains the changes to llvm/test/MC/AMDGPU/gfx12_asm_vop1-fake16.s and llvm/test/MC/AMDGPU/gfx12_asm_vop1.s minus v_swap_b16? The files look fine, just adding that bulk test update should be separate from the functional change to v_swap_b16.

Created a separate PR here: #100849

Sisyph

LGTM. If the VOP3 form is supported, we can add a follow up patch.

broxigarchen · 2024-08-01T18:04:33Z

LGTM. If the VOP3 form is supported, we can add a follow up patch.

Thanks!

broxigarchen · 2024-08-01T18:05:15Z

Hi I think this PR is ready. Can anyone help to merge this PR? Thanks!

github-actions · 2024-08-01T18:09:04Z

@broxigarchen Congratulations on having your first Pull Request (PR) merged into the LLVM Project!

Your changes will be combined with recent changes from other authors, then tested
by our build bots. If there is a problem with a build, you may receive a report in an email or a comment on this PR.

Please check whether problems have been caused by your change specifically, as
the builds can include changes from many authors. It is not uncommon for your
change to be included in a build that fails due to someone else's changes, or
infrastructure issues.

How to do this, and the rest of the post-merge process, is covered in detail here.

If your change does cause a problem, it may be reverted, or you can revert it yourself.
This is a normal part of LLVM development. You can fix your changes and open a new PR to merge them again.

If you don't get any reports, no action is required from you. Your changes are working as expected, well done!

v_swap_b16 is not officially supported as VOP3, so it can't be used with v128-255. Tests show that VOP3 appears to work correctly, but according to AMD that should not be relied on. llvm/llvm-project#100442 (comment) Foz-DB Navi31: Totals from 6 (0.01% of 79395) affected shaders: Instrs: 64799 -> 65932 (+1.75%) CodeSize: 360180 -> 368440 (+2.29%) Latency: 1364648 -> 1365922 (+0.09%) InvThroughput: 635843 -> 636475 (+0.10%) Copies: 14766 -> 15698 (+6.31%) VALU: 38743 -> 39675 (+2.41%) Fixes: 80b8bbf ("aco/gfx11: use v_swap_b16") Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30515>

v_swap_b16 is not officially supported as VOP3, so it can't be used with v128-255. Tests show that VOP3 appears to work correctly, but according to AMD that should not be relied on. llvm/llvm-project#100442 (comment) Foz-DB Navi31: Totals from 6 (0.01% of 79395) affected shaders: Instrs: 64799 -> 65932 (+1.75%) CodeSize: 360180 -> 368440 (+2.29%) Latency: 1364648 -> 1365922 (+0.09%) InvThroughput: 635843 -> 636475 (+0.10%) Copies: 14766 -> 15698 (+6.31%) VALU: 38743 -> 39675 (+2.41%) Fixes: 80b8bbf ("aco/gfx11: use v_swap_b16") Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30515> (cherry picked from commit e0818cb)

support V_SWAP_B16 true16 encoding in asm/disasm for GFX11/12 Co-authored-by: guochen2 <guochen2@amd.com>

broxigarchen force-pushed the main-merge-fake16 branch from b5b892a to 7b60ebd Compare July 24, 2024 19:18

broxigarchen marked this pull request as ready for review July 24, 2024 19:19

llvmbot added backend:AMDGPU mc Machine (object) code labels Jul 24, 2024

arsenm reviewed Jul 24, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/VOP1Instructions.td Outdated Show resolved Hide resolved

broxigarchen force-pushed the main-merge-fake16 branch from 7b60ebd to 6e732d4 Compare July 24, 2024 19:48

Sisyph requested review from kosarev and jayfoad July 24, 2024 20:08

Sisyph reviewed Jul 24, 2024

View reviewed changes

llvm/test/CodeGen/AMDGPU/v_swap_b16.ll Outdated Show resolved Hide resolved

DadSchoorse reviewed Jul 25, 2024

View reviewed changes

broxigarchen force-pushed the main-merge-fake16 branch 2 times, most recently from eb09da0 to d80b7b0 Compare July 26, 2024 03:28

broxigarchen requested review from Sisyph and arsenm July 26, 2024 15:26

broxigarchen mentioned this pull request Jul 29, 2024

[AMDGPU][True16][MC] duplicate vop1 tests to fake16 and update real-true16 flags for GFX12 #100849

Merged

[True16][MC] support V_SWAP_B16 true16 format in asm/disasm for GFX11/12

ca2fc62

broxigarchen force-pushed the main-merge-fake16 branch from 9b02a91 to ca2fc62 Compare August 1, 2024 14:27

Sisyph approved these changes Aug 1, 2024

View reviewed changes

arsenm merged commit ab91371 into llvm:main Aug 1, 2024
7 checks passed

broxigarchen deleted the main-merge-fake16 branch August 1, 2024 19:15

banach-space pushed a commit to banach-space/llvm-project that referenced this pull request Aug 7, 2024

[AMDGPU][True16][MC] Support v_swap_b16. (llvm#100442)

2ce0da2

support V_SWAP_B16 true16 encoding in asm/disasm for GFX11/12 Co-authored-by: guochen2 <guochen2@amd.com>

broxigarchen mentioned this pull request Aug 13, 2024

Request Commit Access For broxigarchen #100457

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMDGPU][True16][MC] Support v_swap_b16. #100442

[AMDGPU][True16][MC] Support v_swap_b16. #100442

broxigarchen commented Jul 24, 2024 •

edited

Loading

github-actions bot commented Jul 24, 2024

llvmbot commented Jul 24, 2024

llvmbot commented Jul 24, 2024

arsenm left a comment

Sisyph left a comment

jayfoad commented Jul 25, 2024

DadSchoorse Jul 25, 2024

broxigarchen Jul 26, 2024

kosarev Jul 26, 2024 •

edited

Loading

DadSchoorse Jul 26, 2024

kosarev Jul 26, 2024

kosarev Jul 26, 2024

DadSchoorse Jul 26, 2024

kosarev Jul 26, 2024

kosarev Aug 5, 2024

DadSchoorse Aug 5, 2024

broxigarchen commented Jul 26, 2024

Sisyph commented Jul 26, 2024

broxigarchen commented Jul 29, 2024

Sisyph left a comment

broxigarchen commented Aug 1, 2024

broxigarchen commented Aug 1, 2024

github-actions bot commented Aug 1, 2024

		let SubtargetPredicate = isGFX11Plus in {
		def V_SWAP_B16 : VOP1_Pseudo<"v_swap_b16", VOP_SWAP_I16, [], /* VOP1Only= */ 1> {

[AMDGPU][True16][MC] Support v_swap_b16. #100442

[AMDGPU][True16][MC] Support v_swap_b16. #100442

Conversation

broxigarchen commented Jul 24, 2024 • edited Loading

github-actions bot commented Jul 24, 2024

llvmbot commented Jul 24, 2024

llvmbot commented Jul 24, 2024

arsenm left a comment

Choose a reason for hiding this comment

Sisyph left a comment

Choose a reason for hiding this comment

jayfoad commented Jul 25, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

kosarev Jul 26, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

broxigarchen commented Jul 26, 2024

Sisyph commented Jul 26, 2024

broxigarchen commented Jul 29, 2024

Sisyph left a comment

Choose a reason for hiding this comment

broxigarchen commented Aug 1, 2024

broxigarchen commented Aug 1, 2024

github-actions bot commented Aug 1, 2024

broxigarchen commented Jul 24, 2024 •

edited

Loading

kosarev Jul 26, 2024 •

edited

Loading