Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Add new 64-bit SALU instructions #74449

Merged
merged 1 commit into from
Dec 6, 2023
Merged

[AMDGPU] Add new 64-bit SALU instructions #74449

merged 1 commit into from
Dec 6, 2023

Conversation

jayfoad
Copy link
Contributor

@jayfoad jayfoad commented Dec 5, 2023

No description provided.

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 5, 2023

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/74449.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+17)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_sop2.s (+234)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_sop2_alias.s (+6)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt (+234)
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 87f64913c02d0..bfde97735bb20 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -578,6 +578,20 @@ def S_MAX_U32 : SOP2_32 <"s_max_u32",
 } // End isCommutable = 1
 } // End Defs = [SCC]
 
+let SubtargetPredicate = isGFX12Plus in {
+  def S_ADD_U64 : SOP2_64<"s_add_u64">{
+    let isCommutable = 1;
+  }
+
+  def S_SUB_U64 : SOP2_64<"s_sub_u64">;
+
+  def S_MUL_U64 : SOP2_64 <"s_mul_u64",
+    [(set i64:$sdst, (UniformBinFrag<mul> i64:$src0, i64:$src1))]> {
+    let isCommutable = 1;
+  }
+
+} // End SubtargetPredicate = isGFX12Plus
+
 def SelectPat : PatFrag <
   (ops node:$src1, node:$src2),
   (select SCC, $src1, $src2),
@@ -2072,6 +2086,9 @@ defm S_MUL_HI_I32      : SOP2_Real_gfx11_gfx12<0x02e>;
 defm S_CSELECT_B32     : SOP2_Real_gfx11_gfx12<0x030>;
 defm S_CSELECT_B64     : SOP2_Real_gfx11_gfx12<0x031>;
 defm S_PACK_HL_B32_B16 : SOP2_Real_gfx11_gfx12<0x035>;
+defm S_ADD_NC_U64      : SOP2_Real_Renamed_gfx12<0x053, S_ADD_U64, "s_add_nc_u64">;
+defm S_SUB_NC_U64      : SOP2_Real_Renamed_gfx12<0x054, S_SUB_U64, "s_sub_nc_u64">;
+defm S_MUL_U64         : SOP2_Real_gfx12<0x055>;
 
 //===----------------------------------------------------------------------===//
 // SOP2 - GFX1150, GFX12
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
index 002133bcecdfc..1a898bebde57f 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
@@ -1,5 +1,239 @@
 // RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck -check-prefix=GFX12 %s
 
+s_add_nc_u64 s[0:1], s[2:3], s[4:5]
+// GFX12: encoding: [0x02,0x04,0x80,0xa9]
+
+s_add_nc_u64 s[100:101], s[102:103], s[104:105]
+// GFX12: encoding: [0x66,0x68,0xe4,0xa9]
+
+s_add_nc_u64 s[0:1], s[104:105], s[102:103]
+// GFX12: encoding: [0x68,0x66,0x80,0xa9]
+
+s_add_nc_u64 s[104:105], s[0:1], s[102:103]
+// GFX12: encoding: [0x00,0x66,0xe8,0xa9]
+
+s_add_nc_u64 s[104:105], s[102:103], s[2:3]
+// GFX12: encoding: [0x66,0x02,0xe8,0xa9]
+
+s_add_nc_u64 s[104:105], s[0:1], s[2:3]
+// GFX12: encoding: [0x00,0x02,0xe8,0xa9]
+
+s_add_nc_u64 s[0:1], s[102:103], s[2:3]
+// GFX12: encoding: [0x66,0x02,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], s[2:3], s[102:103]
+// GFX12: encoding: [0x02,0x66,0x80,0xa9]
+
+s_add_nc_u64 exec, s[0:1], s[2:3]
+// GFX12: encoding: [0x00,0x02,0xfe,0xa9]
+
+s_add_nc_u64 vcc, s[0:1], s[2:3]
+// GFX12: encoding: [0x00,0x02,0xea,0xa9]
+
+s_add_nc_u64 s[0:1], exec, s[2:3]
+// GFX12: encoding: [0x7e,0x02,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], vcc, s[2:3]
+// GFX12: encoding: [0x6a,0x02,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], 0, s[2:3]
+// GFX12: encoding: [0x80,0x02,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], -1, s[2:3]
+// GFX12: encoding: [0xc1,0x02,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], 0.5, s[2:3]
+// GFX12: encoding: [0xf0,0x02,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], -4.0, s[2:3]
+// GFX12: encoding: [0xf7,0x02,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], 0x3f717273, s[2:3]
+// GFX12: encoding: [0xff,0x02,0x80,0xa9,0x73,0x72,0x71,0x3f]
+
+s_add_nc_u64 s[0:1], 0xaf123456, s[2:3]
+// GFX12: encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf]
+
+s_add_nc_u64 s[0:1], s[2:3], exec
+// GFX12: encoding: [0x02,0x7e,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], s[2:3], vcc
+// GFX12: encoding: [0x02,0x6a,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], s[2:3], 0
+// GFX12: encoding: [0x02,0x80,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], s[2:3], -1
+// GFX12: encoding: [0x02,0xc1,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], s[2:3], 0.5
+// GFX12: encoding: [0x02,0xf0,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], s[2:3], -4.0
+// GFX12: encoding: [0x02,0xf7,0x80,0xa9]
+
+s_add_nc_u64 s[0:1], s[2:3], 0x3f717273
+// GFX12: encoding: [0x02,0xff,0x80,0xa9,0x73,0x72,0x71,0x3f]
+
+s_add_nc_u64 s[0:1], s[2:3], 0xaf123456
+// GFX12: encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf]
+
+s_sub_nc_u64 s[0:1], s[2:3], s[4:5]
+// GFX12: encoding: [0x02,0x04,0x00,0xaa]
+
+s_sub_nc_u64 s[100:101], s[102:103], s[104:105]
+// GFX12: encoding: [0x66,0x68,0x64,0xaa]
+
+s_sub_nc_u64 s[0:1], s[104:105], s[102:103]
+// GFX12: encoding: [0x68,0x66,0x00,0xaa]
+
+s_sub_nc_u64 s[104:105], s[0:1], s[102:103]
+// GFX12: encoding: [0x00,0x66,0x68,0xaa]
+
+s_sub_nc_u64 s[104:105], s[102:103], s[2:3]
+// GFX12: encoding: [0x66,0x02,0x68,0xaa]
+
+s_sub_nc_u64 s[104:105], s[0:1], s[2:3]
+// GFX12: encoding: [0x00,0x02,0x68,0xaa]
+
+s_sub_nc_u64 s[0:1], s[102:103], s[2:3]
+// GFX12: encoding: [0x66,0x02,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], s[2:3], s[102:103]
+// GFX12: encoding: [0x02,0x66,0x00,0xaa]
+
+s_sub_nc_u64 exec, s[0:1], s[2:3]
+// GFX12: encoding: [0x00,0x02,0x7e,0xaa]
+
+s_sub_nc_u64 vcc, s[0:1], s[2:3]
+// GFX12: encoding: [0x00,0x02,0x6a,0xaa]
+
+s_sub_nc_u64 s[0:1], exec, s[2:3]
+// GFX12: encoding: [0x7e,0x02,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], vcc, s[2:3]
+// GFX12: encoding: [0x6a,0x02,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], 0, s[2:3]
+// GFX12: encoding: [0x80,0x02,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], -1, s[2:3]
+// GFX12: encoding: [0xc1,0x02,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], 0.5, s[2:3]
+// GFX12: encoding: [0xf0,0x02,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], -4.0, s[2:3]
+// GFX12: encoding: [0xf7,0x02,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], 0x3f717273, s[2:3]
+// GFX12: encoding: [0xff,0x02,0x00,0xaa,0x73,0x72,0x71,0x3f]
+
+s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3]
+// GFX12: encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf]
+
+s_sub_nc_u64 s[0:1], s[2:3], exec
+// GFX12: encoding: [0x02,0x7e,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], s[2:3], vcc
+// GFX12: encoding: [0x02,0x6a,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], s[2:3], 0
+// GFX12: encoding: [0x02,0x80,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], s[2:3], -1
+// GFX12: encoding: [0x02,0xc1,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], s[2:3], 0.5
+// GFX12: encoding: [0x02,0xf0,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], s[2:3], -4.0
+// GFX12: encoding: [0x02,0xf7,0x00,0xaa]
+
+s_sub_nc_u64 s[0:1], s[2:3], 0x3f717273
+// GFX12: encoding: [0x02,0xff,0x00,0xaa,0x73,0x72,0x71,0x3f]
+
+s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456
+// GFX12: encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf]
+
+s_mul_u64 s[0:1], s[2:3], s[4:5]
+// GFX12: encoding: [0x02,0x04,0x80,0xaa]
+
+s_mul_u64 s[100:101], s[102:103], s[104:105]
+// GFX12: encoding: [0x66,0x68,0xe4,0xaa]
+
+s_mul_u64 s[0:1], s[104:105], s[102:103]
+// GFX12: encoding: [0x68,0x66,0x80,0xaa]
+
+s_mul_u64 s[104:105], s[0:1], s[102:103]
+// GFX12: encoding: [0x00,0x66,0xe8,0xaa]
+
+s_mul_u64 s[104:105], s[102:103], s[2:3]
+// GFX12: encoding: [0x66,0x02,0xe8,0xaa]
+
+s_mul_u64 s[104:105], s[0:1], s[2:3]
+// GFX12: encoding: [0x00,0x02,0xe8,0xaa]
+
+s_mul_u64 s[0:1], s[102:103], s[2:3]
+// GFX12: encoding: [0x66,0x02,0x80,0xaa]
+
+s_mul_u64 s[0:1], s[2:3], s[102:103]
+// GFX12: encoding: [0x02,0x66,0x80,0xaa]
+
+s_mul_u64 exec, s[0:1], s[2:3]
+// GFX12: encoding: [0x00,0x02,0xfe,0xaa]
+
+s_mul_u64 vcc, s[0:1], s[2:3]
+// GFX12: encoding: [0x00,0x02,0xea,0xaa]
+
+s_mul_u64 s[0:1], exec, s[2:3]
+// GFX12: encoding: [0x7e,0x02,0x80,0xaa]
+
+s_mul_u64 s[0:1], vcc, s[2:3]
+// GFX12: encoding: [0x6a,0x02,0x80,0xaa]
+
+s_mul_u64 s[0:1], 0, s[2:3]
+// GFX12: encoding: [0x80,0x02,0x80,0xaa]
+
+s_mul_u64 s[0:1], -1, s[2:3]
+// GFX12: encoding: [0xc1,0x02,0x80,0xaa]
+
+s_mul_u64 s[0:1], 0.5, s[2:3]
+// GFX12: encoding: [0xf0,0x02,0x80,0xaa]
+
+s_mul_u64 s[0:1], -4.0, s[2:3]
+// GFX12: encoding: [0xf7,0x02,0x80,0xaa]
+
+s_mul_u64 s[0:1], 0x3f717273, s[2:3]
+// GFX12: encoding: [0xff,0x02,0x80,0xaa,0x73,0x72,0x71,0x3f]
+
+s_mul_u64 s[0:1], 0xaf123456, s[2:3]
+// GFX12: encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf]
+
+s_mul_u64 s[0:1], s[2:3], exec
+// GFX12: encoding: [0x02,0x7e,0x80,0xaa]
+
+s_mul_u64 s[0:1], s[2:3], vcc
+// GFX12: encoding: [0x02,0x6a,0x80,0xaa]
+
+s_mul_u64 s[0:1], s[2:3], 0
+// GFX12: encoding: [0x02,0x80,0x80,0xaa]
+
+s_mul_u64 s[0:1], s[2:3], -1
+// GFX12: encoding: [0x02,0xc1,0x80,0xaa]
+
+s_mul_u64 s[0:1], s[2:3], 0.5
+// GFX12: encoding: [0x02,0xf0,0x80,0xaa]
+
+s_mul_u64 s[0:1], s[2:3], -4.0
+// GFX12: encoding: [0x02,0xf7,0x80,0xaa]
+
+s_mul_u64 s[0:1], s[2:3], 0x3f717273
+// GFX12: encoding: [0x02,0xff,0x80,0xaa,0x73,0x72,0x71,0x3f]
+
+s_mul_u64 s[0:1], s[2:3], 0xaf123456
+// GFX12: encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf]
+
 s_add_f32 s5, s1, s2
 // GFX12: encoding: [0x01,0x02,0x05,0xa0]
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop2_alias.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop2_alias.s
index 283add1d8367b..86c3bdbaf8300 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop2_alias.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop2_alias.s
@@ -6,6 +6,9 @@ s_add_i32 s0, s1, s2
 s_add_u32 s0, s1, s2
 // GFX12: encoding: [0x01,0x02,0x00,0x80]
 
+s_add_u64 s[0:1], s[2:3], s[4:5]
+// GFX12: encoding: [0x02,0x04,0x80,0xa9]
+
 s_addc_u32 s0, s1, s2
 // GFX12: encoding: [0x01,0x02,0x00,0x82]
 
@@ -15,6 +18,9 @@ s_sub_i32 s0, s1, s2
 s_sub_u32 s0, s1, s2
 // GFX12: encoding: [0x01,0x02,0x80,0x80]
 
+s_sub_u64 s[0:1], s[2:3], s[4:5]
+// GFX12: encoding: [0x02,0x04,0x00,0xaa]
+
 s_subb_u32 s0, s1, s2
 // GFX12: encoding: [0x01,0x02,0x80,0x82]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
index 1827ce6140831..c177af7a6e50b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
@@ -1,6 +1,240 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX12 %s
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX12 %s
 
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], s[4:5]     ; encoding: [0x02,0x04,0x80,0xa9]
+0x02,0x04,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[100:101], s[102:103], s[104:105] ; encoding: [0x66,0x68,0xe4,0xa9]
+0x66,0x68,0xe4,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], s[104:105], s[102:103] ; encoding: [0x68,0x66,0x80,0xa9]
+0x68,0x66,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[104:105], s[0:1], s[102:103] ; encoding: [0x00,0x66,0xe8,0xa9]
+0x00,0x66,0xe8,0xa9
+
+# GFX12: s_add_nc_u64 s[104:105], s[102:103], s[2:3] ; encoding: [0x66,0x02,0xe8,0xa9]
+0x66,0x02,0xe8,0xa9
+
+# GFX12: s_add_nc_u64 s[104:105], s[0:1], s[2:3] ; encoding: [0x00,0x02,0xe8,0xa9]
+0x00,0x02,0xe8,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], s[102:103], s[2:3] ; encoding: [0x66,0x02,0x80,0xa9]
+0x66,0x02,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], s[102:103] ; encoding: [0x02,0x66,0x80,0xa9]
+0x02,0x66,0x80,0xa9
+
+# GFX12: s_add_nc_u64 exec, s[0:1], s[2:3]       ; encoding: [0x00,0x02,0xfe,0xa9]
+0x00,0x02,0xfe,0xa9
+
+# GFX12: s_add_nc_u64 vcc, s[0:1], s[2:3]        ; encoding: [0x00,0x02,0xea,0xa9]
+0x00,0x02,0xea,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], exec, s[2:3]       ; encoding: [0x7e,0x02,0x80,0xa9]
+0x7e,0x02,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], vcc, s[2:3]        ; encoding: [0x6a,0x02,0x80,0xa9]
+0x6a,0x02,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], 0, s[2:3]          ; encoding: [0x80,0x02,0x80,0xa9]
+0x80,0x02,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], -1, s[2:3]         ; encoding: [0xc1,0x02,0x80,0xa9]
+0xc1,0x02,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], 0.5, s[2:3]        ; encoding: [0xf0,0x02,0x80,0xa9]
+0xf0,0x02,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], -4.0, s[2:3]       ; encoding: [0xf7,0x02,0x80,0xa9]
+0xf7,0x02,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], 0x3f717273, s[2:3] ; encoding: [0xff,0x02,0x80,0xa9,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x80,0xa9,0x73,0x72,0x71,0x3f
+
+# GFX12: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf
+
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], exec       ; encoding: [0x02,0x7e,0x80,0xa9]
+0x02,0x7e,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], vcc        ; encoding: [0x02,0x6a,0x80,0xa9]
+0x02,0x6a,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], 0          ; encoding: [0x02,0x80,0x80,0xa9]
+0x02,0x80,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], -1         ; encoding: [0x02,0xc1,0x80,0xa9]
+0x02,0xc1,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], 0.5        ; encoding: [0x02,0xf0,0x80,0xa9]
+0x02,0xf0,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], -4.0       ; encoding: [0x02,0xf7,0x80,0xa9]
+0x02,0xf7,0x80,0xa9
+
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0xa9,0x73,0x72,0x71,0x3f]
+0x02,0xff,0x80,0xa9,0x73,0x72,0x71,0x3f
+
+# GFX12: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf]
+0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], s[4:5]     ; encoding: [0x02,0x04,0x00,0xaa]
+0x02,0x04,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[100:101], s[102:103], s[104:105] ; encoding: [0x66,0x68,0x64,0xaa]
+0x66,0x68,0x64,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], s[104:105], s[102:103] ; encoding: [0x68,0x66,0x00,0xaa]
+0x68,0x66,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[104:105], s[0:1], s[102:103] ; encoding: [0x00,0x66,0x68,0xaa]
+0x00,0x66,0x68,0xaa
+
+# GFX12: s_sub_nc_u64 s[104:105], s[102:103], s[2:3] ; encoding: [0x66,0x02,0x68,0xaa]
+0x66,0x02,0x68,0xaa
+
+# GFX12: s_sub_nc_u64 s[104:105], s[0:1], s[2:3] ; encoding: [0x00,0x02,0x68,0xaa]
+0x00,0x02,0x68,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], s[102:103], s[2:3] ; encoding: [0x66,0x02,0x00,0xaa]
+0x66,0x02,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], s[102:103] ; encoding: [0x02,0x66,0x00,0xaa]
+0x02,0x66,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 exec, s[0:1], s[2:3]       ; encoding: [0x00,0x02,0x7e,0xaa]
+0x00,0x02,0x7e,0xaa
+
+# GFX12: s_sub_nc_u64 vcc, s[0:1], s[2:3]        ; encoding: [0x00,0x02,0x6a,0xaa]
+0x00,0x02,0x6a,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], exec, s[2:3]       ; encoding: [0x7e,0x02,0x00,0xaa]
+0x7e,0x02,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], vcc, s[2:3]        ; encoding: [0x6a,0x02,0x00,0xaa]
+0x6a,0x02,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], 0, s[2:3]          ; encoding: [0x80,0x02,0x00,0xaa]
+0x80,0x02,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], -1, s[2:3]         ; encoding: [0xc1,0x02,0x00,0xaa]
+0xc1,0x02,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], 0.5, s[2:3]        ; encoding: [0xf0,0x02,0x00,0xaa]
+0xf0,0x02,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], -4.0, s[2:3]       ; encoding: [0xf7,0x02,0x00,0xaa]
+0xf7,0x02,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], 0x3f717273, s[2:3] ; encoding: [0xff,0x02,0x00,0xaa,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x00,0xaa,0x73,0x72,0x71,0x3f
+
+# GFX12: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], exec       ; encoding: [0x02,0x7e,0x00,0xaa]
+0x02,0x7e,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], vcc        ; encoding: [0x02,0x6a,0x00,0xaa]
+0x02,0x6a,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], 0          ; encoding: [0x02,0x80,0x00,0xaa]
+0x02,0x80,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], -1         ; encoding: [0x02,0xc1,0x00,0xaa]
+0x02,0xc1,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], 0.5        ; encoding: [0x02,0xf0,0x00,0xaa]
+0x02,0xf0,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], -4.0       ; encoding: [0x02,0xf7,0x00,0xaa]
+0x02,0xf7,0x00,0xaa
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x00,0xaa,0x73,0x72,0x71,0x3f]
+0x02,0xff,0x00,0xaa,0x73,0x72,0x71,0x3f
+
+# GFX12: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf]
+0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], s[4:5]        ; encoding: [0x02,0x04,0x80,0xaa]
+0x02,0x04,0x80,0xaa
+
+# GFX12: s_mul_u64 s[100:101], s[102:103], s[104:105] ; encoding: [0x66,0x68,0xe4,0xaa]
+0x66,0x68,0xe4,0xaa
+
+# GFX12: s_mul_u64 s[0:1], s[104:105], s[102:103] ; encoding: [0x68,0x66,0x80,0xaa]
+0x68,0x66,0x80,0xaa
+
+# GFX12: s_mul_u64 s[104:105], s[0:1], s[102:103] ; encoding: [0x00,0x66,0xe8,0xaa]
+0x00,0x66,0xe8,0xaa
+
+# GFX12: s_mul_u64 s[104:105], s[102:103], s[2:3] ; encoding: [0x66,0x02,0xe8,0xaa]
+0x66,0x02,0xe8,0xaa
+
+# GFX12: s_mul_u64 s[104:105], s[0:1], s[2:3]    ; encoding: [0x00,0x02,0xe8,0xaa]
+0x00,0x02,0xe8,0xaa
+
+# GFX12: s_mul_u64 s[0:1], s[102:103], s[2:3]    ; encoding: [0x66,0x02,0x80,0xaa]
+0x66,0x02,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], s[102:103]    ; encoding: [0x02,0x66,0x80,0xaa]
+0x02,0x66,0x80,0xaa
+
+# GFX12: s_mul_u64 exec, s[0:1], s[2:3]          ; encoding: [0x00,0x02,0xfe,0xaa]
+0x00,0x02,0xfe,0xaa
+
+# GFX12: s_mul_u64 vcc, s[0:1], s[2:3]           ; encoding: [0x00,0x02,0xea,0xaa]
+0x00,0x02,0xea,0xaa
+
+# GFX12: s_mul_u64 s[0:1], exec, s[2:3]          ; encoding: [0x7e,0x02,0x80,0xaa]
+0x7e,0x02,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], vcc, s[2:3]           ; encoding: [0x6a,0x02,0x80,0xaa]
+0x6a,0x02,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], 0, s[2:3]             ; encoding: [0x80,0x02,0x80,0xaa]
+0x80,0x02,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], -1, s[2:3]            ; encoding: [0xc1,0x02,0x80,0xaa]
+0xc1,0x02,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], 0.5, s[2:3]           ; encoding: [0xf0,0x02,0x80,0xaa]
+0xf0,0x02,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], -4.0, s[2:3]          ; encoding: [0xf7,0x02,0x80,0xaa]
+0xf7,0x02,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], 0x3f717273, s[2:3]    ; encoding: [0xff,0x02,0x80,0xaa,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x80,0xaa,0x73,0x72,0x71,0x3f
+
+# GFX12: s_mul_u64 s[0:1], 0xaf123456, s[2:3]    ; encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], exec          ; encoding: [0x02,0x7e,0x80,0xaa]
+0x02,0x7e,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], vcc           ; encoding: [0x02,0x6a,0x80,0xaa]
+0x02,0x6a,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], 0             ; encoding: [0x02,0x80,0x80,0xaa]
+0x02,0x80,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], -1            ; encoding: [0x02,0xc1,0x80,0xaa]
+0x02,0xc1,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], 0.5           ; encoding: [0x02,0xf0,0x80,0xaa]
+0x02,0xf0,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], -4.0          ; encoding: [0x02,0xf7,0x80,0xaa]
+0x02,0xf7,0x80,0xaa
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], 0x3f717273    ; encoding: [0x02,0xff,0x80,0xaa,0x73,0x72,0x71,0x3f]
+0x02,0xff,0x80,0xaa,0x73,0x72,0x71,0x3f
+
+# GFX12: s_mul_u64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf]
+0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf
+
 # GFX12: s_add_f32 s5, s1, s2                    ; encoding: [0x01,0x02,0x05,0xa0]
 0x01,0x02,0x05,0xa0
 

Comment on lines +2089 to +2090
defm S_ADD_NC_U64 : SOP2_Real_Renamed_gfx12<0x053, S_ADD_U64, "s_add_nc_u64">;
defm S_SUB_NC_U64 : SOP2_Real_Renamed_gfx12<0x054, S_SUB_U64, "s_sub_nc_u64">;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...again?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, GFX12 is adding "NC" to the SALU name to match the VALU equivalents. The old versions without NC (like S_ADD_U64) are still accepted for compatibility.

@jayfoad jayfoad merged commit f465a2c into llvm:main Dec 6, 2023
5 checks passed
@jayfoad jayfoad deleted the gfx12-add-sub-mul-u64 branch December 6, 2023 10:08
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants