Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Add GFX12 8- and 16-bit SMEM loads #76966

Merged
merged 1 commit into from
Jan 5, 2024
Merged

[AMDGPU] Add GFX12 8- and 16-bit SMEM loads #76966

merged 1 commit into from
Jan 5, 2024

Conversation

jayfoad
Copy link
Contributor

@jayfoad jayfoad commented Jan 4, 2024

No description provided.

@llvmbot llvmbot added backend:AMDGPU mc Machine (object) code labels Jan 4, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Jan 4, 2024

@llvm/pr-subscribers-mc

Author: Jay Foad (jayfoad)

Changes

Patch is 57.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76966.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SMInstructions.td (+18)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_smem.s (+564)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt (+513)
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index be21cf0140fc85..087ee65aa03fd3 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -305,6 +305,10 @@ let SubtargetPredicate = HasScalarDwordx3Loads in
 defm S_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_64, SReg_128>;
 defm S_LOAD_DWORDX8  : SM_Pseudo_Loads <SReg_64, SReg_256>;
 defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
+defm S_LOAD_I8       : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_U8       : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_I16      : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_U16      : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
 
 let is_buffer = 1 in {
 defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
@@ -316,6 +320,10 @@ let SubtargetPredicate = HasScalarDwordx3Loads in
 defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
 defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
 defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
+defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
 }
 
 let SubtargetPredicate = HasScalarStores in {
@@ -1336,6 +1344,11 @@ defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
 defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
 defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;
 
+defm S_LOAD_I8   : SM_Real_Loads_gfx12<0x08>;
+defm S_LOAD_U8   : SM_Real_Loads_gfx12<0x09>;
+defm S_LOAD_I16  : SM_Real_Loads_gfx12<0x0a>;
+defm S_LOAD_U16  : SM_Real_Loads_gfx12<0x0b>;
+
 defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
 defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
 defm S_BUFFER_LOAD_B96  : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
@@ -1343,6 +1356,11 @@ defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
 defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
 defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;
 
+defm S_BUFFER_LOAD_I8  : SM_Real_Loads_gfx12<0x18>;
+defm S_BUFFER_LOAD_U8  : SM_Real_Loads_gfx12<0x19>;
+defm S_BUFFER_LOAD_I16 : SM_Real_Loads_gfx12<0x1a>;
+defm S_BUFFER_LOAD_U16 : SM_Real_Loads_gfx12<0x1b>;
+
 def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;
 
 def S_PREFETCH_INST_gfx12        : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
index 1566b9c04e3494..eb59607755da0e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
@@ -34,6 +34,306 @@ s_buffer_prefetch_data s[20:23], 100, s10, 7
 s_buffer_prefetch_data s[20:23], 100, null, 7
 // GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]
 
+s_load_i8 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x19,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x1a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x1a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, vcc, s0
+// GFX12: encoding: [0x75,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_i8 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_i8 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_i8 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_i8 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_i8 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_load_u8 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x39,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x3a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x3a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, vcc, s0
+// GFX12: encoding: [0x75,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_u8 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_u8 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_u8 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_u8 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_u8 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_buffer_load_i8 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x19,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x1a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x1a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x01,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x01,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_i8 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_i8 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_i8 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_i8 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_i8 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x45,0x23,0x01,0x00]
+
+s_buffer_load_u8 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x39,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x3a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x3a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x21,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x21,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_u8 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_u8 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_u8 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_u8 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_u8 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x45,0x23,0x01,0x00]
+
+s_load_i16 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x59,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x5a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x5a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, vcc, s0
+// GFX12: encoding: [0x75,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_i16 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_i16 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_i16 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_i16 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_i16 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_load_u16 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x79,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x7a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x7a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0
+// GFX12: encoding: [0x75,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_u16 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_u16 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_u16 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_u16 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_u16 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_buffer_load_i16 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x59,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x5a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x5a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x41,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x41,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_i16 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_i16 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_i16 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_i16 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_i16 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x45,0x23,0x01,0x00]
+
+s_buffer_load_u16 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x79,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x7a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x7a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x61,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x61,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_u16 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_u16 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_u16 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_u16 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_u16 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x45,0x23,0x01,0x00]
+
 s_load_b32 s5, s[2:3], s0
 // GFX12: s_load_b32 s5, s[2:3], s0 offset:0x0    ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
 
@@ -646,6 +946,138 @@ s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
 s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
 // GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0x85,0x60,0xf5,0x00,0x00,0x00,0x00]
 
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0     ; encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x41,0x01,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x41,0x01,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x41,0x01,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0     ; encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x41,0x01,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x41,0x01,0x41,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x41,0x01,0x61,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x41,0x01,0x21,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x41,0x01,0xa1,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x41,0x01,0x61,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0   ; encoding: [0x41,0x39,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x41,0x39,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x41,0x39,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x41,0x39,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0   ; encoding: [0x41,0x39,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x41,0x39,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x41,0x39,0x41,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x41,0x39,0x61,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x41,0x39,0x21,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x41,0x39,0xa1,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x41,0x39,0x61,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x72,0x41,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x72,0x41,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x72,0x41,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x72,0x41,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x72,0x41,0x41,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x72,0x41,0x61,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x72,0x41,0x21,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x72,0x41,0xa1,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x72,0x41,0x61,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0       ; encoding: [0x75,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x75,0x61,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x75,0x61,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x75,0x61,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0       ; encoding: [0x75,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x75,0x61,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x75,0x61...
[truncated]

@llvmbot
Copy link
Collaborator

llvmbot commented Jan 4, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

Patch is 57.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76966.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SMInstructions.td (+18)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_smem.s (+564)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_smem.txt (+513)
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index be21cf0140fc85..087ee65aa03fd3 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -305,6 +305,10 @@ let SubtargetPredicate = HasScalarDwordx3Loads in
 defm S_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_64, SReg_128>;
 defm S_LOAD_DWORDX8  : SM_Pseudo_Loads <SReg_64, SReg_256>;
 defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
+defm S_LOAD_I8       : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_U8       : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_I16      : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_U16      : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
 
 let is_buffer = 1 in {
 defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
@@ -316,6 +320,10 @@ let SubtargetPredicate = HasScalarDwordx3Loads in
 defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
 defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
 defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
+defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
 }
 
 let SubtargetPredicate = HasScalarStores in {
@@ -1336,6 +1344,11 @@ defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
 defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
 defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;
 
+defm S_LOAD_I8   : SM_Real_Loads_gfx12<0x08>;
+defm S_LOAD_U8   : SM_Real_Loads_gfx12<0x09>;
+defm S_LOAD_I16  : SM_Real_Loads_gfx12<0x0a>;
+defm S_LOAD_U16  : SM_Real_Loads_gfx12<0x0b>;
+
 defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
 defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
 defm S_BUFFER_LOAD_B96  : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
@@ -1343,6 +1356,11 @@ defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
 defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
 defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;
 
+defm S_BUFFER_LOAD_I8  : SM_Real_Loads_gfx12<0x18>;
+defm S_BUFFER_LOAD_U8  : SM_Real_Loads_gfx12<0x19>;
+defm S_BUFFER_LOAD_I16 : SM_Real_Loads_gfx12<0x1a>;
+defm S_BUFFER_LOAD_U16 : SM_Real_Loads_gfx12<0x1b>;
+
 def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;
 
 def S_PREFETCH_INST_gfx12        : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
index 1566b9c04e3494..eb59607755da0e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
@@ -34,6 +34,306 @@ s_buffer_prefetch_data s[20:23], 100, s10, 7
 s_buffer_prefetch_data s[20:23], 100, null, 7
 // GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]
 
+s_load_i8 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x19,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x1a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x1a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, vcc, s0
+// GFX12: encoding: [0x75,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_i8 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_i8 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_i8 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_i8 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_i8 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x01,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_load_u8 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x39,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x3a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x3a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, vcc, s0
+// GFX12: encoding: [0x75,0x21,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_u8 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_u8 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_u8 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_u8 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_u8 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x21,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_buffer_load_i8 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x19,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x1a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x1a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x01,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x01,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i8 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_i8 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_i8 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_i8 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_i8 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_i8 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x01,0x03,0xf4,0x45,0x23,0x01,0x00]
+
+s_buffer_load_u8 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x39,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x3a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x3a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x21,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x21,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u8 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_u8 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_u8 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_u8 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_u8 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_u8 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x21,0x03,0xf4,0x45,0x23,0x01,0x00]
+
+s_load_i16 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x59,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x5a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x5a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, vcc, s0
+// GFX12: encoding: [0x75,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_i16 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_i16 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_i16 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_i16 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_i16 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x41,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_load_u16 s5, s[2:3], s0
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s101, s[2:3], s0
+// GFX12: encoding: [0x41,0x79,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 vcc_lo, s[2:3], s0
+// GFX12: encoding: [0x81,0x7a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 vcc_hi, s[2:3], s0
+// GFX12: encoding: [0xc1,0x7a,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, s[4:5], s0
+// GFX12: encoding: [0x42,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, s[100:101], s0
+// GFX12: encoding: [0x72,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0
+// GFX12: encoding: [0x75,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, s[2:3], s101
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_u16 s5, s[2:3], vcc_lo
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_u16 s5, s[2:3], vcc_hi
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_u16 s5, s[2:3], m0
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_u16 s5, s[2:3], 0x0
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_u16 s5, s[2:3], s7 offset:0x12345
+// GFX12: encoding: [0x41,0x61,0x01,0xf4,0x45,0x23,0x01,0x0e]
+
+s_buffer_load_i16 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x59,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x5a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x5a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x41,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x41,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_i16 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_i16 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_i16 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_i16 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_i16 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_i16 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x41,0x03,0xf4,0x45,0x23,0x01,0x00]
+
+s_buffer_load_u16 s5, s[4:7], s0
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s101, s[4:7], s0
+// GFX12: encoding: [0x42,0x79,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 vcc_lo, s[4:7], s0
+// GFX12: encoding: [0x82,0x7a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 vcc_hi, s[4:7], s0
+// GFX12: encoding: [0xc2,0x7a,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s5, s[8:11], s0
+// GFX12: encoding: [0x44,0x61,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s5, s[96:99], s0
+// GFX12: encoding: [0x70,0x61,0x03,0xf4,0x00,0x00,0x00,0x00]
+
+s_buffer_load_u16 s5, s[4:7], s101
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xca]
+
+s_buffer_load_u16 s5, s[4:7], vcc_lo
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xd4]
+
+s_buffer_load_u16 s5, s[4:7], vcc_hi
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xd6]
+
+s_buffer_load_u16 s5, s[4:7], m0
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xfa]
+
+s_buffer_load_u16 s5, s[4:7], 0x0
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x00,0x00,0x00,0xf8]
+
+s_buffer_load_u16 s5, s[4:7], s0 offset:0x12345
+// GFX12: encoding: [0x42,0x61,0x03,0xf4,0x45,0x23,0x01,0x00]
+
 s_load_b32 s5, s[2:3], s0
 // GFX12: s_load_b32 s5, s[2:3], s0 offset:0x0    ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
 
@@ -646,6 +946,138 @@ s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
 s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
 // GFX12: s_load_b512 s[20:35], s[4:5], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x02,0x85,0x60,0xf5,0x00,0x00,0x00,0x00]
 
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0     ; encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x41,0x01,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x41,0x01,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x41,0x01,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0     ; encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x41,0x01,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x41,0x01,0x41,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x41,0x01,0x61,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x41,0x01,0x21,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x41,0x01,0xa1,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_i8 s5, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x41,0x01,0x61,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0   ; encoding: [0x41,0x39,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x41,0x39,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x41,0x39,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x41,0x39,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0   ; encoding: [0x41,0x39,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x41,0x39,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x41,0x39,0x41,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x41,0x39,0x61,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x41,0x39,0x21,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x41,0x39,0xa1,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_u8 s101, s[2:3], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x41,0x39,0x61,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x72,0x41,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x72,0x41,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x72,0x41,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x41,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x72,0x41,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x72,0x41,0x41,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SYS
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 scope:SCOPE_SYS ; encoding: [0x72,0x41,0x61,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x72,0x41,0x21,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_LU scope:SCOPE_SE ; encoding: [0x72,0x41,0xa1,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS
+// GFX12: s_load_i16 s5, s[100:101], s0 offset:0x0 th:TH_LOAD_HT scope:SCOPE_SYS ; encoding: [0x72,0x41,0x61,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_RT
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0       ; encoding: [0x75,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_NT
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_NT ; encoding: [0x75,0x61,0x81,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_HT
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_HT ; encoding: [0x75,0x61,0x01,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_LU
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 th:TH_LOAD_LU ; encoding: [0x75,0x61,0x81,0xf5,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_CU
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0       ; encoding: [0x75,0x61,0x01,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_SE
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_SE ; encoding: [0x75,0x61,0x21,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_DEV
+// GFX12: s_load_u16 s5, vcc, s0 offset:0x0 scope:SCOPE_DEV ; encoding: [0x75,0x61,0x41,0xf4,0x00,0x00,0x00,0x00]
[truncated]

@jayfoad jayfoad merged commit 59f3b72 into llvm:main Jan 5, 2024
6 checks passed
@jayfoad jayfoad deleted the gfx12-s-load-8-16 branch January 10, 2024 17:30
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants