Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Update hardware registers for GFX12 #74445

Merged
jayfoad merged 1 commit into llvm:main from the gfx12-hardware-registers branch on Dec 6, 2023
Merged

[AMDGPU] Update hardware registers for GFX12 #74445

jayfoad merged 1 commit into llvm:main from the gfx12-hardware-registers branch on Dec 6, 2023

Conversation

jayfoad
Copy link
Contributor

@jayfoad jayfoad commented Dec 5, 2023

No description provided.

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 5, 2023

@llvm/pr-subscribers-mc

Author: Jay Foad (jayfoad)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/74445.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+19-5)
  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp (+29-13)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_sopk.s (+45)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt (+51)
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 659ff75e13d01..47dc59e77dc4e 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -478,6 +478,9 @@ enum Id { // HwRegCode, (6) [5:0]
   ID_GPR_ALLOC = 5,
   ID_LDS_ALLOC = 6,
   ID_IB_STS = 7,
+  ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
+  ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
+  ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
   ID_MEM_BASES = 15,
   ID_TBA_LO = 16,
   ID_TBA_HI = 17,
@@ -489,12 +492,23 @@ enum Id { // HwRegCode, (6) [5:0]
   ID_HW_ID1 = 23,
   ID_HW_ID2 = 24,
   ID_POPS_PACKER = 25,
-  ID_PERF_SNAPSHOT_DATA = 27,
+  ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
   ID_SHADER_CYCLES = 29,
-
-  // Register numbers reused in GFX11+
-  ID_PERF_SNAPSHOT_PC_LO = 18,
-  ID_PERF_SNAPSHOT_PC_HI = 19,
+  ID_SHADER_CYCLES_HI = 30,
+  ID_DVGPR_ALLOC_LO = 31,
+  ID_DVGPR_ALLOC_HI = 32,
+
+  // Register numbers reused in GFX11
+  ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
+  ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,
+
+  // Register numbers reused in GFX12+
+  ID_STATE_PRIV = 4,
+  ID_PERF_SNAPSHOT_DATA1 = 15,
+  ID_PERF_SNAPSHOT_DATA2 = 16,
+  ID_EXCP_FLAG_PRIV = 17,
+  ID_EXCP_FLAG_USER = 18,
+  ID_TRAP_CTRL = 19,
 
   // GFX940 specific registers
   ID_XCC_ID = 20,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index ce40d82021cf8..403efd6ffed35 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -87,41 +87,56 @@ namespace Hwreg {
 
 // Disable lint checking for this block since it makes the table unreadable.
 // NOLINTBEGIN
+// clang-format off
 const CustomOperand<const MCSubtargetInfo &> Opr[] = {
   {{""}},
   {{"HW_REG_MODE"},          ID_MODE},
   {{"HW_REG_STATUS"},        ID_STATUS},
-  {{"HW_REG_TRAPSTS"},       ID_TRAPSTS},
+  {{"HW_REG_TRAPSTS"},       ID_TRAPSTS,     isNotGFX12Plus},
   {{"HW_REG_HW_ID"},         ID_HW_ID,       isNotGFX10Plus},
   {{"HW_REG_GPR_ALLOC"},     ID_GPR_ALLOC},
   {{"HW_REG_LDS_ALLOC"},     ID_LDS_ALLOC},
   {{"HW_REG_IB_STS"},        ID_IB_STS},
   {{""}},
   {{""}},
+  {{"HW_REG_PERF_SNAPSHOT_DATA"},  ID_PERF_SNAPSHOT_DATA_gfx12,  isGFX12Plus},
+  {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
+  {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
   {{""}},
   {{""}},
-  {{""}},
-  {{""}},
-  {{""}},
-  {{"HW_REG_SH_MEM_BASES"},  ID_MEM_BASES,   isGFX9Plus},
+  {{"HW_REG_SH_MEM_BASES"},  ID_MEM_BASES,   isGFX9_GFX10_GFX11},
   {{"HW_REG_TBA_LO"},        ID_TBA_LO,      isGFX9_GFX10},
   {{"HW_REG_TBA_HI"},        ID_TBA_HI,      isGFX9_GFX10},
   {{"HW_REG_TMA_LO"},        ID_TMA_LO,      isGFX9_GFX10},
   {{"HW_REG_TMA_HI"},        ID_TMA_HI,      isGFX9_GFX10},
-  {{"HW_REG_FLAT_SCR_LO"},   ID_FLAT_SCR_LO, isGFX10Plus},
-  {{"HW_REG_FLAT_SCR_HI"},   ID_FLAT_SCR_HI, isGFX10Plus},
+  {{"HW_REG_FLAT_SCR_LO"},   ID_FLAT_SCR_LO, isGFX10_GFX11},
+  {{"HW_REG_FLAT_SCR_HI"},   ID_FLAT_SCR_HI, isGFX10_GFX11},
   {{"HW_REG_XNACK_MASK"},    ID_XNACK_MASK,  isGFX10Before1030},
   {{"HW_REG_HW_ID1"},        ID_HW_ID1,      isGFX10Plus},
   {{"HW_REG_HW_ID2"},        ID_HW_ID2,      isGFX10Plus},
   {{"HW_REG_POPS_PACKER"},   ID_POPS_PACKER, isGFX10},
   {{""}},
-  {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA, isGFX11Plus},
+  {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
   {{""}},
-  {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding},
-
-  // Register numbers reused in GFX11+
-  {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO, isGFX11Plus},
-  {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI, isGFX11Plus},
+  {{"HW_REG_SHADER_CYCLES"},    ID_SHADER_CYCLES,    isGFX10_3_GFX11},
+  {{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus},
+  {{"HW_REG_DVGPR_ALLOC_LO"},   ID_DVGPR_ALLOC_LO,   isGFX12Plus},
+  {{"HW_REG_DVGPR_ALLOC_HI"},   ID_DVGPR_ALLOC_HI,   isGFX12Plus},
+
+  // Register numbers reused in GFX11
+  {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
+  {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
+
+  // Register numbers reused in GFX12+
+  {{"HW_REG_STATE_PRIV"},          ID_STATE_PRIV,          isGFX12Plus},
+  {{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
+  {{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
+  {{"HW_REG_EXCP_FLAG_PRIV"},      ID_EXCP_FLAG_PRIV,      isGFX12Plus},
+  {{"HW_REG_EXCP_FLAG_USER"},      ID_EXCP_FLAG_USER,      isGFX12Plus},
+  {{"HW_REG_TRAP_CTRL"},           ID_TRAP_CTRL,           isGFX12Plus},
+  {{"HW_REG_SCRATCH_BASE_LO"},     ID_FLAT_SCR_LO,         isGFX12Plus},
+  {{"HW_REG_SCRATCH_BASE_HI"},     ID_FLAT_SCR_HI,         isGFX12Plus},
+  {{"HW_REG_SHADER_CYCLES_LO"},    ID_SHADER_CYCLES,       isGFX12Plus},
 
   // GFX940 specific registers
   {{"HW_REG_XCC_ID"},                 ID_XCC_ID,                 isGFX940},
@@ -133,6 +148,7 @@ const CustomOperand<const MCSubtargetInfo &> Opr[] = {
   // Aliases
   {{"HW_REG_HW_ID"},                  ID_HW_ID1,                 isGFX10},
 };
+// clang-format on
 // NOLINTEND
 
 const int OPR_SIZE = static_cast<int>(
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s
index 8eb4ebd385a77..3d8d59cfada32 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s
@@ -183,6 +183,9 @@ s_getreg_b32 s0, hwreg(HW_REG_MODE)
 s_getreg_b32 s0, hwreg(HW_REG_STATUS)
 // GFX12: encoding: [0x02,0xf8,0x80,0xb8]
 
+s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV)
+// GFX12: encoding: [0x04,0xf8,0x80,0xb8]
+
 s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC)
 // GFX12: encoding: [0x05,0xf8,0x80,0xb8]
 
@@ -192,8 +195,50 @@ s_getreg_b32 s0, hwreg(HW_REG_LDS_ALLOC)
 s_getreg_b32 s0, hwreg(HW_REG_IB_STS)
 // GFX12: encoding: [0x07,0xf8,0x80,0xb8]
 
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA)
+// GFX12: encoding: [0x0a,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO)
+// GFX12: encoding: [0x0b,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI)
+// GFX12: encoding: [0x0c,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1)
+// GFX12: encoding: [0x0f,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2)
+// GFX12: encoding: [0x10,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV)
+// GFX12: encoding: [0x11,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER)
+// GFX12: encoding: [0x12,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL)
+// GFX12: encoding: [0x13,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO)
+// GFX12: encoding: [0x14,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI)
+// GFX12: encoding: [0x15,0xf8,0x80,0xb8]
+
 s_getreg_b32 s0, hwreg(HW_REG_HW_ID1)
 // GFX12: encoding: [0x17,0xf8,0x80,0xb8]
 
 s_getreg_b32 s0, hwreg(HW_REG_HW_ID2)
 // GFX12: encoding: [0x18,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO)
+// GFX12: encoding: [0x1f,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI)
+// GFX12: encoding: [0x20,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO)
+// GFX12: encoding: [0x1d,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI)
+// GFX12: encoding: [0x1e,0xf8,0x80,0xb8]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt
index fc840e971b55e..ebf65a45d1765 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt
@@ -76,6 +76,9 @@
 # GFX12: s_getreg_b32 s0, hwreg(52, 8, 3)        ; encoding: [0x34,0x12,0x80,0xb8]
 0x34,0x12,0x80,0xb8
 
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25) ; encoding: [0xd1,0xc1,0x80,0xb8]
+0xd1,0xc1,0x80,0xb8
+
 # GFX12: s_getreg_b32 s105, hwreg(52, 8, 3)      ; encoding: [0x34,0x12,0xe9,0xb8]
 0x34,0x12,0xe9,0xb8
 
@@ -154,6 +157,9 @@
 # GFX12: s_setreg_b32 hwreg(52, 8, 3), vcc_lo    ; encoding: [0x34,0x12,0x6a,0xb9]
 0x34,0x12,0x6a,0xb9
 
+# GFX12: s_setreg_b32 hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25), s0 ; encoding: [0xd1,0xc1,0x00,0xb9]
+0xd1,0xc1,0x00,0xb9
+
 # GFX12: s_version 0x1234                        ; encoding: [0x34,0x12,0x80,0xb0]
 0x34,0x12,0x80,0xb0
 
@@ -190,6 +196,9 @@
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATUS)   ; encoding: [0x02,0xf8,0x80,0xb8]
 0x02,0xf8,0x80,0xb8
 
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV) ; encoding: [0x04,0xf8,0x80,0xb8]
+0x04,0xf8,0x80,0xb8
+
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC) ; encoding: [0x05,0xf8,0x80,0xb8]
 0x05,0xf8,0x80,0xb8
 
@@ -199,8 +208,50 @@
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_IB_STS)   ; encoding: [0x07,0xf8,0x80,0xb8]
 0x07,0xf8,0x80,0xb8
 
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA) ; encoding: [0x0a,0xf8,0x80,0xb8]
+0x0a,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO) ; encoding: [0x0b,0xf8,0x80,0xb8]
+0x0b,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI) ; encoding: [0x0c,0xf8,0x80,0xb8]
+0x0c,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1) ; encoding: [0x0f,0xf8,0x80,0xb8]
+0x0f,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2) ; encoding: [0x10,0xf8,0x80,0xb8]
+0x10,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV) ; encoding: [0x11,0xf8,0x80,0xb8]
+0x11,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER) ; encoding: [0x12,0xf8,0x80,0xb8]
+0x12,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL) ; encoding: [0x13,0xf8,0x80,0xb8]
+0x13,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO) ; encoding: [0x14,0xf8,0x80,0xb8]
+0x14,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI) ; encoding: [0x15,0xf8,0x80,0xb8]
+0x15,0xf8,0x80,0xb8
+
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID1)   ; encoding: [0x17,0xf8,0x80,0xb8]
 0x17,0xf8,0x80,0xb8
 
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID2)   ; encoding: [0x18,0xf8,0x80,0xb8]
 0x18,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO) ; encoding: [0x1f,0xf8,0x80,0xb8]
+0x1f,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI) ; encoding: [0x20,0xf8,0x80,0xb8]
+0x20,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO) ; encoding: [0x1d,0xf8,0x80,0xb8]
+0x1d,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI) ; encoding: [0x1e,0xf8,0x80,0xb8]
+0x1e,0xf8,0x80,0xb8

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 5, 2023

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/74445.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+19-5)
  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp (+29-13)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_sopk.s (+45)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt (+51)
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 659ff75e13d01..47dc59e77dc4e 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -478,6 +478,9 @@ enum Id { // HwRegCode, (6) [5:0]
   ID_GPR_ALLOC = 5,
   ID_LDS_ALLOC = 6,
   ID_IB_STS = 7,
+  ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
+  ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
+  ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
   ID_MEM_BASES = 15,
   ID_TBA_LO = 16,
   ID_TBA_HI = 17,
@@ -489,12 +492,23 @@ enum Id { // HwRegCode, (6) [5:0]
   ID_HW_ID1 = 23,
   ID_HW_ID2 = 24,
   ID_POPS_PACKER = 25,
-  ID_PERF_SNAPSHOT_DATA = 27,
+  ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
   ID_SHADER_CYCLES = 29,
-
-  // Register numbers reused in GFX11+
-  ID_PERF_SNAPSHOT_PC_LO = 18,
-  ID_PERF_SNAPSHOT_PC_HI = 19,
+  ID_SHADER_CYCLES_HI = 30,
+  ID_DVGPR_ALLOC_LO = 31,
+  ID_DVGPR_ALLOC_HI = 32,
+
+  // Register numbers reused in GFX11
+  ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
+  ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,
+
+  // Register numbers reused in GFX12+
+  ID_STATE_PRIV = 4,
+  ID_PERF_SNAPSHOT_DATA1 = 15,
+  ID_PERF_SNAPSHOT_DATA2 = 16,
+  ID_EXCP_FLAG_PRIV = 17,
+  ID_EXCP_FLAG_USER = 18,
+  ID_TRAP_CTRL = 19,
 
   // GFX940 specific registers
   ID_XCC_ID = 20,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index ce40d82021cf8..403efd6ffed35 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -87,41 +87,56 @@ namespace Hwreg {
 
 // Disable lint checking for this block since it makes the table unreadable.
 // NOLINTBEGIN
+// clang-format off
 const CustomOperand<const MCSubtargetInfo &> Opr[] = {
   {{""}},
   {{"HW_REG_MODE"},          ID_MODE},
   {{"HW_REG_STATUS"},        ID_STATUS},
-  {{"HW_REG_TRAPSTS"},       ID_TRAPSTS},
+  {{"HW_REG_TRAPSTS"},       ID_TRAPSTS,     isNotGFX12Plus},
   {{"HW_REG_HW_ID"},         ID_HW_ID,       isNotGFX10Plus},
   {{"HW_REG_GPR_ALLOC"},     ID_GPR_ALLOC},
   {{"HW_REG_LDS_ALLOC"},     ID_LDS_ALLOC},
   {{"HW_REG_IB_STS"},        ID_IB_STS},
   {{""}},
   {{""}},
+  {{"HW_REG_PERF_SNAPSHOT_DATA"},  ID_PERF_SNAPSHOT_DATA_gfx12,  isGFX12Plus},
+  {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
+  {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
   {{""}},
   {{""}},
-  {{""}},
-  {{""}},
-  {{""}},
-  {{"HW_REG_SH_MEM_BASES"},  ID_MEM_BASES,   isGFX9Plus},
+  {{"HW_REG_SH_MEM_BASES"},  ID_MEM_BASES,   isGFX9_GFX10_GFX11},
   {{"HW_REG_TBA_LO"},        ID_TBA_LO,      isGFX9_GFX10},
   {{"HW_REG_TBA_HI"},        ID_TBA_HI,      isGFX9_GFX10},
   {{"HW_REG_TMA_LO"},        ID_TMA_LO,      isGFX9_GFX10},
   {{"HW_REG_TMA_HI"},        ID_TMA_HI,      isGFX9_GFX10},
-  {{"HW_REG_FLAT_SCR_LO"},   ID_FLAT_SCR_LO, isGFX10Plus},
-  {{"HW_REG_FLAT_SCR_HI"},   ID_FLAT_SCR_HI, isGFX10Plus},
+  {{"HW_REG_FLAT_SCR_LO"},   ID_FLAT_SCR_LO, isGFX10_GFX11},
+  {{"HW_REG_FLAT_SCR_HI"},   ID_FLAT_SCR_HI, isGFX10_GFX11},
   {{"HW_REG_XNACK_MASK"},    ID_XNACK_MASK,  isGFX10Before1030},
   {{"HW_REG_HW_ID1"},        ID_HW_ID1,      isGFX10Plus},
   {{"HW_REG_HW_ID2"},        ID_HW_ID2,      isGFX10Plus},
   {{"HW_REG_POPS_PACKER"},   ID_POPS_PACKER, isGFX10},
   {{""}},
-  {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA, isGFX11Plus},
+  {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
   {{""}},
-  {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding},
-
-  // Register numbers reused in GFX11+
-  {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO, isGFX11Plus},
-  {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI, isGFX11Plus},
+  {{"HW_REG_SHADER_CYCLES"},    ID_SHADER_CYCLES,    isGFX10_3_GFX11},
+  {{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus},
+  {{"HW_REG_DVGPR_ALLOC_LO"},   ID_DVGPR_ALLOC_LO,   isGFX12Plus},
+  {{"HW_REG_DVGPR_ALLOC_HI"},   ID_DVGPR_ALLOC_HI,   isGFX12Plus},
+
+  // Register numbers reused in GFX11
+  {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
+  {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
+
+  // Register numbers reused in GFX12+
+  {{"HW_REG_STATE_PRIV"},          ID_STATE_PRIV,          isGFX12Plus},
+  {{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
+  {{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
+  {{"HW_REG_EXCP_FLAG_PRIV"},      ID_EXCP_FLAG_PRIV,      isGFX12Plus},
+  {{"HW_REG_EXCP_FLAG_USER"},      ID_EXCP_FLAG_USER,      isGFX12Plus},
+  {{"HW_REG_TRAP_CTRL"},           ID_TRAP_CTRL,           isGFX12Plus},
+  {{"HW_REG_SCRATCH_BASE_LO"},     ID_FLAT_SCR_LO,         isGFX12Plus},
+  {{"HW_REG_SCRATCH_BASE_HI"},     ID_FLAT_SCR_HI,         isGFX12Plus},
+  {{"HW_REG_SHADER_CYCLES_LO"},    ID_SHADER_CYCLES,       isGFX12Plus},
 
   // GFX940 specific registers
   {{"HW_REG_XCC_ID"},                 ID_XCC_ID,                 isGFX940},
@@ -133,6 +148,7 @@ const CustomOperand<const MCSubtargetInfo &> Opr[] = {
   // Aliases
   {{"HW_REG_HW_ID"},                  ID_HW_ID1,                 isGFX10},
 };
+// clang-format on
 // NOLINTEND
 
 const int OPR_SIZE = static_cast<int>(
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s
index 8eb4ebd385a77..3d8d59cfada32 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s
@@ -183,6 +183,9 @@ s_getreg_b32 s0, hwreg(HW_REG_MODE)
 s_getreg_b32 s0, hwreg(HW_REG_STATUS)
 // GFX12: encoding: [0x02,0xf8,0x80,0xb8]
 
+s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV)
+// GFX12: encoding: [0x04,0xf8,0x80,0xb8]
+
 s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC)
 // GFX12: encoding: [0x05,0xf8,0x80,0xb8]
 
@@ -192,8 +195,50 @@ s_getreg_b32 s0, hwreg(HW_REG_LDS_ALLOC)
 s_getreg_b32 s0, hwreg(HW_REG_IB_STS)
 // GFX12: encoding: [0x07,0xf8,0x80,0xb8]
 
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA)
+// GFX12: encoding: [0x0a,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO)
+// GFX12: encoding: [0x0b,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI)
+// GFX12: encoding: [0x0c,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1)
+// GFX12: encoding: [0x0f,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2)
+// GFX12: encoding: [0x10,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV)
+// GFX12: encoding: [0x11,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER)
+// GFX12: encoding: [0x12,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL)
+// GFX12: encoding: [0x13,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO)
+// GFX12: encoding: [0x14,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI)
+// GFX12: encoding: [0x15,0xf8,0x80,0xb8]
+
 s_getreg_b32 s0, hwreg(HW_REG_HW_ID1)
 // GFX12: encoding: [0x17,0xf8,0x80,0xb8]
 
 s_getreg_b32 s0, hwreg(HW_REG_HW_ID2)
 // GFX12: encoding: [0x18,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO)
+// GFX12: encoding: [0x1f,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI)
+// GFX12: encoding: [0x20,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO)
+// GFX12: encoding: [0x1d,0xf8,0x80,0xb8]
+
+s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI)
+// GFX12: encoding: [0x1e,0xf8,0x80,0xb8]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt
index fc840e971b55e..ebf65a45d1765 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt
@@ -76,6 +76,9 @@
 # GFX12: s_getreg_b32 s0, hwreg(52, 8, 3)        ; encoding: [0x34,0x12,0x80,0xb8]
 0x34,0x12,0x80,0xb8
 
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25) ; encoding: [0xd1,0xc1,0x80,0xb8]
+0xd1,0xc1,0x80,0xb8
+
 # GFX12: s_getreg_b32 s105, hwreg(52, 8, 3)      ; encoding: [0x34,0x12,0xe9,0xb8]
 0x34,0x12,0xe9,0xb8
 
@@ -154,6 +157,9 @@
 # GFX12: s_setreg_b32 hwreg(52, 8, 3), vcc_lo    ; encoding: [0x34,0x12,0x6a,0xb9]
 0x34,0x12,0x6a,0xb9
 
+# GFX12: s_setreg_b32 hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25), s0 ; encoding: [0xd1,0xc1,0x00,0xb9]
+0xd1,0xc1,0x00,0xb9
+
 # GFX12: s_version 0x1234                        ; encoding: [0x34,0x12,0x80,0xb0]
 0x34,0x12,0x80,0xb0
 
@@ -190,6 +196,9 @@
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATUS)   ; encoding: [0x02,0xf8,0x80,0xb8]
 0x02,0xf8,0x80,0xb8
 
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV) ; encoding: [0x04,0xf8,0x80,0xb8]
+0x04,0xf8,0x80,0xb8
+
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC) ; encoding: [0x05,0xf8,0x80,0xb8]
 0x05,0xf8,0x80,0xb8
 
@@ -199,8 +208,50 @@
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_IB_STS)   ; encoding: [0x07,0xf8,0x80,0xb8]
 0x07,0xf8,0x80,0xb8
 
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA) ; encoding: [0x0a,0xf8,0x80,0xb8]
+0x0a,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO) ; encoding: [0x0b,0xf8,0x80,0xb8]
+0x0b,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI) ; encoding: [0x0c,0xf8,0x80,0xb8]
+0x0c,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1) ; encoding: [0x0f,0xf8,0x80,0xb8]
+0x0f,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2) ; encoding: [0x10,0xf8,0x80,0xb8]
+0x10,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV) ; encoding: [0x11,0xf8,0x80,0xb8]
+0x11,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER) ; encoding: [0x12,0xf8,0x80,0xb8]
+0x12,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL) ; encoding: [0x13,0xf8,0x80,0xb8]
+0x13,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO) ; encoding: [0x14,0xf8,0x80,0xb8]
+0x14,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI) ; encoding: [0x15,0xf8,0x80,0xb8]
+0x15,0xf8,0x80,0xb8
+
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID1)   ; encoding: [0x17,0xf8,0x80,0xb8]
 0x17,0xf8,0x80,0xb8
 
 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID2)   ; encoding: [0x18,0xf8,0x80,0xb8]
 0x18,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO) ; encoding: [0x1f,0xf8,0x80,0xb8]
+0x1f,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI) ; encoding: [0x20,0xf8,0x80,0xb8]
+0x20,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO) ; encoding: [0x1d,0xf8,0x80,0xb8]
+0x1d,0xf8,0x80,0xb8
+
+# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI) ; encoding: [0x1e,0xf8,0x80,0xb8]
+0x1e,0xf8,0x80,0xb8

@jayfoad jayfoad merged commit 8d4977a into llvm:main Dec 6, 2023
5 checks passed
@jayfoad jayfoad deleted the gfx12-hardware-registers branch December 6, 2023 10:04
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants