diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 659ff75e13d01..47dc59e77dc4e 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -478,6 +478,9 @@ enum Id { // HwRegCode, (6) [5:0] ID_GPR_ALLOC = 5, ID_LDS_ALLOC = 6, ID_IB_STS = 7, + ID_PERF_SNAPSHOT_DATA_gfx12 = 10, + ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11, + ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12, ID_MEM_BASES = 15, ID_TBA_LO = 16, ID_TBA_HI = 17, @@ -489,12 +492,23 @@ enum Id { // HwRegCode, (6) [5:0] ID_HW_ID1 = 23, ID_HW_ID2 = 24, ID_POPS_PACKER = 25, - ID_PERF_SNAPSHOT_DATA = 27, + ID_PERF_SNAPSHOT_DATA_gfx11 = 27, ID_SHADER_CYCLES = 29, - - // Register numbers reused in GFX11+ - ID_PERF_SNAPSHOT_PC_LO = 18, - ID_PERF_SNAPSHOT_PC_HI = 19, + ID_SHADER_CYCLES_HI = 30, + ID_DVGPR_ALLOC_LO = 31, + ID_DVGPR_ALLOC_HI = 32, + + // Register numbers reused in GFX11 + ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18, + ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19, + + // Register numbers reused in GFX12+ + ID_STATE_PRIV = 4, + ID_PERF_SNAPSHOT_DATA1 = 15, + ID_PERF_SNAPSHOT_DATA2 = 16, + ID_EXCP_FLAG_PRIV = 17, + ID_EXCP_FLAG_USER = 18, + ID_TRAP_CTRL = 19, // GFX940 specific registers ID_XCC_ID = 20, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index ce40d82021cf8..403efd6ffed35 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -87,41 +87,56 @@ namespace Hwreg { // Disable lint checking for this block since it makes the table unreadable. 
// NOLINTBEGIN +// clang-format off const CustomOperand Opr[] = { {{""}}, {{"HW_REG_MODE"}, ID_MODE}, {{"HW_REG_STATUS"}, ID_STATUS}, - {{"HW_REG_TRAPSTS"}, ID_TRAPSTS}, + {{"HW_REG_TRAPSTS"}, ID_TRAPSTS, isNotGFX12Plus}, {{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus}, {{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC}, {{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC}, {{"HW_REG_IB_STS"}, ID_IB_STS}, {{""}}, {{""}}, + {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus}, + {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus}, + {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus}, {{""}}, {{""}}, - {{""}}, - {{""}}, - {{""}}, - {{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9Plus}, + {{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9_GFX10_GFX11}, {{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10}, {{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10}, {{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10}, {{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10}, - {{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10Plus}, - {{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10Plus}, + {{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10_GFX11}, + {{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10_GFX11}, {{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030}, {{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus}, {{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus}, {{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10}, {{""}}, - {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA, isGFX11Plus}, + {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11}, {{""}}, - {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding}, - - // Register numbers reused in GFX11+ - {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO, isGFX11Plus}, - {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI, isGFX11Plus}, + {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11}, + {{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus}, + {{"HW_REG_DVGPR_ALLOC_LO"}, 
ID_DVGPR_ALLOC_LO, isGFX12Plus}, + {{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus}, + + // Register numbers reused in GFX11 + {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11}, + {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11}, + + // Register numbers reused in GFX12+ + {{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus}, + {{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus}, + {{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus}, + {{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus}, + {{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus}, + {{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus}, + {{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus}, + {{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, + {{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus}, // GFX940 specific registers {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940}, @@ -133,6 +148,7 @@ const CustomOperand Opr[] = { // Aliases {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10}, }; +// clang-format on // NOLINTEND const int OPR_SIZE = static_cast<int>( diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s index 8eb4ebd385a77..3d8d59cfada32 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s @@ -183,6 +183,9 @@ s_getreg_b32 s0, hwreg(HW_REG_MODE) s_getreg_b32 s0, hwreg(HW_REG_STATUS) // GFX12: encoding: [0x02,0xf8,0x80,0xb8] +s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV) +// GFX12: encoding: [0x04,0xf8,0x80,0xb8] + s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC) // GFX12: encoding: [0x05,0xf8,0x80,0xb8] @@ -192,8 +195,50 @@ s_getreg_b32 s0, hwreg(HW_REG_LDS_ALLOC) s_getreg_b32 s0, hwreg(HW_REG_IB_STS) // GFX12: encoding: [0x07,0xf8,0x80,0xb8] +s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA) +// GFX12: encoding: [0x0a,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO) +// GFX12: encoding: [0x0b,0xf8,0x80,0xb8] + 
+s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI) +// GFX12: encoding: [0x0c,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1) +// GFX12: encoding: [0x0f,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2) +// GFX12: encoding: [0x10,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV) +// GFX12: encoding: [0x11,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER) +// GFX12: encoding: [0x12,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL) +// GFX12: encoding: [0x13,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO) +// GFX12: encoding: [0x14,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI) +// GFX12: encoding: [0x15,0xf8,0x80,0xb8] + s_getreg_b32 s0, hwreg(HW_REG_HW_ID1) // GFX12: encoding: [0x17,0xf8,0x80,0xb8] s_getreg_b32 s0, hwreg(HW_REG_HW_ID2) // GFX12: encoding: [0x18,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO) +// GFX12: encoding: [0x1f,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI) +// GFX12: encoding: [0x20,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO) +// GFX12: encoding: [0x1d,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI) +// GFX12: encoding: [0x1e,0xf8,0x80,0xb8] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt index fc840e971b55e..ebf65a45d1765 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt @@ -76,6 +76,9 @@ # GFX12: s_getreg_b32 s0, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0x80,0xb8] 0x34,0x12,0x80,0xb8 +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25) ; encoding: [0xd1,0xc1,0x80,0xb8] +0xd1,0xc1,0x80,0xb8 + # GFX12: s_getreg_b32 s105, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xe9,0xb8] 0x34,0x12,0xe9,0xb8 @@ -154,6 +157,9 @@ # GFX12: s_setreg_b32 hwreg(52, 8, 3), vcc_lo ; encoding: [0x34,0x12,0x6a,0xb9] 
0x34,0x12,0x6a,0xb9 +# GFX12: s_setreg_b32 hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25), s0 ; encoding: [0xd1,0xc1,0x00,0xb9] +0xd1,0xc1,0x00,0xb9 + # GFX12: s_version 0x1234 ; encoding: [0x34,0x12,0x80,0xb0] 0x34,0x12,0x80,0xb0 @@ -190,6 +196,9 @@ # GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATUS) ; encoding: [0x02,0xf8,0x80,0xb8] 0x02,0xf8,0x80,0xb8 +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV) ; encoding: [0x04,0xf8,0x80,0xb8] +0x04,0xf8,0x80,0xb8 + # GFX12: s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC) ; encoding: [0x05,0xf8,0x80,0xb8] 0x05,0xf8,0x80,0xb8 @@ -199,8 +208,50 @@ # GFX12: s_getreg_b32 s0, hwreg(HW_REG_IB_STS) ; encoding: [0x07,0xf8,0x80,0xb8] 0x07,0xf8,0x80,0xb8 +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA) ; encoding: [0x0a,0xf8,0x80,0xb8] +0x0a,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO) ; encoding: [0x0b,0xf8,0x80,0xb8] +0x0b,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI) ; encoding: [0x0c,0xf8,0x80,0xb8] +0x0c,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1) ; encoding: [0x0f,0xf8,0x80,0xb8] +0x0f,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2) ; encoding: [0x10,0xf8,0x80,0xb8] +0x10,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV) ; encoding: [0x11,0xf8,0x80,0xb8] +0x11,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER) ; encoding: [0x12,0xf8,0x80,0xb8] +0x12,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL) ; encoding: [0x13,0xf8,0x80,0xb8] +0x13,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO) ; encoding: [0x14,0xf8,0x80,0xb8] +0x14,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI) ; encoding: [0x15,0xf8,0x80,0xb8] +0x15,0xf8,0x80,0xb8 + # GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID1) ; encoding: [0x17,0xf8,0x80,0xb8] 0x17,0xf8,0x80,0xb8 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID2) ; encoding: 
[0x18,0xf8,0x80,0xb8] 0x18,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO) ; encoding: [0x1f,0xf8,0x80,0xb8] +0x1f,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI) ; encoding: [0x20,0xf8,0x80,0xb8] +0x20,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO) ; encoding: [0x1d,0xf8,0x80,0xb8] +0x1d,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI) ; encoding: [0x1e,0xf8,0x80,0xb8] +0x1e,0xf8,0x80,0xb8