Skip to content

Commit

Permalink
[AMDGPU] Update hardware registers for GFX12 (#74445)
Browse files: browse the repository at this point in the history
  • Loading branch information
jayfoad committed Dec 6, 2023
1 parent ecd2f56 commit 8d4977a
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 18 deletions.
24 changes: 19 additions & 5 deletions llvm/lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,9 @@ enum Id { // HwRegCode, (6) [5:0]
ID_GPR_ALLOC = 5,
ID_LDS_ALLOC = 6,
ID_IB_STS = 7,
ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
ID_MEM_BASES = 15,
ID_TBA_LO = 16,
ID_TBA_HI = 17,
Expand All @@ -489,12 +492,23 @@ enum Id { // HwRegCode, (6) [5:0]
ID_HW_ID1 = 23,
ID_HW_ID2 = 24,
ID_POPS_PACKER = 25,
ID_PERF_SNAPSHOT_DATA = 27,
ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
ID_SHADER_CYCLES = 29,

// Register numbers reused in GFX11+
ID_PERF_SNAPSHOT_PC_LO = 18,
ID_PERF_SNAPSHOT_PC_HI = 19,
ID_SHADER_CYCLES_HI = 30,
ID_DVGPR_ALLOC_LO = 31,
ID_DVGPR_ALLOC_HI = 32,

// Register numbers reused in GFX11
ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,

// Register numbers reused in GFX12+
ID_STATE_PRIV = 4,
ID_PERF_SNAPSHOT_DATA1 = 15,
ID_PERF_SNAPSHOT_DATA2 = 16,
ID_EXCP_FLAG_PRIV = 17,
ID_EXCP_FLAG_USER = 18,
ID_TRAP_CTRL = 19,

// GFX940 specific registers
ID_XCC_ID = 20,
Expand Down
42 changes: 29 additions & 13 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,41 +87,56 @@ namespace Hwreg {

// Disable lint checking for this block since it makes the table unreadable.
// NOLINTBEGIN
// clang-format off
const CustomOperand<const MCSubtargetInfo &> Opr[] = {
{{""}},
{{"HW_REG_MODE"}, ID_MODE},
{{"HW_REG_STATUS"}, ID_STATUS},
{{"HW_REG_TRAPSTS"}, ID_TRAPSTS},
{{"HW_REG_TRAPSTS"}, ID_TRAPSTS, isNotGFX12Plus},
{{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus},
{{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC},
{{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC},
{{"HW_REG_IB_STS"}, ID_IB_STS},
{{""}},
{{""}},
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
{{""}},
{{""}},
{{""}},
{{""}},
{{""}},
{{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9Plus},
{{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9_GFX10_GFX11},
{{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10},
{{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10},
{{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10},
{{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10},
{{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10Plus},
{{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10Plus},
{{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10_GFX11},
{{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10_GFX11},
{{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030},
{{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus},
{{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus},
{{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10},
{{""}},
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA, isGFX11Plus},
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
{{""}},
{{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding},

// Register numbers reused in GFX11+
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO, isGFX11Plus},
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI, isGFX11Plus},
{{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11},
{{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus},
{{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus},
{{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus},

// Register numbers reused in GFX11
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},

// Register numbers reused in GFX12+
{{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus},
{{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
{{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
{{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus},
{{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus},
{{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus},
{{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus},
{{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
{{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},

// GFX940 specific registers
{{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
Expand All @@ -133,6 +148,7 @@ const CustomOperand<const MCSubtargetInfo &> Opr[] = {
// Aliases
{{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10},
};
// clang-format on
// NOLINTEND

const int OPR_SIZE = static_cast<int>(
Expand Down
45 changes: 45 additions & 0 deletions llvm/test/MC/AMDGPU/gfx12_asm_sopk.s
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ s_getreg_b32 s0, hwreg(HW_REG_MODE)
s_getreg_b32 s0, hwreg(HW_REG_STATUS)
// GFX12: encoding: [0x02,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV)
// GFX12: encoding: [0x04,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC)
// GFX12: encoding: [0x05,0xf8,0x80,0xb8]

Expand All @@ -192,8 +195,50 @@ s_getreg_b32 s0, hwreg(HW_REG_LDS_ALLOC)
s_getreg_b32 s0, hwreg(HW_REG_IB_STS)
// GFX12: encoding: [0x07,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA)
// GFX12: encoding: [0x0a,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO)
// GFX12: encoding: [0x0b,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI)
// GFX12: encoding: [0x0c,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1)
// GFX12: encoding: [0x0f,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2)
// GFX12: encoding: [0x10,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV)
// GFX12: encoding: [0x11,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER)
// GFX12: encoding: [0x12,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL)
// GFX12: encoding: [0x13,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO)
// GFX12: encoding: [0x14,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI)
// GFX12: encoding: [0x15,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_HW_ID1)
// GFX12: encoding: [0x17,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_HW_ID2)
// GFX12: encoding: [0x18,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO)
// GFX12: encoding: [0x1f,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI)
// GFX12: encoding: [0x20,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO)
// GFX12: encoding: [0x1d,0xf8,0x80,0xb8]

s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI)
// GFX12: encoding: [0x1e,0xf8,0x80,0xb8]
51 changes: 51 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@
# GFX12: s_getreg_b32 s0, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0x80,0xb8]
0x34,0x12,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25) ; encoding: [0xd1,0xc1,0x80,0xb8]
0xd1,0xc1,0x80,0xb8

# GFX12: s_getreg_b32 s105, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xe9,0xb8]
0x34,0x12,0xe9,0xb8

Expand Down Expand Up @@ -154,6 +157,9 @@
# GFX12: s_setreg_b32 hwreg(52, 8, 3), vcc_lo ; encoding: [0x34,0x12,0x6a,0xb9]
0x34,0x12,0x6a,0xb9

# GFX12: s_setreg_b32 hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25), s0 ; encoding: [0xd1,0xc1,0x00,0xb9]
0xd1,0xc1,0x00,0xb9

# GFX12: s_version 0x1234 ; encoding: [0x34,0x12,0x80,0xb0]
0x34,0x12,0x80,0xb0

Expand Down Expand Up @@ -190,6 +196,9 @@
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATUS) ; encoding: [0x02,0xf8,0x80,0xb8]
0x02,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV) ; encoding: [0x04,0xf8,0x80,0xb8]
0x04,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC) ; encoding: [0x05,0xf8,0x80,0xb8]
0x05,0xf8,0x80,0xb8

Expand All @@ -199,8 +208,50 @@
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_IB_STS) ; encoding: [0x07,0xf8,0x80,0xb8]
0x07,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA) ; encoding: [0x0a,0xf8,0x80,0xb8]
0x0a,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO) ; encoding: [0x0b,0xf8,0x80,0xb8]
0x0b,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI) ; encoding: [0x0c,0xf8,0x80,0xb8]
0x0c,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1) ; encoding: [0x0f,0xf8,0x80,0xb8]
0x0f,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2) ; encoding: [0x10,0xf8,0x80,0xb8]
0x10,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV) ; encoding: [0x11,0xf8,0x80,0xb8]
0x11,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER) ; encoding: [0x12,0xf8,0x80,0xb8]
0x12,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL) ; encoding: [0x13,0xf8,0x80,0xb8]
0x13,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO) ; encoding: [0x14,0xf8,0x80,0xb8]
0x14,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI) ; encoding: [0x15,0xf8,0x80,0xb8]
0x15,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID1) ; encoding: [0x17,0xf8,0x80,0xb8]
0x17,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID2) ; encoding: [0x18,0xf8,0x80,0xb8]
0x18,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO) ; encoding: [0x1f,0xf8,0x80,0xb8]
0x1f,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI) ; encoding: [0x20,0xf8,0x80,0xb8]
0x20,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO) ; encoding: [0x1d,0xf8,0x80,0xb8]
0x1d,0xf8,0x80,0xb8

# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI) ; encoding: [0x1e,0xf8,0x80,0xb8]
0x1e,0xf8,0x80,0xb8

0 comments on commit 8d4977a

Please sign in to comment.