Skip to content

Commit

Permalink
[AMDGPU] gfx11 FLAT Instructions
Browse files Browse the repository at this point in the history
MachineCode Support for FLAT type instructions

Contributors:
Sebastian Neubauer <sebastian.neubauer@amd.com>

Patch 12/N for upstreaming of AMDGPU gfx11 architecture.

Depends on D125989

Reviewed By: rampitec, #amdgpu

Differential Revision: https://reviews.llvm.org/D125992
  • Loading branch information
Sisyph committed May 25, 2022
1 parent 8e757c6 commit 835e09c
Show file tree
Hide file tree
Showing 9 changed files with 3,372 additions and 16 deletions.
245 changes: 232 additions & 13 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Expand Up @@ -144,7 +144,7 @@ void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
if (IsFlatSeg) { // Unsigned offset
printU16ImmDecOperand(MI, OpNo, O);
} else { // Signed offset
-if (AMDGPU::isGFX10Plus(STI)) {
+if (AMDGPU::isGFX10(STI)) {
O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
} else {
O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Expand Up @@ -2198,7 +2198,7 @@ Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
}

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) {
-// Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
+// Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+.
if (AMDGPU::isGFX10(ST))
return Signed ? 12 : 11;

Expand Down
196 changes: 196 additions & 0 deletions llvm/test/MC/AMDGPU/flat-gfx11-mnemonic.s
@@ -0,0 +1,196 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefix=GFX11 %s

// This test feeds FLAT-family instructions to the assembler using the older
// mnemonic spellings (*_ubyte, *_dword, *_dwordx4, *_x2, ...) with gfx1100
// selected, and checks the 8-byte encoding that is emitted for each one.
// Where a check line also spells out instruction text, it additionally pins
// the mnemonic printed back by the assembler (e.g. flat_load_ubyte is printed
// as flat_load_u8, flat_store_dwordx4 as flat_store_b128).

// FLAT
// Plain flat_* forms: a 64-bit VGPR pair is the only address operand.

flat_load_ubyte v1, v[4:5]
// GFX11: flat_load_u8 v1, v[4:5] ; encoding: [0x00,0x00,0x40,0xdc,0x04,0x00,0x7c,0x01]

flat_load_sbyte v1, v[3:4]
// GFX11: encoding: [0x00,0x00,0x44,0xdc,0x03,0x00,0x7c,0x01]

flat_load_ushort v1, v[3:4]
// GFX11: encoding: [0x00,0x00,0x48,0xdc,0x03,0x00,0x7c,0x01]

flat_load_sshort v1, v[3:4]
// GFX11: encoding: [0x00,0x00,0x4c,0xdc,0x03,0x00,0x7c,0x01]

flat_load_dword v1, v[3:4]
// GFX11: encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x7c,0x01]

flat_load_dwordx2 v[1:2], v[3:4]
// GFX11: encoding: [0x00,0x00,0x54,0xdc,0x03,0x00,0x7c,0x01]

flat_load_dwordx3 v[1:3], v[5:6]
// GFX11: encoding: [0x00,0x00,0x58,0xdc,0x05,0x00,0x7c,0x01]

flat_load_dwordx4 v[1:4], v[5:6]
// GFX11: encoding: [0x00,0x00,0x5c,0xdc,0x05,0x00,0x7c,0x01]

// Stores: address pair first, then the data register(s).
flat_store_byte v[3:4], v1
// GFX11: encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x7c,0x00]

flat_store_short v[3:4], v1
// GFX11: encoding: [0x00,0x00,0x64,0xdc,0x03,0x01,0x7c,0x00]

// offset:16 appears as 0x10 in the first encoding byte.
flat_store_dword v[3:4], v1 offset:16
// GFX11: encoding: [0x10,0x00,0x68,0xdc,0x03,0x01,0x7c,0x00]

flat_store_dwordx2 v[1:2], v[3:4]
// GFX11: encoding: [0x00,0x00,0x6c,0xdc,0x01,0x03,0x7c,0x00]

flat_store_dwordx3 v[1:2], v[3:5]
// GFX11: encoding: [0x00,0x00,0x70,0xdc,0x01,0x03,0x7c,0x00]

flat_store_dwordx4 v[1:2], v[3:6]
// GFX11: flat_store_b128 v[1:2], v[3:6] ; encoding: [0x00,0x00,0x74,0xdc,0x01,0x03,0x7c,0x00]

// Atomics. The glc forms below carry one more leading VGPR operand than the
// non-glc form (flat_atomic_add ... slc) -- presumably the returned pre-op
// value; confirm against the ISA documentation.
// offset:2047 with glc encodes as 0xff,0x47 in the first two bytes; slc alone
// encodes as 0x00,0x80.
flat_atomic_swap v0, v[1:2], v3 offset:2047 glc
// GFX11: encoding: [0xff,0x47,0xcc,0xdc,0x01,0x03,0x7c,0x00]

flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] offset:2047 glc
// GFX11: flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] offset:2047 glc ; encoding: [0xff,0x47,0x04,0xdd,0x03,0x05,0x7c,0x01]

flat_atomic_add v[3:4], v5 slc
// GFX11: encoding: [0x00,0x80,0xd4,0xdc,0x03,0x05,0x7c,0x00]

flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc
// GFX11: encoding: [0xff,0x47,0xd0,0xdc,0x01,0x03,0x7c,0x00]

flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] offset:2047 glc
// GFX11: encoding: [0xff,0x47,0x08,0xdd,0x03,0x05,0x7c,0x01]

// GLOBAL No saddr
// global_* forms written with the older mnemonics. Every instruction here
// passes 'off' in the trailing scalar-address position, and every expected
// encoding in this section has 0x7c in its seventh byte.

global_load_ubyte v1, v[3:4], off
// GFX11: global_load_u8 v1, v[3:4], off ; encoding: [0x00,0x00,0x42,0xdc,0x03,0x00,0x7c,0x01]

global_load_sbyte v1, v[3:4], off
// GFX11: encoding: [0x00,0x00,0x46,0xdc,0x03,0x00,0x7c,0x01]

global_load_ushort v1, v[3:4], off
// GFX11: encoding: [0x00,0x00,0x4a,0xdc,0x03,0x00,0x7c,0x01]

global_load_sshort v1, v[3:4], off
// GFX11: encoding: [0x00,0x00,0x4e,0xdc,0x03,0x00,0x7c,0x01]

global_load_dword v1, v[3:4], off
// GFX11: encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x7c,0x01]

global_load_dwordx2 v[1:2], v[3:4], off
// GFX11: encoding: [0x00,0x00,0x56,0xdc,0x03,0x00,0x7c,0x01]

global_load_dwordx3 v[1:3], v[5:6], off
// GFX11: encoding: [0x00,0x00,0x5a,0xdc,0x05,0x00,0x7c,0x01]

global_load_dwordx4 v[1:4], v[5:6], off
// GFX11: encoding: [0x00,0x00,0x5e,0xdc,0x05,0x00,0x7c,0x01]

// Stores: address pair, data register(s), then 'off'.
global_store_byte v[3:4], v1, off
// GFX11: encoding: [0x00,0x00,0x62,0xdc,0x03,0x01,0x7c,0x00]

global_store_short v[3:4], v1, off
// GFX11: encoding: [0x00,0x00,0x66,0xdc,0x03,0x01,0x7c,0x00]

// offset:16 appears as 0x10 in the first encoding byte.
global_store_dword v[3:4], v1, off offset:16
// GFX11: encoding: [0x10,0x00,0x6a,0xdc,0x03,0x01,0x7c,0x00]

global_store_dwordx2 v[1:2], v[3:4], off
// GFX11: encoding: [0x00,0x00,0x6e,0xdc,0x01,0x03,0x7c,0x00]

global_store_dwordx3 v[1:2], v[3:5], off
// GFX11: encoding: [0x00,0x00,0x72,0xdc,0x01,0x03,0x7c,0x00]

global_store_dwordx4 v[1:2], v[3:6], off
// GFX11: encoding: [0x00,0x00,0x76,0xdc,0x01,0x03,0x7c,0x00]

// Atomics with offset:2047 and glc; the first two bytes encode as 0xff,0x47.
global_atomic_swap v0, v[1:2], v3, off offset:2047 glc
// GFX11: encoding: [0xff,0x47,0xce,0xdc,0x01,0x03,0x7c,0x00]

global_atomic_swap_x2 v[1:2], v[3:4], v[5:6], off offset:2047 glc
// GFX11: encoding: [0xff,0x47,0x06,0xdd,0x03,0x05,0x7c,0x01]

// SCRATCH
// scratch_* forms written with the older mnemonics. Loads take
// (dst, vaddr, saddr); stores take (vaddr, data, saddr). Only the encoding is
// checked in this section.

scratch_load_ubyte v1, v2, s1
// GFX11: encoding: [0x00,0x00,0x41,0xdc,0x02,0x00,0x81,0x01]

scratch_load_sbyte v1, v2, s1
// GFX11: encoding: [0x00,0x00,0x45,0xdc,0x02,0x00,0x81,0x01]

scratch_load_ushort v1, v2, s1
// GFX11: encoding: [0x00,0x00,0x49,0xdc,0x02,0x00,0x81,0x01]

scratch_load_sshort v1, v2, s1
// GFX11: encoding: [0x00,0x00,0x4d,0xdc,0x02,0x00,0x81,0x01]

scratch_load_dword v1, v2, s1
// GFX11: encoding: [0x00,0x00,0x51,0xdc,0x02,0x00,0x81,0x01]

scratch_load_dwordx2 v[1:2], v2, s1
// GFX11: encoding: [0x00,0x00,0x55,0xdc,0x02,0x00,0x81,0x01]

scratch_load_dwordx3 v[1:3], v2, s1
// GFX11: encoding: [0x00,0x00,0x59,0xdc,0x02,0x00,0x81,0x01]

scratch_load_dwordx4 v[1:4], v2, s1
// GFX11: encoding: [0x00,0x00,0x5d,0xdc,0x02,0x00,0x81,0x01]

scratch_store_byte v1, v2, s3
// GFX11: encoding: [0x00,0x00,0x61,0xdc,0x01,0x02,0x83,0x00]

scratch_store_short v1, v2, s3
// GFX11: encoding: [0x00,0x00,0x65,0xdc,0x01,0x02,0x83,0x00]

scratch_store_dword v1, v2, s3
// GFX11: encoding: [0x00,0x00,0x69,0xdc,0x01,0x02,0x83,0x00]

scratch_store_dwordx2 v1, v[2:3], s3
// GFX11: encoding: [0x00,0x00,0x6d,0xdc,0x01,0x02,0x83,0x00]

scratch_store_dwordx3 v1, v[2:4], s3
// GFX11: encoding: [0x00,0x00,0x71,0xdc,0x01,0x02,0x83,0x00]

scratch_store_dwordx4 v1, v[2:5], s3
// GFX11: encoding: [0x00,0x00,0x75,0xdc,0x01,0x02,0x83,0x00]

// Addressing-mode combinations for loads: vaddr and saddr are each replaced
// by 'off' in turn. The seventh encoding byte differs per combination
// (0x81 for v2+s1, 0xfc for v2+off, 0x01 for off+s1, 0x7c for off+off).
scratch_load_dword v1, v2, s1 offset:2047
// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x02,0x00,0x81,0x01]

scratch_load_dword v1, v2, off offset:2047
// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x02,0x00,0xfc,0x01]

scratch_load_dword v1, off, s1 offset:2047
// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x00,0x00,0x01,0x01]

scratch_load_dword v1, off, off offset:2047
// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x00,0x00,0x7c,0x01]

scratch_load_dword v1, off, off
// GFX11: encoding: [0x00,0x00,0x51,0xdc,0x00,0x00,0x7c,0x01]

// The same addressing-mode combinations for stores.
scratch_store_dword v1, v2, s3 offset:2047
// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x01,0x02,0x83,0x00]

scratch_store_dword v1, v2, off offset:2047
// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x01,0x02,0xfc,0x00]

scratch_store_dword off, v2, s3 offset:2047
// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x00,0x02,0x03,0x00]

scratch_store_dword off, v2, off offset:2047
// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x00,0x02,0x7c,0x00]

// Offsets 4095 and -4096 are the largest and smallest values of a signed
// 13-bit field; they encode as 0x0fff and 0x1000 in the first two bytes
// (little-endian).
scratch_load_dword v1, v2, s1 offset:4095
// GFX11: encoding: [0xff,0x0f,0x51,0xdc,0x02,0x00,0x81,0x01]

scratch_load_dword v1, v2, s1 offset:-4096
// GFX11: encoding: [0x00,0x10,0x51,0xdc,0x02,0x00,0x81,0x01]

scratch_store_dword v1, v2, s1 offset:4095
// GFX11: encoding: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x81,0x00]

scratch_store_dword v1, v2, s1 offset:-4096
// GFX11: encoding: [0x00,0x10,0x69,0xdc,0x01,0x02,0x81,0x00]

// Store with neither vaddr nor saddr and no offset.
scratch_store_dword off, v2, off
// GFX11: encoding: [0x00,0x00,0x69,0xdc,0x00,0x02,0x7c,0x00]

0 comments on commit 835e09c

Please sign in to comment.