Skip to content

Commit

Permalink
[AMDGPU][GFX9][GFX10] Support base+soffset+offset SMEM atomics.
Browse files Browse the repository at this point in the history
Resolves a part of
#38652

Reviewed By: dp

Differential Revision: https://reviews.llvm.org/D127314
  • Loading branch information
kosarev committed Jun 10, 2022
1 parent 8daaea2 commit 60d6fbb
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 15 deletions.
59 changes: 44 additions & 15 deletions llvm/lib/Target/AMDGPU/SMInstructions.td
Expand Up @@ -87,6 +87,21 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
bits<5> cpol;
}

// Bundles everything that distinguishes one SMEM offset-operand form from
// another, so an instruction class can take a single OffsetMode argument
// instead of separate flags. Each field just records the template argument
// of the same name:
//   hasOffset  - stored in HasOffset.
//   hasSOffset - stored in HasSOffset.
//   variant    - name suffix for the form (e.g. "_IMM"), stored in Variant.
//   ins        - dag holding the offset operand(s), stored in Ins.
//   asm        - assembly-string fragment for those operand(s), stored in Asm.
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
dag ins, string asm> {
bit HasOffset = hasOffset;
bit HasSOffset = hasSOffset;
string Variant = variant;
dag Ins = ins;
string Asm = asm;
}

// The three offset forms, in OffsetMode argument order
// (hasOffset, hasSOffset, variant, ins, asm).

// Immediate offset only: a single $offset operand.
def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">;
// Register offset only: a single $soffset operand.
def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
// Register offset plus immediate offset. The asm fragment joins the two
// operands with no separator, and $offset uses smem_offset_mod rather than
// smem_offset.
// NOTE(review): presumably smem_offset_mod prints its own " offset:" prefix
// (the assembly in the tests reads "s7 offset:0x64") - confirm against the
// operand definition.
def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM",
(ins SReg_32:$soffset, smem_offset_mod:$offset),
"$soffset$offset">;

class SM_Probe_Pseudo <string opName, string variant, RegisterClass baseClass,
dag offsets, string asmOffsets,
bit hasOffset, bit hasSOffset>
Expand Down Expand Up @@ -282,23 +297,21 @@ class SM_Atomic_Pseudo <string opName,
class SM_Pseudo_Atomic<string opName,
RegisterClass baseClass,
RegisterClass dataClass,
bit isImm,
OffsetMode offsets,
bit isRet,
string opNameWithSuffix = opName # !if(isImm,
!if(isRet, "_IMM_RTN", "_IMM"),
!if(isRet, "_SGPR_RTN", "_SGPR")),
string opNameWithSuffix =
opName # offsets.Variant # !if(isRet, "_RTN", ""),
Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
SM_Atomic_Pseudo<opName,
!if(isRet, (outs dataClass:$sdst), (outs)),
!if(isImm,
(ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol),
(ins dataClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPolTy:$cpol)),
!if(isRet, " $sdst", " $sdata") # ", $sbase, " #
!if(isImm, "$offset", "$soffset") # "$cpol",
!con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
(ins CPolTy:$cpol)),
!if(isRet, " $sdst", " $sdata") #
", $sbase, " # offsets.Asm # "$cpol",
isRet>,
AtomicNoRet <opNameWithSuffix, isRet> {
let has_offset = isImm;
let has_soffset = !not(isImm);
let has_offset = offsets.HasOffset;
let has_soffset = offsets.HasSOffset;
let PseudoInstr = opNameWithSuffix;

let Constraints = !if(isRet, "$sdst = $sdata", "");
Expand All @@ -308,10 +321,12 @@ class SM_Pseudo_Atomic<string opName,
multiclass SM_Pseudo_Atomics<string opName,
RegisterClass baseClass,
RegisterClass dataClass> {
def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 0>;
def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 0>;
def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 1>;
def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 1>;
def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
def _SGPR_IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 1>;
def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 1>;
def _SGPR_IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -705,8 +720,20 @@ class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
// Defines the real (encoded) SMEM atomic instructions for one opcode.
//   op - 8-bit opcode value shared by every variant defined here.
//   ps - base name of the pseudo instruction; each variant suffix is appended
//        to it and the result is looked up as an SM_Atomic_Pseudo via !cast.
// Non-returning and returning (_RTN) forms are defined in parallel.
multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
def _IMM_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
// Second real instruction for the same _SGPR pseudo, with the
// SMEM_Real_SGPR_alt_gfx9 mix-in applied.
// NOTE(review): presumably this is the alternative GFX9 encoding of the SGPR
// form (imm=0, soffset_en=1) - confirm against SMEM_Real_SGPR_alt_gfx9.
def _SGPR_alt_gfx9
: SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>,
SMEM_Real_SGPR_alt_gfx9;
// The SGPR+IMM form is flagged as a GFX9-specific encoding.
let IsGFX9SpecificEncoding = true in
def _SGPR_IMM_gfx9
: SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
def _IMM_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
def _SGPR_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
// Returning counterparts of the two GFX9-only definitions above.
def _SGPR_RTN_alt_gfx9
: SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>,
SMEM_Real_SGPR_alt_gfx9;
let IsGFX9SpecificEncoding = true in
def _SGPR_IMM_RTN_gfx9
: SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}

defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
Expand Down Expand Up @@ -1103,8 +1130,10 @@ class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
// Defines the GFX10 real (encoded) SMEM atomic instructions for one opcode.
//   op - 8-bit opcode value shared by every variant defined here.
//   ps - base name of the pseudo instruction; each variant suffix is appended
//        to it and the result is looked up as an SM_Atomic_Pseudo via !cast.
// One real instruction is defined per offset form (_IMM, _SGPR, _SGPR_IMM),
// first the non-returning forms and then the returning (_RTN) forms.
multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
def _SGPR_IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}

let SubtargetPredicate = HasScalarAtomics in {
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/MC/AMDGPU/gfx10_asm_smem.s
Expand Up @@ -983,6 +983,9 @@ s_atomic_add s5, s[2:3], s101
s_atomic_add s5, s[2:3], 0x64
// GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa]

s_atomic_add s5, s[2:3], s7 offset:0x64
// GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e]

s_atomic_add_x2 s[10:11], s[2:3], s101
// GFX10: encoding: [0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca]

Expand Down Expand Up @@ -1109,6 +1112,9 @@ s_atomic_add s5, s[2:3], s101 glc
s_atomic_add s5, s[2:3], 0x64 glc
// GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa]

s_atomic_add s5, s[2:3], s7 offset:0x64 glc
// GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e]

s_atomic_add_x2 s[10:11], s[2:3], s101 glc
// GFX10: encoding: [0x81,0x02,0x89,0xf6,0x00,0x00,0x00,0xca]

Expand Down
6 changes: 6 additions & 0 deletions llvm/test/MC/AMDGPU/gfx9_asm_smem.s
Expand Up @@ -3123,9 +3123,15 @@ s_atomic_add s5, s[2:3], m0
s_atomic_add s5, s[2:3], 0x0
// CHECK: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00]

s_atomic_add s5, s[2:3], s7 offset:0x12345
// CHECK: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e]

s_atomic_add s5, s[2:3], s0 glc
// CHECK: [0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00]

s_atomic_add s5, s[2:3], s7 offset:0x12345 glc
// CHECK: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e]

s_atomic_sub s5, s[2:3], s0
// CHECK: [0x41,0x01,0x0c,0xc2,0x00,0x00,0x00,0x00]

Expand Down
6 changes: 6 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
Expand Up @@ -13253,12 +13253,18 @@
# GFX10: s_atomic_add s5, s[2:3], 0x64 ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa]
0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa

# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e]
0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e

# GFX10: s_atomic_add s5, s[2:3], 0x64 dlc ; encoding: [0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa]
0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa

# GFX10: s_atomic_add s5, s[2:3], 0x64 glc ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa]
0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa

# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 glc ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e]
0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e

# GFX10: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca]
0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca

Expand Down
18 changes: 18 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt
Expand Up @@ -66,9 +66,27 @@
# GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00]
0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00

# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1
# and the offset register encoded in the soffset field with the offset
# field being disregarded.
# GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x41,0x08,0xc2,0x00,0x00,0x00,0xca]
0x41,0x41,0x08,0xc2,0x2e,0x00,0x00,0xca

# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1
# and the offset register encoded in the soffset field with the offset
# field being disregarded.
# GFX9: s_atomic_add s5, s[2:3], s101 glc ; encoding: [0x41,0x41,0x09,0xc2,0x00,0x00,0x00,0xca]
0x41,0x41,0x09,0xc2,0x2e,0x00,0x00,0xca

# GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00]
0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00

# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 ; encoding: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e]
0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e

# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 glc ; encoding: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e]
0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e

# GFX9: s_atomic_and s101, s[2:3], s0 ; encoding: [0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00]
0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00

Expand Down

0 comments on commit 60d6fbb

Please sign in to comment.