Skip to content

Commit

Permalink
[AMDGPU][GFX10] Support base+soffset+offset SMEM stores.
Browse files Browse the repository at this point in the history
Also makes another step towards resolving issue #38652.

Reviewed By: foad, dp

Differential Revision: https://reviews.llvm.org/D125380
  • Loading branch information
kosarev committed May 12, 2022
1 parent 82ea0d8 commit cb67b2c
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 12 deletions.
35 changes: 23 additions & 12 deletions llvm/lib/Target/AMDGPU/SMInstructions.td
Expand Up @@ -110,10 +110,11 @@ class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag>
let has_dlc = 1;
}

class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
: SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
RegisterClass BaseClass;
RegisterClass SrcClass;
class SM_Store_Pseudo <string opName, RegisterClass baseClass,
RegisterClass srcClass, dag ins, string asmOps>
: SM_Pseudo<opName, (outs), ins, asmOps, []> {
RegisterClass BaseClass = baseClass;
RegisterClass SrcClass = srcClass;
let mayLoad = 0;
let mayStore = 1;
let has_glc = 1;
Expand Down Expand Up @@ -177,23 +178,28 @@ multiclass SM_Pseudo_Loads<string opName,
multiclass SM_Pseudo_Stores<string opName,
RegisterClass baseClass,
RegisterClass srcClass> {
def _IMM : SM_Store_Pseudo <opName,
def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
(ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
" $sdata, $sbase, $offset$cpol", []> {
" $sdata, $sbase, $offset$cpol"> {
let has_offset = 1;
let BaseClass = baseClass;
let SrcClass = srcClass;
let PseudoInstr = opName # "_IMM";
}

def _SGPR : SM_Store_Pseudo <opName,
def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass,
(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPol:$cpol),
" $sdata, $sbase, $soffset$cpol", []> {
" $sdata, $sbase, $soffset$cpol"> {
let has_soffset = 1;
let BaseClass = baseClass;
let SrcClass = srcClass;
let PseudoInstr = opName # "_SGPR";
}

def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, i32imm:$offset,
CPol:$cpol),
" $sdata, $sbase, $soffset$offset$cpol"> {
let has_offset = 1;
let has_soffset = 1;
let PseudoInstr = opName # "_SGPR_IMM";
}
}

multiclass SM_Pseudo_Discards<string opName> {
Expand Down Expand Up @@ -948,6 +954,11 @@ multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
}

def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#_SGPR_IMM)> {
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase,
SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol);
}
}

defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
Expand Down
39 changes: 39 additions & 0 deletions llvm/test/MC/AMDGPU/gfx10_asm_smem.s
Expand Up @@ -581,6 +581,9 @@ s_store_dword s1, s[4:5], null
s_store_dword s1, s[4:5], 0x0
// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa]

s_store_dword s1, s[4:5], s0 offset:0x12345
// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x45,0x23,0x01,0x00]

s_store_dword s1, s[4:5], s0 glc
// GFX10: encoding: [0x42,0x00,0x41,0xf4,0x00,0x00,0x00,0x00]

Expand All @@ -593,6 +596,9 @@ s_store_dword s1, s[4:5], s0 glc dlc
s_store_dword s1, s[4:5], 0x1234 glc dlc
// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]

s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]

s_store_dwordx2 s[2:3], s[4:5], s0
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0x00]

Expand Down Expand Up @@ -632,6 +638,9 @@ s_store_dwordx2 s[2:3], s[4:5], null
s_store_dwordx2 s[2:3], s[4:5], 0x0
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa]

s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x45,0x23,0x01,0x00]

s_store_dwordx2 s[2:3], s[4:5], s0 glc
// GFX10: encoding: [0x82,0x00,0x45,0xf4,0x00,0x00,0x00,0x00]

Expand All @@ -644,6 +653,9 @@ s_store_dwordx2 s[2:3], s[4:5], s0 glc dlc
s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc
// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]

s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]

s_store_dwordx4 s[4:7], s[4:5], s0
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0x00]

Expand Down Expand Up @@ -680,6 +692,9 @@ s_store_dwordx4 s[4:7], s[4:5], null
s_store_dwordx4 s[4:7], s[4:5], 0x0
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa]

s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x45,0x23,0x01,0x00]

s_store_dwordx4 s[4:7], s[4:5], s0 glc
// GFX10: encoding: [0x02,0x01,0x49,0xf4,0x00,0x00,0x00,0x00]

Expand All @@ -692,6 +707,9 @@ s_store_dwordx4 s[4:7], s[4:5], s0 glc dlc
s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc
// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]

s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]

s_buffer_store_dword s1, s[8:11], s0
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0x00]

Expand Down Expand Up @@ -728,6 +746,9 @@ s_buffer_store_dword s1, s[8:11], null
s_buffer_store_dword s1, s[8:11], 0x0
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa]

s_buffer_store_dword s1, s[8:11], s0 offset:0x12345
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x45,0x23,0x01,0x00]

s_buffer_store_dword s1, s[8:11], s0 glc
// GFX10: encoding: [0x44,0x00,0x61,0xf4,0x00,0x00,0x00,0x00]

Expand All @@ -740,6 +761,9 @@ s_buffer_store_dword s1, s[8:11], s0 glc dlc
s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc
// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]

s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]

s_buffer_store_dwordx2 s[2:3], s[8:11], s0
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0x00]

Expand Down Expand Up @@ -776,6 +800,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], null
s_buffer_store_dwordx2 s[2:3], s[8:11], 0x0
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa]

s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x45,0x23,0x01,0x00]

s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc
// GFX10: encoding: [0x84,0x00,0x65,0xf4,0x00,0x00,0x00,0x00]

Expand All @@ -788,6 +815,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc dlc
s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc
// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]

s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]

s_buffer_store_dwordx4 s[4:7], s[8:11], s0
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0x00]

Expand Down Expand Up @@ -821,6 +851,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], null
s_buffer_store_dwordx4 s[4:7], s[8:11], 0x0
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa]

s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x45,0x23,0x01,0x00]

s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc
// GFX10: encoding: [0x04,0x01,0x69,0xf4,0x00,0x00,0x00,0x00]

Expand All @@ -833,6 +866,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc dlc
s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc
// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]

s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]

s_memrealtime s[10:11]
// GFX10: encoding: [0x80,0x02,0x94,0xf4,0x00,0x00,0x00,0x00]

Expand Down Expand Up @@ -893,6 +929,9 @@ s_scratch_store_dword s101, s[4:5], s0
s_scratch_store_dword s1, s[4:5], 0x123 glc
// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]

s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc
// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]

s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc
// GFX10: encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca]

Expand Down
21 changes: 21 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
Expand Up @@ -11591,6 +11591,9 @@
# GFX10: s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]
0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa

# GFX10: s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]
0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00

# GFX10: s_buffer_store_dword s1, s[8:11], m0 ; encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8]
0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8

Expand Down Expand Up @@ -11639,6 +11642,9 @@
# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]
0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa

# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]
0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00

# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], m0 ; encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8]
0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8

Expand Down Expand Up @@ -11681,6 +11687,9 @@
# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]
0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa

# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]
0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00

# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], m0 ; encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8]
0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8

Expand Down Expand Up @@ -18086,6 +18095,9 @@
# GFX10: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa

# GFX10: s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc ; encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]
0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00

# GFX10: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00]
0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00

Expand Down Expand Up @@ -18323,6 +18335,9 @@
# GFX10: s_store_dword s1, s[4:5], 0x1234 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]
0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa

# GFX10: s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]
0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00

# GFX10: s_store_dword s1, s[4:5], m0 ; encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8]
0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8

Expand Down Expand Up @@ -18374,6 +18389,9 @@
# GFX10: s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]
0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa

# GFX10: s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]
0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00

# GFX10: s_store_dwordx2 s[2:3], s[4:5], m0 ; encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8]
0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8

Expand Down Expand Up @@ -18419,6 +18437,9 @@
# GFX10: s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]
0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa

# GFX10: s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]
0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00

# GFX10: s_store_dwordx4 s[4:7], s[4:5], m0 ; encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8]
0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8

Expand Down

0 comments on commit cb67b2c

Please sign in to comment.