Skip to content

Commit

Permalink
[AMDGPU] Allow no saddr for global addtid insts
Browse files Browse the repository at this point in the history
I think the global_load_dword_addtid and global_store_dword_addtid
instructions support switching off the scalar address (writing `off`
in place of the saddr register pair).
Add assembler and disassembler support for this form.

Differential Revision: https://reviews.llvm.org/D93288
  • Loading branch information
Flakebi committed Dec 16, 2020
1 parent 741978d commit 409a2f0
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 14 deletions.
46 changes: 32 additions & 14 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -192,24 +192,34 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
}

// Global (is_flat_global = 1) load pseudo that takes no vaddr operand
// (has_vaddr = 0).
// Parameters:
//   opName          - name string forwarded to FLAT_Pseudo; also the base of
//                     PseudoInstr.
//   regClass        - register class of the $vdst output.
//   HasTiedOutput   - when set, adds a $vdst_in input that is tied to $vdst and
//                     excluded from the encoding.
//   HasSignedOffset - not referenced anywhere in this class body.
//   EnableSaddr     - when set, the inputs include an SReg_64 $saddr operand;
//                     when clear, that operand is omitted and the asm string
//                     prints the literal "off" in its place.
// NOTE(review): this text is a rendered diff without +/- markers, so a
// superseded line appears to sit directly above its replacement (for example
// the two parameter lines right below) — confirm against the real .td file.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
bit HasTiedOutput = 0, bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs regClass:$vdst),
!con((ins SReg_64:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
// Input list is assembled from three pieces: optional $saddr, the fixed
// offset/cache-policy operands, and the optional tied $vdst_in.
!con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
(ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
" $vdst, $saddr$offset$glc$slc$dlc"> {
" $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
let is_flat_global = 1;
let has_data = 0;
let mayLoad = 1;
let has_vaddr = 0;
// has_saddr stays 1 for both variants; only enabled_saddr follows EnableSaddr.
let has_saddr = 1;
let enabled_saddr = 1;
let enabled_saddr = EnableSaddr;
let maybeAtomic = 1;
// The saddr variant gets a distinct PseudoInstr name via the "_SADDR" suffix.
let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");

// An empty string is used for both fields when there is no tied output.
let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Defines both forms of an addtid global load from one defm:
//   <name>        - EnableSaddr left at its default of 0 (no $saddr operand).
//   <name>_SADDR  - EnableSaddr = 1 ($saddr operand present).
// Each record also inherits GlobalSaddrTable<0|1, opName>.
// NOTE(review): GlobalSaddrTable is defined outside this view; presumably it
// pairs the two forms under the shared opName key — confirm at its definition.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
bit HasTiedOutput = 0, bit HasSignedOffset = 0> {
def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset>,
GlobalSaddrTable<0, opName>;
// Trailing 1 is the EnableSaddr argument of the class.
def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset, 1>,
GlobalSaddrTable<1, opName>;
}

multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
Expand All @@ -220,21 +230,29 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}

// Global (is_flat_global = 1) store pseudo that takes no vaddr operand
// (has_vaddr = 0) and produces no outputs.
// Parameters:
//   opName          - name string forwarded to FLAT_Pseudo; also the base of
//                     PseudoInstr.
//   vdataClass      - register class of the $vdata input.
//   HasSignedOffset - not referenced anywhere in this class body.
//   EnableSaddr     - when set, the inputs include an SReg_64 $saddr operand
//                     after $vdata; when clear, that operand is omitted and the
//                     asm string prints the literal "off" in its place.
// NOTE(review): this text is a rendered diff without +/- markers, so a
// superseded line appears to sit directly above its replacement (for example
// the two parameter lines right below) — confirm against the real .td file.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
bit HasSignedOffset = 0> : FLAT_Pseudo<
bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs),
!con(
(ins vdataClass:$vdata, SReg_64:$saddr),
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
" $vdata, $saddr$offset$glc$slc$dlc"> {
// $vdata is always present; $saddr is appended only when EnableSaddr is set.
!con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
" $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
let is_flat_global = 1;
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
let has_vaddr = 0;
// has_saddr stays 1 for both variants; only enabled_saddr follows EnableSaddr.
let has_saddr = 1;
let enabled_saddr = 1;
let enabled_saddr = EnableSaddr;
let maybeAtomic = 1;
// The saddr variant gets a distinct PseudoInstr name via the "_SADDR" suffix.
let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}

// Defines both forms of an addtid global store from one defm:
//   <name>        - EnableSaddr left at its default of 0 (no $saddr operand).
//   <name>_SADDR  - EnableSaddr = 1 ($saddr operand present).
// Each record also inherits GlobalSaddrTable<0|1, opName>.
// NOTE(review): GlobalSaddrTable is defined outside this view; presumably it
// pairs the two forms under the shared opName key — confirm at its definition.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass,
bit HasSignedOffset = 0> {
def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset>,
GlobalSaddrTable<0, opName>;
// Trailing 1 is the EnableSaddr argument of the class.
def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>,
GlobalSaddrTable<1, opName>;
}

class FlatScratchInst <string sv_op, string mode> {
Expand Down Expand Up @@ -603,7 +621,7 @@ defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
let OtherPredicates = [HasGFX10_BEncoding] in
def GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;

defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
Expand All @@ -612,7 +630,7 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VR
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
let OtherPredicates = [HasGFX10_BEncoding] in
def GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;

defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
Expand Down Expand Up @@ -1651,8 +1669,8 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x017>;
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;

// ENC_FLAT_SCRATCH.
defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1030_new.s
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@ global_load_dword_addtid v1, s[2:3] offset:16 glc slc dlc
global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
// GFX10: encoding: [0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00]

global_load_dword_addtid v1, off offset:16
// GFX10: encoding: [0x10,0x80,0x58,0xdc,0x00,0x00,0x7d,0x01]

global_load_dword_addtid v1, off offset:16 glc slc dlc
// GFX10: encoding: [0x10,0x90,0x5b,0xdc,0x00,0x00,0x7d,0x01]

global_store_dword_addtid v1, off offset:16 glc slc dlc
// GFX10: encoding: [0x10,0x90,0x5f,0xdc,0x00,0x01,0x7d,0x00]

global_store_dword v254, v1, s[2:3] offset:16
// GFX10: encoding: [0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00]

Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@
# GFX10: global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00

# GFX10: global_load_dword_addtid v1, off offset:16
0x10,0x80,0x58,0xdc,0x00,0x00,0x7d,0x01

# GFX10: global_load_dword_addtid v1, off offset:16 glc slc dlc
0x10,0x90,0x5b,0xdc,0x00,0x00,0x7d,0x01

# GFX10: global_store_dword_addtid v1, off offset:16 glc slc dlc
0x10,0x90,0x5f,0xdc,0x00,0x01,0x7d,0x00

# GFX10: global_store_dword v254, v1, s[2:3] offset:16
0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00

Expand Down

0 comments on commit 409a2f0

Please sign in to comment.