[AMDGPU] Use AddedComplexity for ret and noret atomic ops selection
This patch removes the predicate for return atomic ops and uses
AddedComplexity to distinguish their selection from their no-return
variants. This produces better matchers that don't unnecessarily check
the negated predicate when the initial predicate fails. It also
simplifies enabling no-return atomic op selection in GlobalISel.

Differential Revision: https://reviews.llvm.org/D128241
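For context, here is a condensed, schematic TableGen sketch of the scheme this patch moves to, assembled from the hunks below rather than taken verbatim (the FLAT_ATOMIC_ADD_F32 / FLAT_ATOMIC_ADD_F32_RTN pseudo names and the operand lists are simplified placeholders): only the no-return PatFrag keeps a use_empty() predicate, and the patterns built from it get AddedComplexity = 1, so the matcher tries the no-return form first and falls through to the plain return pattern when the predicate fails, without emitting a negated-predicate check.

// Only the no-return fragment carries a selection predicate; the return form
// is the plain intrinsic/node.
multiclass noret_op {
  let PredicateCode = [{ return SDValue(N, 0).use_empty(); }],
      GISelPredicateCode = [{ return false; }] in {
    def "_noret" : PatFrag<(ops node:$ptr, node:$data),
      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}

defm int_amdgcn_flat_atomic_fadd : noret_op;

// The no-return pattern gets AddedComplexity = 1, so it is tried first; if
// its predicate fails (the result has uses), selection falls through to the
// return pattern below. Pseudo names and operands are illustrative only.
let AddedComplexity = 1 in
def : GCNPat<
  (int_amdgcn_flat_atomic_fadd_noret i64:$ptr, f32:$data),
  (FLAT_ATOMIC_ADD_F32 $ptr, $data)
>;

def : GCNPat<  // return variant, default AddedComplexity = 0
  (f32 (int_amdgcn_flat_atomic_fadd i64:$ptr, f32:$data)),
  (FLAT_ATOMIC_ADD_F32_RTN $ptr, $data)
>;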
abinavpp committed Jul 8, 2022
1 parent c20a581 commit 7504c7a
Showing 5 changed files with 117 additions and 122 deletions.
52 changes: 19 additions & 33 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -546,59 +546,43 @@ def atomic_store_64_#as : PatFrag<(ops node:$ptr, node:$val),
// GlobalISelEmitter allows pattern matches where src and dst def count
// mismatch.

multiclass ret_noret_op {
let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }],
GISelPredicateCode = [{ return true; }] in {
def "_ret" : PatFrag<(ops node:$ptr, node:$data),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
}

multiclass noret_op {
let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }],
GISelPredicateCode = [{ return false; }] in {
def "_noret" : PatFrag<(ops node:$ptr, node:$data),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
}
}

defm int_amdgcn_flat_atomic_fadd : ret_noret_op;
defm int_amdgcn_flat_atomic_fadd_v2bf16 : ret_noret_op;
defm int_amdgcn_flat_atomic_fmin : ret_noret_op;
defm int_amdgcn_flat_atomic_fmax : ret_noret_op;
defm int_amdgcn_global_atomic_fadd : ret_noret_op;
defm int_amdgcn_global_atomic_fadd_v2bf16 : ret_noret_op;
defm int_amdgcn_global_atomic_fmin : ret_noret_op;
defm int_amdgcn_global_atomic_fmax : ret_noret_op;
defm int_amdgcn_ds_fadd_v2bf16 : ret_noret_op;
defm int_amdgcn_flat_atomic_fadd : noret_op;
defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_flat_atomic_fmin : noret_op;
defm int_amdgcn_flat_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_fadd : noret_op;
defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;

multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }],
GISelPredicateCode = [{ return false; }] in {
defm "_noret" : binary_atomic_op<atomic_op, IsInt>;
}

let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }],
GISelPredicateCode = [{ return true; }] in {
defm "_ret" : binary_atomic_op<atomic_op, IsInt>;
}
}

multiclass ret_noret_ternary_atomic_op<SDNode atomic_op> {
multiclass noret_ternary_atomic_op<SDNode atomic_op> {
let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }],
GISelPredicateCode = [{ return false; }] in {
defm "_noret" : ternary_atomic_op<atomic_op>;
}

let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }],
GISelPredicateCode = [{ return true; }] in {
defm "_ret" : ternary_atomic_op<atomic_op>;
}
}

multiclass binary_atomic_op_all_as<SDNode atomic_op, bit IsInt = 1> {
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
defm "_"#as : binary_atomic_op<atomic_op, IsInt>;
defm "_"#as : ret_noret_binary_atomic_op<atomic_op, IsInt>;
defm "_"#as : noret_binary_atomic_op<atomic_op, IsInt>;
}
}
}
@@ -640,13 +624,15 @@ def store_align16_local: PatFrag<(ops node:$val, node:$ptr),

let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local : ret_noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : ret_noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_local : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : ret_noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : ret_noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_region : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

//===----------------------------------------------------------------------===//
27 changes: 18 additions & 9 deletions llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1412,10 +1412,12 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">;
multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isIntr = 0> {
foreach RtnMode = ["ret", "noret"] in {

defvar Op = !cast<SDPatternOperator>(OpPrefix # "_" # RtnMode
defvar Op = !cast<SDPatternOperator>(OpPrefix
# !if(!eq(RtnMode, "ret"), "", "_noret")
# !if(isIntr, "", "_" # vt.Size));
defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");

let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in {
def : GCNPat<
(vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), vt:$vdata_in)),
(!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in,
@@ -1428,6 +1430,7 @@ multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isInt
(!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in,
VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset)
>;
} // end let AddedComplexity

} // end foreach RtnMode
}
@@ -1439,10 +1442,12 @@ multiclass BufferAtomicIntrPat<string OpPrefix, ValueType vt, string Inst> {
multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> {
foreach RtnMode = ["ret", "noret"] in {

defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global_" # RtnMode
defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global"
# !if(!eq(RtnMode, "ret"), "", "_noret")
# "_" # vt.Size);
defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");

let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in {
defvar OffsetResDag = (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix)
getVregSrcForVT<data_vt>.ret:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset,
offset:$offset);
@@ -1465,6 +1470,7 @@ multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst>
!if(!eq(vt, i32), sub0, sub0_sub1)),
Addr64ResDag)
>;
} // end let AddedComplexity

} // end foreach RtnMode
}
@@ -1495,13 +1501,14 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
list<string> RtnModes = ["ret", "noret"]> {
foreach RtnMode = RtnModes in {

defvar Op = !cast<SDPatternOperator>(!if(!eq(RtnMode, "none"),
OpPrefix, OpPrefix # "_" # RtnMode));
defvar InstSuffix = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")),
"_RTN", "");
defvar CachePolicy = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")),
defvar Op = !cast<SDPatternOperator>(OpPrefix
# !if(!eq(RtnMode, "ret"), "", "_noret"));

defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
defvar CachePolicy = !if(!eq(RtnMode, "ret"),
(set_glc $cachepolicy), (timm:$cachepolicy));

let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in {
def : GCNPat<
(vt (Op vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset,
timm:$offset, timm:$cachepolicy, 0)),
@@ -1534,6 +1541,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy)
>;
} // end let AddedComplexity

} // end foreach RtnMode
}
@@ -1551,7 +1559,7 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_or", i32, "BUFFER_ATOMIC_OR">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i32, "BUFFER_ATOMIC_XOR">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i32, "BUFFER_ATOMIC_INC">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i32, "BUFFER_ATOMIC_DEC">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["none"]>;
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["ret"]>;
defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i64, "BUFFER_ATOMIC_SWAP_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i64, "BUFFER_ATOMIC_ADD_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_sub", i64, "BUFFER_ATOMIC_SUB_X2">;
@@ -1643,7 +1651,8 @@ let SubtargetPredicate = isGFX90APlus in {

foreach RtnMode = ["ret", "noret"] in {

defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap # "_" # RtnMode);
defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap
# !if(!eq(RtnMode, "ret"), "", "_noret"));
defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy),
(timm:$cachepolicy));
77 changes: 47 additions & 30 deletions llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -950,10 +950,11 @@ defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;

} // End AddedComplexity = 100

class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
(inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))
>;
class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
bit gds=0> : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
(inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))> {
let AddedComplexity = complexity;
}

multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
@@ -965,75 +966,88 @@ multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
!cast<PatFrag>(frag#"_local_"#vt.Size)>;
}

def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), 1>;
def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
}

multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
def : DSAtomicRetPat<inst, vt,
!cast<PatFrag>(frag#"_local_m0_ret_"#vt.Size)>;
!cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
def : DSAtomicRetPat<noRetInst, vt,
!cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size)>;
!cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size), /* complexity */ 1>;
}

let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
!cast<PatFrag>(frag#"_local_ret_"#vt.Size)>;
!cast<PatFrag>(frag#"_local_"#vt.Size)>;
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
!cast<PatFrag>(frag#"_local_noret_"#vt.Size)>;
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
}

def : DSAtomicRetPat<inst, vt,
!cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>;
!cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
def : DSAtomicRetPat<noRetInst, vt,
!cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>;
!cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
/* complexity */ 1, /* gds */ 1>;
}



let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
// Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode.
class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag,
int complexity = 0, bit gds=0> : GCNPat<
(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap),
(inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds))
>;
(inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds))> {
let AddedComplexity = complexity;
}

multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_ret_"#vt.Size)>;
def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size)>;
def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size),
/* complexity */ 1>;
}

let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
!cast<PatFrag>(frag#"_local_ret_"#vt.Size)>;
!cast<PatFrag>(frag#"_local_"#vt.Size)>;
def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
!cast<PatFrag>(frag#"_local_noret_"#vt.Size)>;
!cast<PatFrag>(frag#"_local_noret_"#vt.Size),
/* complexity */ 1>;
}

def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>;
def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>;
def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
/* complexity */ 1, /* gds */ 1>;
}
} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10

let SubtargetPredicate = isGFX11Plus in {
// The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag,
int complexity = 0, bit gds=0> : GCNPat<
(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap),
(inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, offset:$offset, (i1 gds))
>;
(inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, offset:$offset, (i1 gds))> {
let AddedComplexity = complexity;
}

multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> {

def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
!cast<PatFrag>(frag#"_local_ret_"#vt.Size)>;
!cast<PatFrag>(frag#"_local_"#vt.Size)>;
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
!cast<PatFrag>(frag#"_local_noret_"#vt.Size)>;
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;

def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>;
def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>;
def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
/* complexity */ 1, /* gds */ 1>;
}
} // End SubtargetPredicate = isGFX11Plus

@@ -1090,17 +1104,20 @@ defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp
} // End SubtargetPredicate = isGFX11Plus

let SubtargetPredicate = isGFX90APlus in {
def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_ret_64>;
def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_64>;
let AddedComplexity = 1 in
def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_64>;
}

let SubtargetPredicate = isGFX940Plus in {
def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_ret_32>;
def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_32>;
let AddedComplexity = 1 in
def : DSAtomicRetPat<DS_PK_ADD_F16, v2f16, atomic_load_fadd_v2f16_local_noret_32>;
def : GCNPat <
(v2i16 (int_amdgcn_ds_fadd_v2bf16_ret i32:$ptr, v2i16:$src)),
(v2i16 (int_amdgcn_ds_fadd_v2bf16 i32:$ptr, v2i16:$src)),
(DS_PK_ADD_RTN_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
>;
let AddedComplexity = 1 in
def : GCNPat <
(v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)),
(DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
