diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index eaa1870f4be28..7003a40a940aa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2589,6 +2589,8 @@ def NotHasTrue16BitInsts : True16PredicateClass<"!Subtarget->hasTrue16BitInsts() // only allow 32-bit registers in operands and use low halves thereof. def UseRealTrue16Insts : True16PredicateClass<"Subtarget->useRealTrue16Insts()">, AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>; +def NotUseRealTrue16Insts : True16PredicateClass<"!Subtarget->useRealTrue16Insts()">, + AssemblerPredicate<(not (all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts))>; def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && " "!Subtarget->useRealTrue16Insts()">, AssemblerPredicate<(all_of FeatureTrue16BitInsts)>; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index f2e432fa8d7f5..b2ff5a11aec6e 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -969,10 +969,9 @@ multiclass DSReadPat_t16 { } let OtherPredicates = [NotLDSRequiresM0Init] in { - foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in - let True16Predicate = p in { - def : DSReadPat(!cast(inst)#"_gfx9"), vt, !cast(frag)>; - } + let True16Predicate = NotUseRealTrue16Insts in { + def : DSReadPat(!cast(inst)#"_gfx9"), vt, !cast(frag)>; + } let True16Predicate = UseRealTrue16Insts in { def : DSReadPat(!cast(inst)#"_t16"), vt, !cast(frag)>; } @@ -1050,10 +1049,9 @@ multiclass DSWritePat_t16 { } let OtherPredicates = [NotLDSRequiresM0Init] in { - foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in - let True16Predicate = p in { - def : DSWritePat(!cast(inst)#"_gfx9"), vt, !cast(frag)>; - } + let True16Predicate = NotUseRealTrue16Insts in { + def : DSWritePat(!cast(inst)#"_gfx9"), vt, !cast(frag)>; + } let True16Predicate = UseRealTrue16Insts in { def : DSWritePat(!cast(inst)#"_t16"), vt, !cast(frag)>; } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 9f33bac4c56ea..5a22b23cecf86 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1982,8 +1982,7 @@ defm : FlatLoadPats ; defm : FlatLoadPats ; defm : FlatLoadPats ; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { defm : FlatLoadPats ; defm : FlatLoadPats ; defm : FlatLoadPats ; @@ -2127,8 +2126,7 @@ defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; @@ -2187,8 +2185,7 @@ defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = p in { +let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = NotUseRealTrue16Insts in { defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; @@ -2356,8 +2353,7 @@ defm : ScratchFLATLoadPats ; defm : ScratchFLATLoadPats ; defm : ScratchFLATLoadPats ; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { defm : ScratchFLATLoadPats ; defm : ScratchFLATLoadPats ; defm : ScratchFLATLoadPats ; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index d4c1bc6d84384..01cf5a6b16f17 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1466,8 +1466,7 @@ class VOPSelectPat_t16 : GCNPat < def : VOPSelectModsPat ; def : VOPSelectModsPat ; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : VOPSelectPat ; def : VOPSelectPat ; } // End True16Predicate = p @@ -2137,8 +2136,7 @@ def : GCNPat < >; foreach fp16vt = [f16, bf16] in { -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let SubtargetPredicate = p in { +let SubtargetPredicate = NotUseRealTrue16Insts in { def : GCNPat < (fabs (fp16vt VGPR_32:$src)), (V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src) @@ -2230,8 +2228,7 @@ def : GCNPat < } foreach fp16vt = [f16, bf16] in { -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (fcopysign fp16vt:$src0, fp16vt:$src1), (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1) @@ -2353,23 +2350,21 @@ def : GCNPat < (S_MOV_B32 $ga) >; -foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in { - let True16Predicate = pred in { - def : GCNPat < - (VGPRImm<(i16 imm)>:$imm), - (V_MOV_B32_e32 imm:$imm) - >; +let True16Predicate = NotUseRealTrue16Insts in { + def : GCNPat < + (VGPRImm<(i16 imm)>:$imm), + (V_MOV_B32_e32 imm:$imm) + >; - // FIXME: Workaround for ordering issue with peephole optimizer where - // a register class copy interferes with immediate folding. Should - // use s_mov_b32, which can be shrunk to s_movk_i32 + // FIXME: Workaround for ordering issue with peephole optimizer where + // a register class copy interferes with immediate folding. Should + // use s_mov_b32, which can be shrunk to s_movk_i32 - foreach vt = [f16, bf16] in { - def : GCNPat < - (VGPRImm<(vt fpimm)>:$imm), - (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm))) - >; - } + foreach vt = [f16, bf16] in { + def : GCNPat < + (VGPRImm<(vt fpimm)>:$imm), + (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm))) + >; } } @@ -2858,8 +2853,7 @@ def : GCNPat< (i32 (DivergentSextInreg i32:$src)), (V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (i16 (DivergentSextInreg i16:$src)), (V_BFE_I32_e64 $src, (i32 0), (i32 1)) @@ -3204,8 +3198,7 @@ def : GCNPat< } } // AddedComplexity = 1 -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat< (i32 (DivergentUnaryFrag i16:$src)), (V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src) @@ -3415,8 +3408,7 @@ def : GCNPat < // Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24) // The 12s emit 0s. -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (i16 (bswap i16:$a)), (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001))) @@ -3669,8 +3661,7 @@ def : GCNPat < (S_LSHL_B32 SReg_32:$src1, (i16 16)) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (v2i16 (DivergentBinFrag (i16 0), (i16 VGPR_32:$src1))), (v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1)) @@ -3706,8 +3697,7 @@ def : GCNPat < (COPY_TO_REGCLASS SReg_32:$src0, SReg_32) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (vecTy (DivergentBinFrag (Ty VGPR_32:$src0), (Ty undef))), (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32) @@ -3734,8 +3724,7 @@ def : GCNPat < >; let SubtargetPredicate = HasVOP3PInsts in { -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in def : GCNPat < (v2i16 (DivergentBinFrag (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))), (v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0)))) @@ -3765,8 +3754,7 @@ def : GCNPat < (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { // Take the lower 16 bits from each VGPR_32 and concat them def : GCNPat < (vecTy (DivergentBinFrag (Ty VGPR_32:$a), (Ty VGPR_32:$b))), @@ -3837,8 +3825,7 @@ def : GCNPat < >; // Take the upper 16 bits from each VGPR_32 and concat them -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in def : GCNPat < (vecTy (DivergentBinFrag (Ty !if(!eq(Ty, i16), @@ -3880,8 +3867,7 @@ def : GCNPat < (v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1)) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (v2f16 (scalar_to_vector f16:$src0)), (COPY $src0) diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 6230c17e20804..77df72111605e 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -1561,8 +1561,7 @@ def : GCNPat < } // End OtherPredicates = [isGFX8Plus] -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let OtherPredicates = [isGFX8Plus, p] in { +let OtherPredicates = [isGFX8Plus, NotUseRealTrue16Insts] in { def : GCNPat< (i32 (anyext i16:$src)), (COPY $src) diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 37d92bc5076de..30dab55df7c29 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1378,8 +1378,7 @@ class ZExt_i16_i1_Pat : GCNPat < $src) >; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in { +let True16Predicate = NotUseRealTrue16Insts in { def : GCNPat < (and i16:$src0, i16:$src1), (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index e6a7c35dce0be..4a2b54dde68d3 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -387,8 +387,7 @@ let SchedRW = [Write64Bit] in { } // End SchedRW = [Write64Bit] } // End isReMaterializable = 1 -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in def : GCNPat< (i32 (DivergentUnaryFrag i16:$src)), (i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10))) @@ -501,8 +500,7 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1 -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in def : GCNPat< (i64 (DivergentUnaryFrag i16:$src)), (REG_SEQUENCE VReg_64, diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 52ee1e874ad86..5daf860d540ca 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -402,8 +402,7 @@ defm V_FMA_MIX_F16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_f16_t16", VOP3P_Mix_Profi defm : MadFmaMixFP32Pats; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in defm : MadFmaMixFP16Pats; let True16Predicate = UseRealTrue16Insts in defm : MadFmaMixFP16Pats_t16; @@ -428,8 +427,7 @@ defm V_FMA_MIX_BF16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_bf16_t16", VOP3P_Mix_Pro } // End isCommutable = 1 defm : MadFmaMixFP32Pats; -foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in -let True16Predicate = p in +let True16Predicate = NotUseRealTrue16Insts in defm : MadFmaMixFP16Pats; let True16Predicate = UseRealTrue16Insts in defm : MadFmaMixFP16Pats_t16;