-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] Introduce and use NotUseRealTrue16Insts. NFC. #161373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This removes ~2000 lines from both AMDGPUGenDAGISel.inc and AMDGPUGenGlobalISel.inc.
@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) Changes: This removes ~2000 lines from both AMDGPUGenDAGISel.inc and AMDGPUGenGlobalISel.inc. Full diff: https://github.com/llvm/llvm-project/pull/161373.diff 8 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index eaa1870f4be28..7003a40a940aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2589,6 +2589,8 @@ def NotHasTrue16BitInsts : True16PredicateClass<"!Subtarget->hasTrue16BitInsts()
// only allow 32-bit registers in operands and use low halves thereof.
def UseRealTrue16Insts : True16PredicateClass<"Subtarget->useRealTrue16Insts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
+def NotUseRealTrue16Insts : True16PredicateClass<"!Subtarget->useRealTrue16Insts()">,
+ AssemblerPredicate<(not (all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts))>;
def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && "
"!Subtarget->useRealTrue16Insts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index f2e432fa8d7f5..b2ff5a11aec6e 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -969,10 +969,9 @@ multiclass DSReadPat_t16<DS_Pseudo inst, ValueType vt, string frag> {
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
- foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
- let True16Predicate = p in {
- def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
- }
+ let True16Predicate = NotUseRealTrue16Insts in {
+ def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+ }
let True16Predicate = UseRealTrue16Insts in {
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
}
@@ -1050,10 +1049,9 @@ multiclass DSWritePat_t16 <DS_Pseudo inst, ValueType vt, string frag> {
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
- foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
- let True16Predicate = p in {
- def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
- }
+ let True16Predicate = NotUseRealTrue16Insts in {
+ def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+ }
let True16Predicate = UseRealTrue16Insts in {
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
}
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9f33bac4c56ea..5a22b23cecf86 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1982,8 +1982,7 @@ defm : FlatLoadPats <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
@@ -2127,8 +2126,7 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
@@ -2187,8 +2185,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = p in {
+let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = NotUseRealTrue16Insts in {
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
@@ -2356,8 +2353,7 @@ defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d4c1bc6d84384..01cf5a6b16f17 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1466,8 +1466,7 @@ class VOPSelectPat_t16 <ValueType vt> : GCNPat <
def : VOPSelectModsPat <i32>;
def : VOPSelectModsPat <f32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : VOPSelectPat <f16>;
def : VOPSelectPat <i16>;
} // End True16Predicate = p
@@ -2137,8 +2136,7 @@ def : GCNPat <
>;
foreach fp16vt = [f16, bf16] in {
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let SubtargetPredicate = p in {
+let SubtargetPredicate = NotUseRealTrue16Insts in {
def : GCNPat <
(fabs (fp16vt VGPR_32:$src)),
(V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
@@ -2230,8 +2228,7 @@ def : GCNPat <
}
foreach fp16vt = [f16, bf16] in {
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(fcopysign fp16vt:$src0, fp16vt:$src1),
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
@@ -2353,23 +2350,21 @@ def : GCNPat <
(S_MOV_B32 $ga)
>;
-foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
- let True16Predicate = pred in {
- def : GCNPat <
- (VGPRImm<(i16 imm)>:$imm),
- (V_MOV_B32_e32 imm:$imm)
- >;
+let True16Predicate = NotUseRealTrue16Insts in {
+ def : GCNPat <
+ (VGPRImm<(i16 imm)>:$imm),
+ (V_MOV_B32_e32 imm:$imm)
+ >;
- // FIXME: Workaround for ordering issue with peephole optimizer where
- // a register class copy interferes with immediate folding. Should
- // use s_mov_b32, which can be shrunk to s_movk_i32
+ // FIXME: Workaround for ordering issue with peephole optimizer where
+ // a register class copy interferes with immediate folding. Should
+ // use s_mov_b32, which can be shrunk to s_movk_i32
- foreach vt = [f16, bf16] in {
- def : GCNPat <
- (VGPRImm<(vt fpimm)>:$imm),
- (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
- >;
- }
+ foreach vt = [f16, bf16] in {
+ def : GCNPat <
+ (VGPRImm<(vt fpimm)>:$imm),
+ (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
+ >;
}
}
@@ -2858,8 +2853,7 @@ def : GCNPat<
(i32 (DivergentSextInreg<i1> i32:$src)),
(V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(i16 (DivergentSextInreg<i1> i16:$src)),
(V_BFE_I32_e64 $src, (i32 0), (i32 1))
@@ -3204,8 +3198,7 @@ def : GCNPat<
}
} // AddedComplexity = 1
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat<
(i32 (DivergentUnaryFrag<zext> i16:$src)),
(V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src)
@@ -3415,8 +3408,7 @@ def : GCNPat <
// Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24)
// The 12s emit 0s.
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(i16 (bswap i16:$a)),
(V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
@@ -3669,8 +3661,7 @@ def : GCNPat <
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 VGPR_32:$src1))),
(v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1))
@@ -3706,8 +3697,7 @@ def : GCNPat <
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))),
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
@@ -3734,8 +3724,7 @@ def : GCNPat <
>;
let SubtargetPredicate = HasVOP3PInsts in {
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))),
(v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
@@ -3765,8 +3754,7 @@ def : GCNPat <
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
// Take the lower 16 bits from each VGPR_32 and concat them
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))),
@@ -3837,8 +3825,7 @@ def : GCNPat <
>;
// Take the upper 16 bits from each VGPR_32 and concat them
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector>
(Ty !if(!eq(Ty, i16),
@@ -3880,8 +3867,7 @@ def : GCNPat <
(v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1))
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(v2f16 (scalar_to_vector f16:$src0)),
(COPY $src0)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 6230c17e20804..77df72111605e 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1561,8 +1561,7 @@ def : GCNPat <
} // End OtherPredicates = [isGFX8Plus]
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let OtherPredicates = [isGFX8Plus, p] in {
+let OtherPredicates = [isGFX8Plus, NotUseRealTrue16Insts] in {
def : GCNPat<
(i32 (anyext i16:$src)),
(COPY $src)
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 37d92bc5076de..30dab55df7c29 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1378,8 +1378,7 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
$src)
>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(and i16:$src0, i16:$src1),
(V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index e6a7c35dce0be..4a2b54dde68d3 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -387,8 +387,7 @@ let SchedRW = [Write64Bit] in {
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat<
(i32 (DivergentUnaryFrag<sext> i16:$src)),
(i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
@@ -501,8 +500,7 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat<
(i64 (DivergentUnaryFrag<sext> i16:$src)),
(REG_SEQUENCE VReg_64,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 52ee1e874ad86..5daf860d540ca 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -402,8 +402,7 @@ defm V_FMA_MIX_F16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_f16_t16", VOP3P_Mix_Profi
defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
let True16Predicate = UseRealTrue16Insts in
defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_F16_t16>;
@@ -428,8 +427,7 @@ defm V_FMA_MIX_BF16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_bf16_t16", VOP3P_Mix_Pro
} // End isCommutable = 1
defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32_BF16, bf16>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_BF16, V_FMA_MIXHI_BF16, bf16, v2bf16>;
let True16Predicate = UseRealTrue16Insts in
defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, but please give others a chance to comment.
In terms of fundamental modes of instructions, we need three: NotHasTrue16BitInsts, UseFakeTrue16Insts and UseRealTrue16Insts. We have three versions of instructions that correspond to those. For patterns we need two fundamental modes: the true16 pattern and the other pattern (which is used for both NotHasTrue16BitInsts and UseFakeTrue16Insts).
Then we can talk about having composite predicates on top of that, to make the Tablegen cleaner.
HasTrue16BitInsts is a composite of UseFakeTrue16Insts and UseRealTrue16Insts. This new predicate, NotUseRealTrue16Insts, is a composite of UseFakeTrue16Insts and NotHasTrue16BitInsts.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This removes ~2000 lines from both AMDGPUGenDAGISel.inc and AMDGPUGenGlobalISel.inc.
This removes ~2000 lines from both AMDGPUGenDAGISel.inc and
AMDGPUGenGlobalISel.inc.