Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -2589,6 +2589,8 @@ def NotHasTrue16BitInsts : True16PredicateClass<"!Subtarget->hasTrue16BitInsts()
// only allow 32-bit registers in operands and use low halves thereof.
def UseRealTrue16Insts : True16PredicateClass<"Subtarget->useRealTrue16Insts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
def NotUseRealTrue16Insts : True16PredicateClass<"!Subtarget->useRealTrue16Insts()">,
AssemblerPredicate<(not (all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts))>;
def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && "
"!Subtarget->useRealTrue16Insts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
Expand Down
14 changes: 6 additions & 8 deletions llvm/lib/Target/AMDGPU/DSInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -969,10 +969,9 @@ multiclass DSReadPat_t16<DS_Pseudo inst, ValueType vt, string frag> {
}

let OtherPredicates = [NotLDSRequiresM0Init] in {
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
}
let True16Predicate = NotUseRealTrue16Insts in {
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
}
let True16Predicate = UseRealTrue16Insts in {
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
}
Expand Down Expand Up @@ -1050,10 +1049,9 @@ multiclass DSWritePat_t16 <DS_Pseudo inst, ValueType vt, string frag> {
}

let OtherPredicates = [NotLDSRequiresM0Init] in {
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
}
let True16Predicate = NotUseRealTrue16Insts in {
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
}
let True16Predicate = UseRealTrue16Insts in {
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
}
Expand Down
12 changes: 4 additions & 8 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1982,8 +1982,7 @@ defm : FlatLoadPats <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_DWORDX3, load_flat, v3i32>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
Expand Down Expand Up @@ -2127,8 +2126,7 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
Expand Down Expand Up @@ -2187,8 +2185,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = p in {
let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = NotUseRealTrue16Insts in {
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
Expand Down Expand Up @@ -2356,8 +2353,7 @@ defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;
Expand Down
64 changes: 25 additions & 39 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1466,8 +1466,7 @@ class VOPSelectPat_t16 <ValueType vt> : GCNPat <

def : VOPSelectModsPat <i32>;
def : VOPSelectModsPat <f32>;
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
def : VOPSelectPat <f16>;
def : VOPSelectPat <i16>;
} // End True16Predicate = p
Expand Down Expand Up @@ -2137,8 +2136,7 @@ def : GCNPat <
>;

foreach fp16vt = [f16, bf16] in {
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let SubtargetPredicate = p in {
let SubtargetPredicate = NotUseRealTrue16Insts in {
def : GCNPat <
(fabs (fp16vt VGPR_32:$src)),
(V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
Expand Down Expand Up @@ -2230,8 +2228,7 @@ def : GCNPat <
}

foreach fp16vt = [f16, bf16] in {
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(fcopysign fp16vt:$src0, fp16vt:$src1),
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
Expand Down Expand Up @@ -2353,23 +2350,21 @@ def : GCNPat <
(S_MOV_B32 $ga)
>;

foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
let True16Predicate = pred in {
def : GCNPat <
(VGPRImm<(i16 imm)>:$imm),
(V_MOV_B32_e32 imm:$imm)
>;
let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(VGPRImm<(i16 imm)>:$imm),
(V_MOV_B32_e32 imm:$imm)
>;

// FIXME: Workaround for ordering issue with peephole optimizer where
// a register class copy interferes with immediate folding. Should
// use s_mov_b32, which can be shrunk to s_movk_i32
// FIXME: Workaround for ordering issue with peephole optimizer where
// a register class copy interferes with immediate folding. Should
// use s_mov_b32, which can be shrunk to s_movk_i32

foreach vt = [f16, bf16] in {
def : GCNPat <
(VGPRImm<(vt fpimm)>:$imm),
(V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
>;
}
foreach vt = [f16, bf16] in {
def : GCNPat <
(VGPRImm<(vt fpimm)>:$imm),
(V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
>;
}
}

Expand Down Expand Up @@ -2858,8 +2853,7 @@ def : GCNPat<
(i32 (DivergentSextInreg<i1> i32:$src)),
(V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(i16 (DivergentSextInreg<i1> i16:$src)),
(V_BFE_I32_e64 $src, (i32 0), (i32 1))
Expand Down Expand Up @@ -3204,8 +3198,7 @@ def : GCNPat<
}
} // AddedComplexity = 1

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat<
(i32 (DivergentUnaryFrag<zext> i16:$src)),
(V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src)
Expand Down Expand Up @@ -3415,8 +3408,7 @@ def : GCNPat <

// Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24)
// The 12s emit 0s.
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(i16 (bswap i16:$a)),
(V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
Expand Down Expand Up @@ -3669,8 +3661,7 @@ def : GCNPat <
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 VGPR_32:$src1))),
(v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1))
Expand Down Expand Up @@ -3706,8 +3697,7 @@ def : GCNPat <
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))),
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
Expand All @@ -3734,8 +3724,7 @@ def : GCNPat <
>;

let SubtargetPredicate = HasVOP3PInsts in {
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in
let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))),
(v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
Expand Down Expand Up @@ -3765,8 +3754,7 @@ def : GCNPat <
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
// Take the lower 16 bits from each VGPR_32 and concat them
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))),
Expand Down Expand Up @@ -3837,8 +3825,7 @@ def : GCNPat <
>;

// Take the upper 16 bits from each VGPR_32 and concat them
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in
let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector>
(Ty !if(!eq(Ty, i16),
Expand Down Expand Up @@ -3880,8 +3867,7 @@ def : GCNPat <
(v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1))
>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(v2f16 (scalar_to_vector f16:$src0)),
(COPY $src0)
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1561,8 +1561,7 @@ def : GCNPat <

} // End OtherPredicates = [isGFX8Plus]

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let OtherPredicates = [isGFX8Plus, p] in {
let OtherPredicates = [isGFX8Plus, NotUseRealTrue16Insts] in {
def : GCNPat<
(i32 (anyext i16:$src)),
(COPY $src)
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1378,8 +1378,7 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
$src)
>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
let True16Predicate = NotUseRealTrue16Insts in {
def : GCNPat <
(and i16:$src0, i16:$src1),
(V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
Expand Down
6 changes: 2 additions & 4 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,7 @@ let SchedRW = [Write64Bit] in {
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in
let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat<
(i32 (DivergentUnaryFrag<sext> i16:$src)),
(i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
Expand Down Expand Up @@ -501,8 +500,7 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32

} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in
let True16Predicate = NotUseRealTrue16Insts in
def : GCNPat<
(i64 (DivergentUnaryFrag<sext> i16:$src)),
(REG_SEQUENCE VReg_64,
Expand Down
6 changes: 2 additions & 4 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,7 @@ defm V_FMA_MIX_F16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_f16_t16", VOP3P_Mix_Profi

defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in
let True16Predicate = NotUseRealTrue16Insts in
defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
let True16Predicate = UseRealTrue16Insts in
defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_F16_t16>;
Expand All @@ -428,8 +427,7 @@ defm V_FMA_MIX_BF16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_bf16_t16", VOP3P_Mix_Pro
} // End isCommutable = 1

defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32_BF16, bf16>;
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in
let True16Predicate = NotUseRealTrue16Insts in
defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_BF16, V_FMA_MIXHI_BF16, bf16, v2bf16>;
let True16Predicate = UseRealTrue16Insts in
defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>;
Expand Down