diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index a9a33c7617d7d..2111e82e1a99d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -869,6 +869,15 @@ class CombinerHelper {
   /// Combine insert vector element OOB.
   bool matchInsertVectorElementOOB(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Match a G_FREEZE \p MI whose source register has a single non-debug use
+  /// and is defined by an instruction with at most one operand that may be
+  /// undef or poison. On success \p MatchInfo receives a function that either
+  /// moves the freeze onto that operand or, when there is no such operand,
+  /// removes the freeze altogether.
+  /// \returns true if the combine applies.
+  bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI,
+                                             BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 2a3145b635e6c..2b3efc3b609f0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -34,6 +34,24 @@ class GenericMachineInstr : public MachineInstr {
   static bool classof(const MachineInstr *MI) {
     return isPreISelGenericOpcode(MI->getOpcode());
   }
+
+  /// Returns true if any of the flags in PoisonGeneratingFlags is set on this
+  /// instruction.
+  bool hasPoisonGeneratingFlags() const {
+    return (getFlags() & PoisonGeneratingFlags) != 0;
+  }
+
+  /// Clears every flag in PoisonGeneratingFlags on this instruction.
+  void dropPoisonGeneratingFlags() {
+    clearFlags(PoisonGeneratingFlags);
+    assert(!hasPoisonGeneratingFlags());
+  }
+
+private:
+  /// Flags this class treats as poison-generating: the wrap, exact, disjoint,
+  /// non-negative, no-NaNs and no-infs flags.
+  static constexpr unsigned PoisonGeneratingFlags =
+      NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg | FmNoNans | FmNoInfs;
 };
 
 /// Provides common memory operand functionality.
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 2b0c5d166d88b..db48a0ae55145 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -416,6 +416,14 @@ class MachineInstr
     Flags &= ~((uint32_t)Flag);
   }
 
+  /// Clear every MI flag whose bit is set in the mask \p flags; all other
+  /// flags are left untouched.
+  void clearFlags(unsigned flags) {
+    assert(isUInt<LLVM_MI_FLAGS_BITS>(flags) &&
+           "flags to be cleared are out of range for the Flags field");
+    Flags &= ~flags;
+  }
+
   /// Return true if MI is in a bundle (but not the first MI in a bundle).
   ///
   /// A bundle looks like this before it's finalized:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5d4b5a2479f6a..34698f195615b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -220,6 +220,13 @@ def idempotent_prop : GICombineRule<
   (match (idempotent_prop_frags $dst, $src)),
   (apply (GIReplaceReg $dst, $src))>;
 
+// Convert freeze(Op(Op0, NonPoisonOps...)) to Op(freeze(Op0), NonPoisonOps...)
+// when Op0 is not guaranteed non-poison
+def push_freeze_to_prevent_poison_from_propagating : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_FREEZE $dst, $src):$root,
+         [{ return !isGuaranteedNotToBePoison(${src}.getReg(), MRI) && Helper.matchFreezeOfSingleMaybePoisonOperand(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
 
 def extending_loads : GICombineRule<
   (defs root:$root, extending_load_matchdata:$matchinfo),
@@ -1713,7 +1720,8 @@ def all_combines : GICombineGroup<[trivial_combines, vector_ops_combines,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
     combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
-    sext_trunc, zext_trunc, combine_shuffle_concat]>;
+    sext_trunc, zext_trunc, combine_shuffle_concat,
+    push_freeze_to_prevent_poison_from_propagating]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 22eb4a3e0d7cb..4cc602b5c8709 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -223,6 +223,86 @@ void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
   replaceRegWith(MRI, DstReg, SrcReg);
 }
 
+/// Matches freeze(Op(Op0, NonPoisonOps...)) where Op cannot itself create
+/// undef or poison (ignoring poison-generating flags) and at most one operand
+/// may be undef or poison. \p MatchInfo is set to a function that rewrites the
+/// pattern to Op(freeze(Op0), NonPoisonOps...), or removes the freeze if no
+/// operand needs freezing, after dropping Op's poison-generating flags.
+bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
+    MachineInstr &MI, BuildFnTy &MatchInfo) {
+  // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
+  Register DstOp = MI.getOperand(0).getReg();
+  Register OrigOp = MI.getOperand(1).getReg();
+
+  if (!MRI.hasOneNonDBGUse(OrigOp))
+    return false;
+
+  MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
+  // Even if only a single operand of the PHI is not guaranteed non-poison,
+  // moving freeze() backwards across a PHI can cause optimization issues for
+  // other users of that operand.
+  //
+  // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
+  // the source register is unprofitable because it makes the freeze() more
+  // strict than is necessary (it would affect the whole register instead of
+  // just the subreg being frozen).
+  if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
+    return false;
+
+  if (canCreateUndefOrPoison(OrigOp, MRI,
+                             /*ConsiderFlagsAndMetadata=*/false))
+    return false;
+
+  std::optional<MachineOperand> MaybePoisonOperand;
+  for (MachineOperand &Operand : OrigDef->uses()) {
+    if (!Operand.isReg())
+      return false;
+
+    if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
+      continue;
+
+    if (!MaybePoisonOperand)
+      MaybePoisonOperand = Operand;
+    else {
+      // We have more than one maybe-poison operand. Moving the freeze is
+      // unsafe.
+      return false;
+    }
+  }
+
+  // canCreateUndefOrPoison() was asked to ignore poison-generating flags, so
+  // the rewrite is only valid once OrigDef's flags are dropped. That is done
+  // in the build functions below, with observer notifications, so that
+  // matching itself leaves the instruction untouched.
+
+  // Eliminate freeze if all operands are guaranteed non-poison.
+  if (!MaybePoisonOperand) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      Observer.changingInstr(*OrigDef);
+      cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+      Observer.changedInstr(*OrigDef);
+      replaceRegWith(MRI, DstOp, OrigOp);
+    };
+    return true;
+  }
+
+  Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
+  LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
+
+  MatchInfo = [=](MachineIRBuilder &B) mutable {
+    Observer.changingInstr(*OrigDef);
+    cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+    Observer.changedInstr(*OrigDef);
+    B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
+    auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
+    replaceRegOpWith(
+        MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
+        Freeze.getReg(0));
+    replaceRegWith(MRI, DstOp, OrigOp);
+  };
+  return true;
+}
+
 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
                                                SmallVector<Register> &Ops) {
   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index cd5dc0e01ed0e..f455482e02943 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1745,11 +1745,20 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
                                    UndefPoisonKind Kind) {
   MachineInstr *RegDef = MRI.getVRegDef(Reg);
 
+  if (auto *GMI = dyn_cast<GenericMachineInstr>(RegDef)) {
+    if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
+        GMI->hasPoisonGeneratingFlags())
+      return true;
+  } else {
+    // Conservatively return true.
+    return true;
+  }
+
   switch (RegDef->getOpcode()) {
   case TargetOpcode::G_FREEZE:
     return false;
   default:
-    return true;
+    return !isa<GCastOp>(RegDef) && !isa<GBinOp>(RegDef);
   }
 }
 
@@ -1767,8 +1776,20 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
     return true;
   case TargetOpcode::G_IMPLICIT_DEF:
     return !includesUndef(Kind);
-  default:
-    return false;
+  default: {
+    // Any other definition is safe only if the instruction cannot create
+    // undef/poison itself (flags included) and every register operand is in
+    // turn guaranteed not to be undef/poison.
+    auto MOCheck = [&](const MachineOperand &MO) {
+      if (!MO.isReg())
+        return true;
+      return ::isGuaranteedNotToBeUndefOrPoison(MO.getReg(), MRI, Depth + 1,
+                                                Kind);
+    };
+    return !::canCreateUndefOrPoison(Reg, MRI,
+                                     /*ConsiderFlagsAndMetadata=*/true, Kind) &&
+           all_of(RegDef->uses(), MOCheck);
+  }
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 10cad6d192440..1c7f6b870d390 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -295,5 +295,6 @@ def AArch64PostLegalizerCombiner
                         ptr_add_immed_chain, overlapping_and,
                         split_store_zero_128, undef_combines,
                         select_to_minmax, or_to_bsp, combine_concat_vector,
-                        commute_constant_to_rhs]> {
+                        commute_constant_to_rhs,
+                        push_freeze_to_prevent_poison_from_propagating]> {
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 353c1550d6974..074d4ecbd8785 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -117,9 +117,9 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
-    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64)
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %f
-    ; CHECK-NEXT: %sel:_(s1) = G_OR %c, [[FREEZE]]
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64)
+    ; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f
     ; CHECK-NEXT: %ext:_(s32) =
G_ANYEXT %sel(s1) ; CHECK-NEXT: $w0 = COPY %ext(s32) %0:_(s64) = COPY $x0 @@ -144,9 +144,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %f - ; CHECK-NEXT: %sel:_(s1) = G_OR %c, [[FREEZE]] + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]] + ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64) + ; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) ; CHECK-NEXT: $w0 = COPY %ext(s32) %0:_(s64) = COPY $x0 @@ -172,9 +172,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d2 ; CHECK-NEXT: %c:_(<2 x s1>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK-NEXT: %f:_(<2 x s1>) = G_TRUNC [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s1>) = G_FREEZE %f - ; CHECK-NEXT: %sel:_(<2 x s1>) = G_OR %c, [[FREEZE]] + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY1]] + ; CHECK-NEXT: %f:_(<2 x s1>) = G_TRUNC [[FREEZE]](<2 x s32>) + ; CHECK-NEXT: %sel:_(<2 x s1>) = G_OR %c, %f ; CHECK-NEXT: %ext:_(<2 x s32>) = G_ANYEXT %sel(<2 x s1>) ; CHECK-NEXT: $d0 = COPY %ext(<2 x s32>) %0:_(<2 x s32>) = COPY $d0 @@ -201,9 +201,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t - ; CHECK-NEXT: %sel:_(s1) = G_AND %c, [[FREEZE]] + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]] + ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64) + ; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) ; CHECK-NEXT: $w0 = COPY %ext(s32) %0:_(s64) = COPY $x0 @@ -229,9 +229,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY 
$x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t - ; CHECK-NEXT: %sel:_(s1) = G_AND %c, [[FREEZE]] + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]] + ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64) + ; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) ; CHECK-NEXT: $w0 = COPY %ext(s32) %0:_(s64) = COPY $x0 @@ -257,11 +257,11 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]] + ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64) ; CHECK-NEXT: %one:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, %one - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t - ; CHECK-NEXT: %sel:_(s1) = G_OR [[XOR]], [[FREEZE]] + ; CHECK-NEXT: %sel:_(s1) = G_OR [[XOR]], %t ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) ; CHECK-NEXT: $w0 = COPY %ext(s32) %0:_(s64) = COPY $x0 @@ -287,11 +287,11 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]] + ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, [[C]] - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %f - ; CHECK-NEXT: %sel:_(s1) = G_AND [[XOR]], [[FREEZE]] + ; CHECK-NEXT: %sel:_(s1) = G_AND [[XOR]], %f ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) ; CHECK-NEXT: $w0 = COPY %ext(s32) %0:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/pr58431.ll 
b/llvm/test/CodeGen/AArch64/pr58431.ll index dcd97597ae409..e87d8f7874d62 100644 --- a/llvm/test/CodeGen/AArch64/pr58431.ll +++ b/llvm/test/CodeGen/AArch64/pr58431.ll @@ -4,8 +4,8 @@ define i32 @f(i64 %0) { ; CHECK-LABEL: f: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: mov w9, w0 +; CHECK-NEXT: mov w8, #10 // =0xa +; CHECK-NEXT: and x9, x0, #0xffffffff ; CHECK-NEXT: udiv x10, x9, x8 ; CHECK-NEXT: msub x0, x10, x8, x9 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0