diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 041a4ce112275..dacda0afc7f03 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2548,6 +2548,11 @@ class IRBuilderBase { std::optional<RoundingMode> Rounding = std::nullopt, std::optional<fp::ExceptionBehavior> Except = std::nullopt); + LLVM_ABI Value *CreateSelectWithUnknownProfile(Value *C, Value *True, + Value *False, + StringRef PassName, + const Twine &Name = ""); + LLVM_ABI Value *CreateSelect(Value *C, Value *True, Value *False, const Twine &Name = "", Instruction *MDFrom = nullptr); diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 614c3a9abb8d0..15c0198d07614 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -1002,6 +1003,18 @@ CallInst *IRBuilderBase::CreateConstrainedFPCall( return C; } +Value *IRBuilderBase::CreateSelectWithUnknownProfile(Value *C, Value *True, + Value *False, + StringRef PassName, + const Twine &Name) { + Value *Ret = CreateSelectFMF(C, True, False, {}, Name); + if (auto *SI = dyn_cast<SelectInst>(Ret)) { + setExplicitlyUnknownBranchWeightsIfProfiled( + *SI, *SI->getParent()->getParent(), PassName); + } + return Ret; +} + Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False, const Twine &Name, Instruction *MDFrom) { return CreateSelectFMF(C, True, False, {}, Name, MDFrom); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cfa8d2703592e..08df8faad487c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -19414,7 +19414,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } assert(getNumElements(Cond->getType()) == TrueNumElements && "Cannot 
vectorize Instruction::Select"); - Value *V = Builder.CreateSelect(Cond, True, False); + Value *V = + Builder.CreateSelectWithUnknownProfile(Cond, True, False, DEBUG_TYPE); V = FinalShuffle(V, E); E->VectorizedValue = V; @@ -23532,18 +23533,19 @@ class HorizontalReduction { switch (Kind) { case RecurKind::Or: { if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy)) - return Builder.CreateSelect( + return Builder.CreateSelectWithUnknownProfile( LHS, ConstantInt::getAllOnesValue(CmpInst::makeCmpResultType(OpTy)), - RHS, Name); + RHS, DEBUG_TYPE, Name); unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); } case RecurKind::And: { if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy)) - return Builder.CreateSelect( + return Builder.CreateSelectWithUnknownProfile( LHS, RHS, - ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), Name); + ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), + DEBUG_TYPE, Name); unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); @@ -23564,7 +23566,8 @@ class HorizontalReduction { if (UseSelect) { CmpInst::Predicate Pred = llvm::getMinMaxReductionPredicate(Kind); Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS, Name); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + return Builder.CreateSelectWithUnknownProfile(Cmp, LHS, RHS, DEBUG_TYPE, + Name); } [[fallthrough]]; case RecurKind::FMax: diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index f10162448bb10..42b1293b08aec 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -11,7 +11,6 @@ CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll CodeGen/AArch64/selectopt-cast.ll CodeGen/AArch64/selectopt.ll CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll -CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll 
CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll @@ -74,7 +73,6 @@ CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll CodeGen/Hexagon/loop-idiom/memmove-rt-check.ll CodeGen/NVPTX/lower-ctor-dtor.ll CodeGen/RISCV/zmmul.ll -CodeGen/SPIRV/hlsl-resources/UniqueImplicitBindingNumber.ll CodeGen/WebAssembly/memory-interleave.ll CodeGen/X86/masked_gather_scatter.ll CodeGen/X86/nocfivalue.ll @@ -85,7 +83,6 @@ DebugInfo/KeyInstructions/Generic/loop-unswitch.ll DebugInfo/X86/asan_debug_info.ll Instrumentation/AddressSanitizer/aarch64be.ll Instrumentation/AddressSanitizer/adaptive_global_redzones.ll -Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll Instrumentation/AddressSanitizer/AMDGPU/adaptive_constant_global_redzones.ll Instrumentation/AddressSanitizer/AMDGPU/adaptive_global_redzones.ll Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll @@ -549,12 +546,6 @@ tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test tools/UpdateTestChecks/update_test_checks/stable_ir_values.test tools/UpdateTestChecks/update_test_checks/tbaa-semantics-checks.test tools/UpdateTestChecks/update_test_checks/various_ir_values_dbgrecords.test -Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll -Transforms/AggressiveInstCombine/lower-table-based-cttz-dereferencing-pointer.ll -Transforms/AggressiveInstCombine/lower-table-based-cttz-non-argument-value.ll -Transforms/AggressiveInstCombine/lower-table-based-cttz-zero-element.ll -Transforms/AggressiveInstCombine/trunc_select_cmp.ll -Transforms/AggressiveInstCombine/trunc_select.ll Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll Transforms/AtomicExpand/AArch64/pcsections.ll @@ -819,7 +810,6 @@ Transforms/InstCombine/AMDGPU/addrspacecast.ll Transforms/InstCombine/and2.ll Transforms/InstCombine/and-fcmp.ll Transforms/InstCombine/and.ll 
-Transforms/InstCombine/and-or-icmp-nullptr.ll Transforms/InstCombine/and-or-icmps.ll Transforms/InstCombine/and-or-implied-cond-not.ll Transforms/InstCombine/apint-div1.ll @@ -1105,7 +1095,6 @@ Transforms/LoopSimplifyCFG/invalidate-scev-dispositions.ll Transforms/LoopSimplifyCFG/lcssa.ll Transforms/LoopSimplifyCFG/live_block_marking.ll Transforms/LoopSimplifyCFG/mssa_update.ll -Transforms/LoopSimplifyCFG/pr117537.ll Transforms/LoopSimplifyCFG/update_parents.ll Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll Transforms/LoopUnroll/peel-last-iteration-with-guards.ll @@ -1260,7 +1249,6 @@ Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll Transforms/PhaseOrdering/AArch64/predicated-reduction.ll Transforms/PhaseOrdering/AArch64/quant_4x4.ll Transforms/PhaseOrdering/ARM/arm_mean_q7.ll -Transforms/PhaseOrdering/lower-table-based-cttz.ll Transforms/PhaseOrdering/vector-select.ll Transforms/PhaseOrdering/X86/blendv-select.ll Transforms/PhaseOrdering/X86/merge-functions2.ll @@ -1322,82 +1310,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll Transforms/SimpleLoopUnswitch/trivial-unswitch.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll -Transforms/SLPVectorizer/AArch64/gather-root.ll -Transforms/SLPVectorizer/AArch64/horizontal.ll -Transforms/SLPVectorizer/AArch64/loadi8.ll -Transforms/SLPVectorizer/AArch64/phi-node-bitwidt-op-not.ll -Transforms/SLPVectorizer/AArch64/uselistorder.ll -Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll -Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll -Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll -Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll -Transforms/SLPVectorizer/bool-logical-op-reduction-with-poison.ll -Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll -Transforms/SLPVectorizer/const-bool-logical-or-reduction.ll 
-Transforms/SLPVectorizer/extracts-with-undefs.ll -Transforms/SLPVectorizer/freeze-signedness-missed.ll -Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll -Transforms/SLPVectorizer/gather_extract_from_vectorbuild.ll -Transforms/SLPVectorizer/insert-element-build-vector-const.ll -Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll -Transforms/SLPVectorizer/insert-element-build-vector.ll -Transforms/SLPVectorizer/logical-ops-poisonous-repeated.ll -Transforms/SLPVectorizer/minbitwidth-node-with-multi-users.ll -Transforms/SLPVectorizer/minbitwidth-user-not-min.ll -Transforms/SLPVectorizer/partial-register-extract.ll -Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll -Transforms/SLPVectorizer/reorder-node.ll -Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll -Transforms/SLPVectorizer/revec.ll -Transforms/SLPVectorizer/RISCV/remarks_cmp_sel_min_max.ll -Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll -Transforms/SLPVectorizer/RISCV/reordered-interleaved-loads.ll -Transforms/SLPVectorizer/RISCV/revec.ll -Transforms/SLPVectorizer/RISCV/select-profitability.ll -Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll -Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll -Transforms/SLPVectorizer/slp-deleted-inst.ll -Transforms/SLPVectorizer/SystemZ/cmp-ptr-minmax.ll -Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll -Transforms/SLPVectorizer/SystemZ/minbitwidth-trunc.ll -Transforms/SLPVectorizer/X86/bool-mask.ll -Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll -Transforms/SLPVectorizer/X86/cmp-after-intrinsic-call-minbitwidth.ll -Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll -Transforms/SLPVectorizer/X86/cmp_sel.ll -Transforms/SLPVectorizer/X86/crash_7zip.ll -Transforms/SLPVectorizer/X86/crash_clear_undefs.ll -Transforms/SLPVectorizer/X86/crash_cmpop.ll -Transforms/SLPVectorizer/X86/debug-counter.ll 
-Transforms/SLPVectorizer/X86/debug-info-salvage.ll -Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll -Transforms/SLPVectorizer/X86/extracts-non-extendable.ll -Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll -Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll -Transforms/SLPVectorizer/X86/horizontal-minmax.ll -Transforms/SLPVectorizer/X86/insert-after-bundle.ll -Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll -Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll -Transforms/SLPVectorizer/X86/minbw-user-non-sizable.ll -Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll -Transforms/SLPVectorizer/X86/ordering-bug.ll -Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll -Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll -Transforms/SLPVectorizer/X86/pr46983.ll -Transforms/SLPVectorizer/X86/pr49933.ll -Transforms/SLPVectorizer/X86/propagate_ir_flags.ll -Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll -Transforms/SLPVectorizer/X86/reduction-logical.ll -Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll -Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll -Transforms/SLPVectorizer/X86/select-reduction-op.ll -Transforms/SLPVectorizer/X86/shrink_after_reorder.ll -Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll -Transforms/SLPVectorizer/X86/undef_vect.ll -Transforms/SLPVectorizer/X86/used-reduced-op.ll -Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll -Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll -Transforms/SLPVectorizer/X86/whole-registers-compare.ll Transforms/SROA/addrspacecast.ll Transforms/SROA/phi-and-select.ll Transforms/SROA/phi-gep.ll