diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 8551de6694757..fa267cef3d1f0 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2942,6 +2942,26 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q)) return true; break; + case Intrinsic::smin: + case Intrinsic::smax: { + auto KnownOpImpliesNonZero = [&](const KnownBits &K) { + return II->getIntrinsicID() == Intrinsic::smin + ? K.isNegative() + : K.isStrictlyPositive(); + }; + KnownBits XKnown = + computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q); + if (KnownOpImpliesNonZero(XKnown)) + return true; + KnownBits YKnown = + computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q); + if (KnownOpImpliesNonZero(YKnown)) + return true; + + if (XKnown.isNonZero() && YKnown.isNonZero()) + return true; + } + [[fallthrough]]; case Intrinsic::umin: return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q) && isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q); diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll index 5f45d30d65fb2..6e688b13a0892 100644 --- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll +++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll @@ -1074,12 +1074,7 @@ define i1 @smin_nonzero_fail_y_maybe_z(i8 %xx, i8 %yy, i8 %ind) { define i1 @smax_nonzero_pos_arg(i8 %xx, i8 %yy, i8 %ind) { ; CHECK-LABEL: @smax_nonzero_pos_arg( -; CHECK-NEXT: [[YA:%.*]] = and i8 [[YY:%.*]], 127 -; CHECK-NEXT: [[YO:%.*]] = or i8 [[YA]], 1 -; CHECK-NEXT: [[X:%.*]] = call i8 @llvm.smax.i8(i8 [[XX:%.*]], i8 [[YO]]) -; CHECK-NEXT: [[Z:%.*]] = or i8 [[X]], [[IND:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[Z]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %ya = and i8 %yy, 127 %yo = or i8 %ya, 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 021ac57bdfe66..2c78b7208c19f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -263,10 +263,9 @@ define double @external_use_without_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-LABEL: @external_use_without_fast_math( ; AUTO_VEC-NEXT: entry: ; AUTO_VEC-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) -; AUTO_VEC-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[SMAX]], 7 -; AUTO_VEC-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; AUTO_VEC-NEXT: br i1 [[TMP1]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] +; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp ult i64 [[SMAX]], 8 +; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] ; AUTO_VEC: entry.new: ; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[SMAX]], 9223372036854775800 ; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 97d912ece06f8..922070e844380 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -943,7 +943,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; IND-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; IND-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 ; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 +; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 9 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 @@ -1003,7 +1003,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; UNROLL-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 ; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 24 +; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 25 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 @@ -1160,10 +1160,10 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; INTERLEAVE-LABEL: @scalarize_induction_variable_02( ; INTERLEAVE-NEXT: entry: ; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8) -; INTERLEAVE-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 -; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64 +; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 65 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: +; INTERLEAVE-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 7 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 0aebe34d2bf64..8c3455b7cd4e3 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1278,7 +1278,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 7 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 diff --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll index 7aa8efa566b99..2459658f76360 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll @@ -22,8 +22,8 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: store <2 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -37,7 +37,7 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: store ptr [[VAR0]], ptr [[VAR1]], align 8 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -65,7 +65,7 @@ define void @scalar_store(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 3 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 @@ -125,7 +125,7 @@ define void @expansion(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 3 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 @@ -142,8 +142,8 @@ define void @expansion(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -191,13 +191,13 @@ define void @no_gep_or_bitcast(ptr noalias %a, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 0 +; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 1 ; CHECK-NEXT: store i32 0, ptr [[TMP2]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 1 -; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]