diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 26b62fb4e3828..6344a3cf90764 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -420,6 +420,11 @@ struct KnownBits { void print(raw_ostream &OS) const; void dump() const; + +private: + // Internal helper for getting the initial KnownBits for an `srem` or `urem` + // operation with the low-bits set. + static KnownBits remGetLowBits(const KnownBits &LHS, const KnownBits &RHS); }; inline KnownBits operator&(KnownBits LHS, const KnownBits &RHS) { diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 8b0d6030fea32..e20838278e612 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -546,16 +546,27 @@ KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS) { return Known; } -KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) { +KnownBits KnownBits::remGetLowBits(const KnownBits &LHS, const KnownBits &RHS) { unsigned BitWidth = LHS.getBitWidth(); + if (!RHS.isZero() && RHS.Zero[0]) { + // rem X, Y where Y[0:N] is zero will preserve X[0:N] in the result. + unsigned RHSZeros = RHS.countMinTrailingZeros(); + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSZeros); + APInt OnesMask = LHS.One & Mask; + APInt ZerosMask = LHS.Zero & Mask; + return KnownBits(ZerosMask, OnesMask); + } + return KnownBits(BitWidth); +} + +KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) { assert(!LHS.hasConflict() && !RHS.hasConflict()); - KnownBits Known(BitWidth); + KnownBits Known = remGetLowBits(LHS, RHS); if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) { - // The upper bits are all zero, the lower ones are unchanged. - APInt LowBits = RHS.getConstant() - 1; - Known.Zero = LHS.Zero | ~LowBits; - Known.One = LHS.One & LowBits; + // NB: Low bits set in `remGetLowBits`. + APInt HighBits = ~(RHS.getConstant() - 1); + Known.Zero |= HighBits; return Known; } @@ -568,16 +579,12 @@ KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) { } KnownBits KnownBits::srem(const KnownBits &LHS, const KnownBits &RHS) { - unsigned BitWidth = LHS.getBitWidth(); assert(!LHS.hasConflict() && !RHS.hasConflict()); - KnownBits Known(BitWidth); + KnownBits Known = remGetLowBits(LHS, RHS); if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) { - // The low bits of the first operand are unchanged by the srem. + // NB: Low bits are set in `remGetLowBits`. APInt LowBits = RHS.getConstant() - 1; - Known.Zero = LHS.Zero & LowBits; - Known.One = LHS.One & LowBits; - // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. if (LHS.isNonNegative() || LowBits.isSubsetOf(LHS.Zero)) diff --git a/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll b/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll index 6b305bde89db0..0521c7130055f 100644 --- a/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll @@ -3,11 +3,7 @@ define i8 @urem_low_bits_know(i8 %xx, i8 %yy) { ; CHECK-LABEL: @urem_low_bits_know( -; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 2 -; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -4 -; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 2 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 2 ; %x = or i8 %xx, 2 %y = and i8 %yy, -4 @@ -18,12 +14,7 @@ define i8 @urem_low_bits_know(i8 %xx, i8 %yy) { define i8 @urem_low_bits_know2(i8 %xx, i8 %yy) { ; CHECK-LABEL: @urem_low_bits_know2( -; CHECK-NEXT: [[XO:%.*]] = and i8 [[XX:%.*]], -4 -; CHECK-NEXT: [[X:%.*]] = or i8 [[XO]], 2 -; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -4 -; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 3 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 2 ; %xo = or i8 %xx, 2 %x = and i8 %xo, 254 @@ -80,11 +71,7 @@ define i8 @urem_fail_low_bits_unknown2(i8 %xx, i8 %yy) { define i8 @srem_low_bits_know(i8 %xx, i8 %yy) { ; CHECK-LABEL: @srem_low_bits_know( -; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 10 -; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -4 -; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 2 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 2 ; %x = or i8 %xx, 10 %y = and i8 %yy, -4 @@ -95,11 +82,7 @@ define i8 @srem_low_bits_know(i8 %xx, i8 %yy) { define i8 @srem_low_bits_know2(i8 %xx, i8 %yy) { ; CHECK-LABEL: @srem_low_bits_know2( -; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 1 -; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -2 -; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 1 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 1 ; %x = or i8 %xx, 1 %y = and i8 %yy, -2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 1cb00e2f58385..904dcd4fed30e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -33,8 +33,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFNONE: middle.block: -; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: ; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] @@ -193,8 +192,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFNONE: middle.block: -; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: ; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] @@ -395,8 +393,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFNONE: middle.block: -; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: ; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] @@ -607,8 +604,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TFNONE: middle.block: -; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: ; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] @@ -666,8 +662,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TFFALLBACK: middle.block: -; TFFALLBACK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; TFFALLBACK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; TFFALLBACK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFFALLBACK: scalar.ph: ; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] @@ -731,8 +726,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TFNONE: middle.block: -; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: ; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFNONE-NEXT: br label [[FOR_BODY:%.*]]