14 changes: 10 additions & 4 deletions llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -565,7 +565,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,

// Otherwise just compute the known bits of the result.
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
Known = KnownBits::computeForAddSub(true, NSW, LHSKnown, RHSKnown);
bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
Known = KnownBits::computeForAddSub(true, NSW, NUW, LHSKnown, RHSKnown);
break;
}
case Instruction::Sub: {
@@ -598,7 +599,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,

// Otherwise just compute the known bits of the result.
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
Known = KnownBits::computeForAddSub(false, NSW, LHSKnown, RHSKnown);
bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
Known = KnownBits::computeForAddSub(false, NSW, NUW, LHSKnown, RHSKnown);
break;
}
case Instruction::Mul: {
@@ -1206,7 +1208,9 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
return I->getOperand(1);

bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
Known = KnownBits::computeForAddSub(/*Add*/ true, NSW, LHSKnown, RHSKnown);
bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
Known =
KnownBits::computeForAddSub(/*Add=*/true, NSW, NUW, LHSKnown, RHSKnown);
computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI));
break;
}
@@ -1221,8 +1225,10 @@
return I->getOperand(0);

bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
Known = KnownBits::computeForAddSub(/*Add*/ false, NSW, LHSKnown, RHSKnown);
Known = KnownBits::computeForAddSub(/*Add=*/false, NSW, NUW, LHSKnown,
RHSKnown);
computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI));
break;
}
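For a sense of what threading the NUW flag through these call sites buys, here is a minimal standalone sketch (illustrative values; it assumes an LLVM tree with the updated five-argument `KnownBits::computeForAddSub`). For `sub nuw` from a known constant, the no-wrap fact bounds the result, producing known-zero high bits that the flag-less query cannot report:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  KnownBits LHS = KnownBits::makeConstant(APInt(8, 10)); // known constant 10
  KnownBits RHS(8);                                      // %x: nothing known

  // 10 - %x with no flags: the borrow chain is unknown, so no bits are known.
  KnownBits Plain = KnownBits::computeForAddSub(
      /*Add=*/false, /*NSW=*/false, /*NUW=*/false, LHS, RHS);

  // 10 - %x with nuw: %x <= 10, so the result lies in [0, 10] and the top
  // four bits of an i8 result must be zero.
  KnownBits WithNUW = KnownBits::computeForAddSub(
      /*Add=*/false, /*NSW=*/false, /*NUW=*/true, LHS, RHS);

  outs() << "min leading zeros without nuw: " << Plain.countMinLeadingZeros()
         << "\n";
  outs() << "min leading zeros with nuw:    " << WithNUW.countMinLeadingZeros()
         << "\n";
}
```

The test updates below are fallout from exactly this kind of newly-known high bit.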
9 changes: 6 additions & 3 deletions llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -114,9 +114,12 @@ define i1 @foo_last(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: foo_last:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: whilels p1.s, xzr, x8
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT: lastb w8, p1, z0.s
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%vcond = fcmp oeq <vscale x 4 x float> %a, %b
%vscale = call i64 @llvm.vscale.i64()
8 changes: 5 additions & 3 deletions llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -614,9 +614,11 @@ define i1 @test_lane9_8xi1(<vscale x 8 x i1> %a) #0 {
define i1 @test_last_8xi1(<vscale x 8 x i1> %a) #0 {
; CHECK-LABEL: test_last_8xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
; CHECK-NEXT: whilels p1.h, xzr, x8
; CHECK-NEXT: lastb w8, p1, z0.h
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%shl = shl nuw nsw i64 %vscale, 3
39 changes: 17 additions & 22 deletions llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -137,49 +137,46 @@ define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[0:1], 0x0
; CI-NEXT: s_mov_b64 vcc, 0
; CI-NEXT: v_not_b32_e32 v0, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT: v_mov_b32_e32 v2, 0x7b
; CI-NEXT: v_mov_b32_e32 v1, 0x7b
; CI-NEXT: v_mov_b32_e32 v2, 0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v1, s0
; CI-NEXT: v_div_fmas_f32 v1, v1, v1, v1
; CI-NEXT: v_mov_b32_e32 v0, s0
; CI-NEXT: v_div_fmas_f32 v0, v0, v0, v0
; CI-NEXT: s_mov_b32 s0, 0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_mov_b32 s1, s0
; CI-NEXT: ds_write_b32 v0, v2 offset:65532
; CI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; CI-NEXT: ds_write_b32 v2, v1
; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: write_ds_sub_max_offset_global_clamp_bit:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX9-NEXT: s_mov_b64 vcc, 0
; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: ds_write_b32 v4, v3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_div_fmas_f32 v2, v1, v1, v1
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_div_fmas_f32 v2, v0, v0, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: ds_write_b32 v3, v4 offset:65532
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: write_ds_sub_max_offset_global_clamp_bit:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX10-NEXT: v_not_b32_e32 v0, v0
; GFX10-NEXT: s_mov_b32 vcc_lo, 0
; GFX10-NEXT: v_mov_b32_e32 v3, 0x7b
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX10-NEXT: v_mov_b32_e32 v3, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: ds_write_b32 v2, v3 offset:65532
; GFX10-NEXT: ds_write_b32 v3, v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_div_fmas_f32 v4, s0, s0, s0
; GFX10-NEXT: global_store_dword v[0:1], v4, off
@@ -189,13 +186,11 @@
; GFX11-LABEL: write_ds_sub_max_offset_global_clamp_bit:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX11-NEXT: v_not_b32_e32 v0, v0
; GFX11-NEXT: s_mov_b32 vcc_lo, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v3, 0x7b :: v_dual_lshlrev_b32 v2, 2, v0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_dual_mov_b32 v2, 0x7b :: v_dual_mov_b32 v3, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: ds_store_b32 v2, v3 offset:65532
; GFX11-NEXT: ds_store_b32 v3, v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_div_fmas_f32 v4, s0, s0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
@@ -43,7 +43,7 @@ define i64 @log2_ceil_idiom_zext(i32 %x) {
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 32, [[TMP2]]
; CHECK-NEXT: [[RET:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[RET:%.*]] = zext nneg i32 [[TMP3]] to i64
; CHECK-NEXT: ret i64 [[RET]]
;
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
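The only functional change above is the inferred `nneg` on the zext: `llvm.ctlz.i32` returns a value in [0, 32], so `sub nuw nsw i32 32, %ctlz` lies in [0, 32] and its sign bit is known zero. A trivial standalone check of that range argument (illustrative, not part of the test suite):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // ctlz on an i32 operand can only produce 0..32.
  for (unsigned Ctlz = 0; Ctlz <= 32; ++Ctlz) {
    int32_t Sub = 32 - static_cast<int32_t>(Ctlz);
    assert(Sub >= 0); // sign bit always clear, so `zext nneg` is justified
  }
}
```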
5 changes: 2 additions & 3 deletions llvm/test/Transforms/InstCombine/icmp-sub.ll
@@ -36,7 +36,7 @@ define i1 @test_nuw_nsw_and_unsigned_pred(i64 %x) {

define i1 @test_nuw_nsw_and_signed_pred(i64 %x) {
; CHECK-LABEL: @test_nuw_nsw_and_signed_pred(
; CHECK-NEXT: [[Z:%.*]] = icmp sgt i64 [[X:%.*]], 7
; CHECK-NEXT: [[Z:%.*]] = icmp ugt i64 [[X:%.*]], 7
; CHECK-NEXT: ret i1 [[Z]]
;
%y = sub nuw nsw i64 10, %x
@@ -46,8 +46,7 @@ define i1 @test_nuw_nsw_and_signed_pred(i64 %x) {

define i1 @test_negative_nuw_and_signed_pred(i64 %x) {
; CHECK-LABEL: @test_negative_nuw_and_signed_pred(
; CHECK-NEXT: [[NOTSUB:%.*]] = add nuw i64 [[X:%.*]], -11
; CHECK-NEXT: [[Z:%.*]] = icmp sgt i64 [[NOTSUB]], -4
; CHECK-NEXT: [[Z:%.*]] = icmp ugt i64 [[X:%.*]], 7
; CHECK-NEXT: ret i1 [[Z]]
;
%y = sub nuw i64 10, %x
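Both updated checks reflect the same improvement: once `%y = sub nuw C, %x` is known not to wrap, `%y` lies in [0, C] with a clear sign bit, so a signed predicate on `%y` can be rewritten as an unsigned predicate directly on `%x`. The comparison lines of these tests are truncated above, so the exhaustive check below uses an illustrative predicate (`icmp sgt %y, 2` with C = 10), not the tests' exact constants:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 10;
  // nuw restricts the input: only %x <= C avoids unsigned wrap.
  for (unsigned X = 0; X <= C; ++X) {
    uint8_t Y = C - static_cast<uint8_t>(X);
    bool SignedPredOnY = static_cast<int8_t>(Y) > 2; // icmp sgt %y, 2
    bool UnsignedPredOnX = X < 8;                    // icmp ult %x, 8
    assert(SignedPredOnY == UnsignedPredOnX);
  }
}
```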
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/sub.ll
@@ -2367,7 +2367,7 @@ define <2 x i8> @sub_to_and_vector3(<2 x i8> %x) {
; CHECK-LABEL: @sub_to_and_vector3(
; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> <i8 71, i8 71>, [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], <i8 120, i8 undef>
; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> <i8 44, i8 44>, [[AND]]
; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> <i8 44, i8 44>, [[AND]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%sub = sub nuw <2 x i8> <i8 71, i8 71>, %x
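The inferred `nsw` here also follows from known bits: `and <2 x i8> [[SUB]], <i8 120, ...>` bounds each lane of the RHS to [0, 120], so `44 - RHS` stays within [-76, 44] and cannot wrap the signed i8 range. A scalar sanity check of the bound (illustrative only):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned V = 0; V <= 255; ++V) {
    uint8_t Masked = static_cast<uint8_t>(V) & 120; // lane after the and-mask
    int Wide = 44 - static_cast<int>(Masked);       // evaluate without wrap
    assert(Wide >= INT8_MIN && Wide <= INT8_MAX);   // nsw can be inferred
  }
}
```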
80 changes: 65 additions & 15 deletions llvm/unittests/Support/KnownBitsTest.cpp
@@ -169,41 +169,69 @@ static void TestAddSubExhaustive(bool IsAdd) {
unsigned Bits = 4;
ForeachKnownBits(Bits, [&](const KnownBits &Known1) {
ForeachKnownBits(Bits, [&](const KnownBits &Known2) {
KnownBits Known(Bits), KnownNSW(Bits);
KnownBits Known(Bits), KnownNSW(Bits), KnownNUW(Bits),
KnownNSWAndNUW(Bits);
Known.Zero.setAllBits();
Known.One.setAllBits();
KnownNSW.Zero.setAllBits();
KnownNSW.One.setAllBits();
KnownNUW.Zero.setAllBits();
KnownNUW.One.setAllBits();
KnownNSWAndNUW.Zero.setAllBits();
KnownNSWAndNUW.One.setAllBits();

ForeachNumInKnownBits(Known1, [&](const APInt &N1) {
ForeachNumInKnownBits(Known2, [&](const APInt &N2) {
bool Overflow;
bool SignedOverflow;
bool UnsignedOverflow;
APInt Res;
if (IsAdd)
Res = N1.sadd_ov(N2, Overflow);
else
Res = N1.ssub_ov(N2, Overflow);
if (IsAdd) {
Res = N1.uadd_ov(N2, UnsignedOverflow);
Res = N1.sadd_ov(N2, SignedOverflow);
} else {
Res = N1.usub_ov(N2, UnsignedOverflow);
Res = N1.ssub_ov(N2, SignedOverflow);
}

Known.One &= Res;
Known.Zero &= ~Res;

if (!Overflow) {
if (!SignedOverflow) {
KnownNSW.One &= Res;
KnownNSW.Zero &= ~Res;
}

if (!UnsignedOverflow) {
KnownNUW.One &= Res;
KnownNUW.Zero &= ~Res;
}

if (!UnsignedOverflow && !SignedOverflow) {
KnownNSWAndNUW.One &= Res;
KnownNSWAndNUW.Zero &= ~Res;
}
});
});

KnownBits KnownComputed =
KnownBits::computeForAddSub(IsAdd, /*NSW*/ false, Known1, Known2);
EXPECT_EQ(Known, KnownComputed);
KnownBits KnownComputed = KnownBits::computeForAddSub(
IsAdd, /*NSW=*/false, /*NUW=*/false, Known1, Known2);
EXPECT_TRUE(isOptimal(Known, KnownComputed, {Known1, Known2}));

// The NSW calculation is not precise, only check that it's
// conservatively correct.
KnownBits KnownNSWComputed = KnownBits::computeForAddSub(
IsAdd, /*NSW*/true, Known1, Known2);
EXPECT_TRUE(KnownNSWComputed.Zero.isSubsetOf(KnownNSW.Zero));
EXPECT_TRUE(KnownNSWComputed.One.isSubsetOf(KnownNSW.One));
IsAdd, /*NSW=*/true, /*NUW=*/false, Known1, Known2);
if (!KnownNSW.hasConflict())
EXPECT_TRUE(isOptimal(KnownNSW, KnownNSWComputed, {Known1, Known2}));

KnownBits KnownNUWComputed = KnownBits::computeForAddSub(
IsAdd, /*NSW=*/false, /*NUW=*/true, Known1, Known2);
if (!KnownNUW.hasConflict())
EXPECT_TRUE(isOptimal(KnownNUW, KnownNUWComputed, {Known1, Known2}));

KnownBits KnownNSWAndNUWComputed = KnownBits::computeForAddSub(
IsAdd, /*NSW=*/true, /*NUW=*/true, Known1, Known2);
if (!KnownNSWAndNUW.hasConflict())
EXPECT_TRUE(isOptimal(KnownNSWAndNUW, KnownNSWAndNUWComputed,
{Known1, Known2}));
});
});
}
@@ -244,6 +272,28 @@ TEST(KnownBitsTest, SubBorrowExhaustive) {
});
}

TEST(KnownBitsTest, SignBitUnknown) {
KnownBits Known(2);
EXPECT_TRUE(Known.isSignUnknown());
Known.Zero.setBit(0);
EXPECT_TRUE(Known.isSignUnknown());
Known.Zero.setBit(1);
EXPECT_FALSE(Known.isSignUnknown());
Known.Zero.clearBit(0);
EXPECT_FALSE(Known.isSignUnknown());
Known.Zero.clearBit(1);
EXPECT_TRUE(Known.isSignUnknown());

Known.One.setBit(0);
EXPECT_TRUE(Known.isSignUnknown());
Known.One.setBit(1);
EXPECT_FALSE(Known.isSignUnknown());
Known.One.clearBit(0);
EXPECT_FALSE(Known.isSignUnknown());
Known.One.clearBit(1);
EXPECT_TRUE(Known.isSignUnknown());
}

TEST(KnownBitsTest, AbsDiffSpecialCase) {
// There are 2 implementation of absdiff - both are currently needed to cover
// extra cases.
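The new `SignBitUnknown` test pins down `KnownBits::isSignUnknown()`: it holds exactly when the sign bit appears in neither `Zero` nor `One`. An equivalent formulation, assuming only what the test above demonstrates (hypothetical helper, not part of the patch):

```cpp
#include "llvm/Support/KnownBits.h"

// Equivalent to !K.isSignUnknown() per the expectations exercised above.
bool signBitKnown(const llvm::KnownBits &K) {
  return K.Zero.isSignBitSet() || K.One.isSignBitSet();
}
```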