[ConstantFolding] Fold undef for integer intrinsics
This fixes https://bugs.llvm.org/show_bug.cgi?id=40110.

This implements handling of undef operands for integer intrinsics in
ConstantFolding, in particular for the bitcounting intrinsics (ctpop,
cttz, ctlz), the with.overflow intrinsics, the saturating math
intrinsics and the funnel shift intrinsics.

The undef behavior follows what InstSimplify does for the general case
of non-constant operands. For the bitcount intrinsics (where
InstSimplify doesn't do undef handling -- there cannot be a combination
of an undef and a non-constant operand), I'm using a 0 result if the
intrinsic is defined for zero, and undef otherwise.
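
For illustration, a hypothetical IR sketch of the new folds (the value
names below are made up; the committed tests under
llvm/test/Analysis/ConstantFolding are the authoritative coverage):

  ; ctpop(undef) --> 0 (zero set bits is always a valid answer).
  %a = call i32 @llvm.ctpop.i32(i32 undef)
  ; cttz(undef, false) --> 0, but cttz(undef, true) --> undef.
  %b = call i32 @llvm.cttz.i32(i32 undef, i1 true)
  ; Saturating add with one undef operand saturates: uadd.sat(x, undef) --> -1.
  %c = call i32 @llvm.uadd.sat.i32(i32 7, i32 undef)
  ; Saturating sub with one undef operand folds to 0.
  %d = call i32 @llvm.usub.sat.i32(i32 7, i32 undef)
  ; Add/sub with.overflow with an undef operand folds to undef;
  ; mul.with.overflow folds to zero instead (see the comment in the code).
  %e = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 7, i32 undef)
  ; A funnel shift by an undef amount returns the corresponding input:
  ; fshl picks the first operand, fshr the second.
  %f = call i32 @llvm.fshl.i32(i32 1, i32 2, i32 undef)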

Differential Revision: https://reviews.llvm.org/D55950

llvm-svn: 350971
nikic committed Jan 11, 2019
1 parent c3399db commit 9f6e9cf
Showing 6 changed files with 174 additions and 594 deletions.
177 changes: 114 additions & 63 deletions llvm/lib/Analysis/ConstantFolding.cpp
@@ -1629,6 +1629,18 @@ static bool isManifestConstant(const Constant *c) {
   return false;
 }
 
+static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
+  if (auto *CI = dyn_cast<ConstantInt>(Op)) {
+    C = &CI->getValue();
+    return true;
+  }
+  if (isa<UndefValue>(Op)) {
+    C = nullptr;
+    return true;
+  }
+  return false;
+}
+
 Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
                                  ArrayRef<Constant *> Operands,
                                  const TargetLibraryInfo *TLI,
@@ -1643,8 +1655,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
         return nullptr;
     }
     if (isa<UndefValue>(Operands[0])) {
-      // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN
-      if (IntrinsicID == Intrinsic::cos)
+      // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
+      // ctpop() is between 0 and bitwidth, pick 0 for undef.
+      if (IntrinsicID == Intrinsic::cos ||
+          IntrinsicID == Intrinsic::ctpop)
         return Constant::getNullValue(Ty);
       if (IntrinsicID == Intrinsic::bswap ||
           IntrinsicID == Intrinsic::bitreverse ||
@@ -1995,62 +2009,92 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
       return nullptr;
     }
 
-    if (auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
-      if (auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+    if (Operands[0]->getType()->isIntegerTy() &&
+        Operands[1]->getType()->isIntegerTy()) {
+      const APInt *C0, *C1;
+      if (!getConstIntOrUndef(Operands[0], C0) ||
+          !getConstIntOrUndef(Operands[1], C1))
+        return nullptr;
+
+      switch (IntrinsicID) {
+      default: break;
+      case Intrinsic::smul_with_overflow:
+      case Intrinsic::umul_with_overflow:
+        // Even if both operands are undef, we cannot fold muls to undef
+        // in the general case. For example, on i2 there are no inputs
+        // that would produce { i2 -1, i1 true } as the result.
+        if (!C0 || !C1)
+          return Constant::getNullValue(Ty);
+        LLVM_FALLTHROUGH;
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::uadd_with_overflow:
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::usub_with_overflow: {
+        if (!C0 || !C1)
+          return UndefValue::get(Ty);
+
+        APInt Res;
+        bool Overflow;
         switch (IntrinsicID) {
-        default: break;
+        default: llvm_unreachable("Invalid case");
         case Intrinsic::sadd_with_overflow:
+          Res = C0->sadd_ov(*C1, Overflow);
+          break;
         case Intrinsic::uadd_with_overflow:
+          Res = C0->uadd_ov(*C1, Overflow);
+          break;
         case Intrinsic::ssub_with_overflow:
+          Res = C0->ssub_ov(*C1, Overflow);
+          break;
         case Intrinsic::usub_with_overflow:
+          Res = C0->usub_ov(*C1, Overflow);
+          break;
         case Intrinsic::smul_with_overflow:
-        case Intrinsic::umul_with_overflow: {
-          APInt Res;
-          bool Overflow;
-          switch (IntrinsicID) {
-          default: llvm_unreachable("Invalid case");
-          case Intrinsic::sadd_with_overflow:
-            Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
-            break;
-          case Intrinsic::uadd_with_overflow:
-            Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
-            break;
-          case Intrinsic::ssub_with_overflow:
-            Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
-            break;
-          case Intrinsic::usub_with_overflow:
-            Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
-            break;
-          case Intrinsic::smul_with_overflow:
-            Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
-            break;
-          case Intrinsic::umul_with_overflow:
-            Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
-            break;
-          }
-          Constant *Ops[] = {
-            ConstantInt::get(Ty->getContext(), Res),
-            ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
-          };
-          return ConstantStruct::get(cast<StructType>(Ty), Ops);
-        }
-        case Intrinsic::uadd_sat:
-          return ConstantInt::get(Ty, Op1->getValue().uadd_sat(Op2->getValue()));
-        case Intrinsic::sadd_sat:
-          return ConstantInt::get(Ty, Op1->getValue().sadd_sat(Op2->getValue()));
-        case Intrinsic::usub_sat:
-          return ConstantInt::get(Ty, Op1->getValue().usub_sat(Op2->getValue()));
-        case Intrinsic::ssub_sat:
-          return ConstantInt::get(Ty, Op1->getValue().ssub_sat(Op2->getValue()));
-        case Intrinsic::cttz:
-          if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef.
-            return UndefValue::get(Ty);
-          return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
-        case Intrinsic::ctlz:
-          if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef.
-            return UndefValue::get(Ty);
-          return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
+          Res = C0->smul_ov(*C1, Overflow);
+          break;
+        case Intrinsic::umul_with_overflow:
+          Res = C0->umul_ov(*C1, Overflow);
+          break;
         }
+        Constant *Ops[] = {
+          ConstantInt::get(Ty->getContext(), Res),
+          ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
+        };
+        return ConstantStruct::get(cast<StructType>(Ty), Ops);
+      }
+      case Intrinsic::uadd_sat:
+      case Intrinsic::sadd_sat:
+        if (!C0 && !C1)
+          return UndefValue::get(Ty);
+        if (!C0 || !C1)
+          return Constant::getAllOnesValue(Ty);
+        if (IntrinsicID == Intrinsic::uadd_sat)
+          return ConstantInt::get(Ty, C0->uadd_sat(*C1));
+        else
+          return ConstantInt::get(Ty, C0->sadd_sat(*C1));
+      case Intrinsic::usub_sat:
+      case Intrinsic::ssub_sat:
+        if (!C0 && !C1)
+          return UndefValue::get(Ty);
+        if (!C0 || !C1)
+          return Constant::getNullValue(Ty);
+        if (IntrinsicID == Intrinsic::usub_sat)
+          return ConstantInt::get(Ty, C0->usub_sat(*C1));
+        else
+          return ConstantInt::get(Ty, C0->ssub_sat(*C1));
+      case Intrinsic::cttz:
+      case Intrinsic::ctlz:
+        assert(C1 && "Must be constant int");
+
+        // cttz(0, 1) and ctlz(0, 1) are undef.
+        if (C1->isOneValue() && (!C0 || C0->isNullValue()))
+          return UndefValue::get(Ty);
+        if (!C0)
+          return Constant::getNullValue(Ty);
+        if (IntrinsicID == Intrinsic::cttz)
+          return ConstantInt::get(Ty, C0->countTrailingZeros());
+        else
+          return ConstantInt::get(Ty, C0->countLeadingZeros());
       }
 
       return nullptr;
@@ -2136,26 +2180,33 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
     }
 
     if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
-      auto *C0 = dyn_cast<ConstantInt>(Operands[0]);
-      auto *C1 = dyn_cast<ConstantInt>(Operands[1]);
-      auto *C2 = dyn_cast<ConstantInt>(Operands[2]);
-      if (!(C0 && C1 && C2))
+      const APInt *C0, *C1, *C2;
+      if (!getConstIntOrUndef(Operands[0], C0) ||
+          !getConstIntOrUndef(Operands[1], C1) ||
+          !getConstIntOrUndef(Operands[2], C2))
         return nullptr;
 
+      bool IsRight = IntrinsicID == Intrinsic::fshr;
+      if (!C2)
+        return Operands[IsRight ? 1 : 0];
+      if (!C0 && !C1)
+        return UndefValue::get(Ty);
+
       // The shift amount is interpreted as modulo the bitwidth. If the shift
       // amount is effectively 0, avoid UB due to oversized inverse shift below.
-      unsigned BitWidth = C0->getBitWidth();
-      unsigned ShAmt = C2->getValue().urem(BitWidth);
-      bool IsRight = IntrinsicID == Intrinsic::fshr;
+      unsigned BitWidth = C2->getBitWidth();
+      unsigned ShAmt = C2->urem(BitWidth);
       if (!ShAmt)
-        return IsRight ? C1 : C0;
+        return Operands[IsRight ? 1 : 0];
 
-      // (X << ShlAmt) | (Y >> LshrAmt)
-      const APInt &X = C0->getValue();
-      const APInt &Y = C1->getValue();
+      // (C0 << ShlAmt) | (C1 >> LshrAmt)
       unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
       unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
-      return ConstantInt::get(Ty->getContext(), X.shl(ShlAmt) | Y.lshr(LshrAmt));
+      if (!C0)
+        return ConstantInt::get(Ty, C1->lshr(LshrAmt));
+      if (!C1)
+        return ConstantInt::get(Ty, C0->shl(ShlAmt));
+      return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
     }
 
     return nullptr;
30 changes: 10 additions & 20 deletions llvm/test/Analysis/ConstantFolding/bitcount.ll
@@ -74,44 +74,39 @@ define i33 @ctlz_zero_undefined() {
 
 define i31 @ctpop_undef() {
 ; CHECK-LABEL: @ctpop_undef(
-; CHECK-NEXT: [[X:%.*]] = call i31 @llvm.ctpop.i31(i31 undef)
-; CHECK-NEXT: ret i31 [[X]]
+; CHECK-NEXT: ret i31 0
 ;
   %x = call i31 @llvm.ctpop.i31(i31 undef)
   ret i31 %x
 }
 
 define i32 @cttz_undef_defined() {
 ; CHECK-LABEL: @cttz_undef_defined(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.cttz.i32(i32 undef, i1 false)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
 ;
   %x = call i32 @llvm.cttz.i32(i32 undef, i1 false)
   ret i32 %x
 }
 
 define i32 @cttz_undef_undefined() {
 ; CHECK-LABEL: @cttz_undef_undefined(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.cttz.i32(i32 undef, i1 true)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 undef
 ;
   %x = call i32 @llvm.cttz.i32(i32 undef, i1 true)
   ret i32 %x
 }
 
 define i33 @ctlz_undef_defined() {
 ; CHECK-LABEL: @ctlz_undef_defined(
-; CHECK-NEXT: [[X:%.*]] = call i33 @llvm.ctlz.i33(i33 undef, i1 false)
-; CHECK-NEXT: ret i33 [[X]]
+; CHECK-NEXT: ret i33 0
 ;
   %x = call i33 @llvm.ctlz.i33(i33 undef, i1 false)
   ret i33 %x
 }
 
 define i33 @ctlz_undef_undefined() {
 ; CHECK-LABEL: @ctlz_undef_undefined(
-; CHECK-NEXT: [[X:%.*]] = call i33 @llvm.ctlz.i33(i33 undef, i1 true)
-; CHECK-NEXT: ret i33 [[X]]
+; CHECK-NEXT: ret i33 undef
 ;
   %x = call i33 @llvm.ctlz.i33(i33 undef, i1 true)
   ret i33 %x
@@ -127,8 +122,7 @@ define <2 x i31> @ctpop_vector() {
 
 define <2 x i31> @ctpop_vector_undef() {
 ; CHECK-LABEL: @ctpop_vector_undef(
-; CHECK-NEXT: [[X:%.*]] = call <2 x i31> @llvm.ctpop.v2i31(<2 x i31> <i31 0, i31 undef>)
-; CHECK-NEXT: ret <2 x i31> [[X]]
+; CHECK-NEXT: ret <2 x i31> zeroinitializer
 ;
   %x = call <2 x i31> @llvm.ctpop.v2i31(<2 x i31> <i31 0, i31 undef>)
   ret <2 x i31> %x
@@ -144,17 +138,15 @@ define <2 x i32> @cttz_vector() {
 
 define <2 x i32> @cttz_vector_undef_defined() {
 ; CHECK-LABEL: @cttz_vector_undef_defined(
-; CHECK-NEXT: [[X:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> <i32 0, i32 undef>, i1 false)
-; CHECK-NEXT: ret <2 x i32> [[X]]
+; CHECK-NEXT: ret <2 x i32> <i32 32, i32 0>
 ;
   %x = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> <i32 0, i32 undef>, i1 false)
   ret <2 x i32> %x
 }
 
 define <2 x i32> @cttz_vector_undef_undefined() {
 ; CHECK-LABEL: @cttz_vector_undef_undefined(
-; CHECK-NEXT: [[X:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> <i32 0, i32 undef>, i1 true)
-; CHECK-NEXT: ret <2 x i32> [[X]]
+; CHECK-NEXT: ret <2 x i32> undef
 ;
   %x = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> <i32 0, i32 undef>, i1 true)
   ret <2 x i32> %x
@@ -170,17 +162,15 @@ define <2 x i33> @ctlz_vector() {
 
 define <2 x i33> @ctlz_vector_undef_defined() {
 ; CHECK-LABEL: @ctlz_vector_undef_defined(
-; CHECK-NEXT: [[X:%.*]] = call <2 x i33> @llvm.ctlz.v2i33(<2 x i33> <i33 0, i33 undef>, i1 false)
-; CHECK-NEXT: ret <2 x i33> [[X]]
+; CHECK-NEXT: ret <2 x i33> <i33 33, i33 0>
 ;
   %x = call <2 x i33> @llvm.ctlz.v2i33(<2 x i33> <i33 0, i33 undef>, i1 false)
   ret <2 x i33> %x
 }
 
 define <2 x i33> @ctlz_vector_undef_undefined() {
 ; CHECK-LABEL: @ctlz_vector_undef_undefined(
-; CHECK-NEXT: [[X:%.*]] = call <2 x i33> @llvm.ctlz.v2i33(<2 x i33> <i33 0, i33 undef>, i1 true)
-; CHECK-NEXT: ret <2 x i33> [[X]]
+; CHECK-NEXT: ret <2 x i33> undef
 ;
   %x = call <2 x i33> @llvm.ctlz.v2i33(<2 x i33> <i33 0, i33 undef>, i1 true)
   ret <2 x i33> %x