diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index fba1ccf2c8c9b..5c433277a8f7d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -12,6 +12,7 @@ #include "InstCombineInternal.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" @@ -22,12 +23,14 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/Utils/Local.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/CmpPredicate.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" @@ -7161,6 +7164,66 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { return nullptr; } +/// Fold icmp(ult, sub(add(sext(X), Cst1), sext(Y)), Cst2) --> +/// extract(__builtin_ssub_overflow(X, Y), 1) +Instruction *InstCombinerImpl::foldICmpsToSignedSubOverflow(Instruction &I) { + CmpPredicate Pred; + ConstantInt *Cst1, *Cst2; + Value *X, *Y; + + /* + This transformation detects the pattern used to check for + a signed subtraction overflow. + + The matched sequence performs the following steps: + + 1. X = sext(x) + Y = sext(y) + // Sign-extend 32-bit operands to 64 bits. + + 2. Shifted = X + Cst1 + // Shift the signed range [-2^31, 2^31-1] by adding the minimum + // signed value (Cst1 = INT_MIN), producing an unsigned range + // [0, 2^32). + + 3. Sub = Shifted - Y + // Compute the shifted subtraction result. + + 4. icmp ult Sub, Cst2 + // Check whether the result fits in [0, 2^32). + // If not, the subtraction overflowed. + */ + + auto SExtX = m_SExt(m_Value(X)); + auto SExtY = m_SExt(m_Value(Y)); + auto Shifted = m_Add(SExtX, m_ConstantInt(Cst1)); + auto Sub = m_Sub(Shifted, SExtY); + + if (!match(&I, m_ICmp(Pred, Sub, m_ConstantInt(Cst2))) || + Pred != CmpInst::ICMP_ULT) + return nullptr; + + const auto SignedMin = + APInt::getSignedMinValue(X->getType()->getScalarSizeInBits()); + const auto ExpectedRange = SignedMin.getSExtValue() << 1; + + // Cst1 must equal to SignedMin + // Cst2 must equal to ExpectedRange + if (SignedMin.getSExtValue() != Cst1->getValue().getSExtValue() || + ExpectedRange != Cst2->getValue().getSExtValue()) + return nullptr; + + Module *M = I.getModule(); + Function *F = Intrinsic::getOrInsertDeclaration( + M, Intrinsic::ssub_with_overflow, X->getType()); + + Builder.SetInsertPoint(&I); + auto *Call = Builder.CreateCall(F, {X, Y}); + auto *Extract = Builder.CreateExtractValue(Call, 1); + + return replaceInstUsesWith(I, Extract); +} + /// If we have an icmp le or icmp ge instruction with a constant operand, turn /// it into the appropriate icmp lt or icmp gt instruction. This transform /// allows them to be folded in visitICmpInst. @@ -7970,6 +8033,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { } } + if (Instruction *R = foldICmpsToSignedSubOverflow(I)) + return R; + return Changed ? &I : nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 9bdd8cb71f7f3..5791932450711 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -791,6 +791,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *foldICmpWithTrunc(ICmpInst &Cmp); Instruction *foldICmpCommutative(CmpPredicate Pred, Value *Op0, Value *Op1, ICmpInst &CxtI); + Instruction *foldICmpsToSignedSubOverflow(Instruction &I); // Helpers of visitSelectInst(). Instruction *foldSelectOfBools(SelectInst &SI); diff --git a/llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll b/llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll new file mode 100644 index 0000000000000..1d22781db16f2 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +; Fold +; +; int result = x - y; +; return !(INT_MIN <= result && result <= INT_MAX); +; +; into +; +; __builtin_ssub_overflow(x, y) + +define i1 @idiomatic_check_sub_i16(i16 %x, i16 %y) { +; CHECK-LABEL: @idiomatic_check_sub_i16( +; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 [[X:%.*]], i16 [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %3 = sext i16 %x to i32 + %4 = sext i16 %y to i32 + %5 = add nsw i32 %3, -32768 + %6 = sub nsw i32 %5, %4 + %7 = icmp ult i32 %6, -65536 + ret i1 %7 +} + +define i1 @idiomatic_check_sub_i16_no_flags(i16 %x, i16 %y) { +; CHECK-LABEL: @idiomatic_check_sub_i16_no_flags( +; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 [[X:%.*]], i16 [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %3 = sext i16 %x to i32 + %4 = sext i16 %y to i32 + %5 = add i32 %3, -32768 + %6 = sub i32 %5, %4 + %7 = icmp ult i32 %6, -65536 + ret i1 %7 +} + +define i1 @idiomatic_check_sub_i32(i32 %x, i32 %y) { +; CHECK-LABEL: @idiomatic_check_sub_i32( +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %3 = sext i32 %x to i64 + %4 = sext i32 %y to i64 + %5 = add nsw i64 %3, -2147483648 + %6 = sub nsw i64 %5, %4 + %7 = icmp ult i64 %6, -4294967296 + ret i1 %7 +} + +define i1 @idiomatic_check_sub_i32_negative_test_1(i32 %x, i32 %y) { +; CHECK-LABEL: @idiomatic_check_sub_i32_negative_test_1( +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], -4294967296 +; CHECK-NEXT: ret i1 [[TMP4]] +; + %3 = sext i32 %x to i64 + %4 = sext i32 %y to i64 + %5 = add nsw i64 %3, 0 ; Constant wrong + %6 = sub nsw i64 %5, %4 + %7 = icmp ult i64 %6, -4294967296 + ret i1 %7 +} + +define i1 @idiomatic_check_sub_i32_negative_test_2(i32 %x, i32 %y) { +; CHECK-LABEL: @idiomatic_check_sub_i32_negative_test_2( +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP1]], -2147483648 +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i64 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], -4294967295 +; CHECK-NEXT: ret i1 [[TMP5]] +; + %3 = sext i32 %x to i64 + %4 = sext i32 %y to i64 + %5 = add nsw i64 %3, -2147483648 + %6 = sub nsw i64 %5, %4 + %7 = icmp ult i64 %6, -4294967295 ; Constant wrong + ret i1 %7 +} + +define i1 @idiomatic_check_sub_i32_negative_test_3(i32 %x, i32 %y) { +; CHECK-LABEL: @idiomatic_check_sub_i32_negative_test_3( +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP1]], -2147483648 +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i64 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i64 [[TMP4]], -4294967296 +; CHECK-NEXT: ret i1 [[TMP5]] +; + %3 = sext i32 %x to i64 + %4 = sext i32 %y to i64 + %5 = add nsw i64 %3, -2147483648 + %6 = sub nsw i64 %5, %4 + %7 = icmp slt i64 %6, -4294967296 ; wrong Condition + ret i1 %7 +}