Skip to content

Commit

Permalink
[InstCombine] Use zext's nneg flag for icmp folding (#70845)
Browse files Browse the repository at this point in the history
This PR fixes #55013 : the
max intrinsics is not generated for this simple loop case :
https://godbolt.org/z/hxz1xhMPh. This is caused by a ICMP not being
folded into a select, thus not generating the max intrinsics.

For the story :

Since LLVM 14, SCCP pass got smarter by folding sext into zext for
positive ranges : https://reviews.llvm.org/D81756. After this change,
InstCombine was sometimes unable to fold ICMP correctly as both of the
arguments pointed to mismatched zext/sext. To fix this, @rotateright
implemented this fix : https://reviews.llvm.org/D124419 that tries to
resolve the mismatch by knowing if the argument of a zext is positive
(in which case, it is like a sext) by using ValueTracking, however
ValueTracking is not smart enough to infer that the value is positive in
some cases. Recently, @nikic implemented #67982 which keeps the
information that a zext is non-negative. This PR simply uses this
information to do the folding accordingly.

TLDR : This PR uses the recent nneg tag on zext to fold the icmp
accordingly in instcombine.

This PR also contains test cases for sext/zext folding with InstCombine
as well as a x86 regression tests for the max/min case.
  • Loading branch information
leo-ard committed Nov 12, 2023
1 parent d05bada commit ff36411
Show file tree
Hide file tree
Showing 3 changed files with 242 additions and 5 deletions.
15 changes: 10 additions & 5 deletions llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5554,11 +5554,16 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
return new ICmpInst(ICmp.getPredicate(), Builder.CreateOr(X, Y),
Constant::getNullValue(X->getType()));

// If we have mismatched casts, treat the zext of a non-negative source as
// a sext to simulate matching casts. Otherwise, we are done.
// TODO: Can we handle some predicates (equality) without non-negative?
if ((IsZext0 && isKnownNonNegative(X, DL, 0, &AC, &ICmp, &DT)) ||
(IsZext1 && isKnownNonNegative(Y, DL, 0, &AC, &ICmp, &DT)))
// If we have mismatched casts and zext has the nneg flag, we can
// treat the "zext nneg" as "sext". Otherwise, we cannot fold and quit.

auto *NonNegInst0 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(0));
auto *NonNegInst1 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(1));

bool IsNonNeg0 = NonNegInst0 && NonNegInst0->hasNonNeg();
bool IsNonNeg1 = NonNegInst1 && NonNegInst1->hasNonNeg();

if ((IsZext0 && IsNonNeg0) || (IsZext1 && IsNonNeg1))
IsSignedExt = true;
else
return nullptr;
Expand Down
121 changes: 121 additions & 0 deletions llvm/test/Transforms/InstCombine/icmp-ext-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,17 @@ define i1 @zext_sext_sgt(i8 %x, i8 %y) {
ret i1 %c
}

define i1 @zext_nneg_sext_sgt(i8 %x, i8 %y) {
; CHECK-LABEL: @zext_nneg_sext_sgt(
; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%a = zext nneg i8 %x to i32
%b = sext i8 %y to i32
%c = icmp sgt i32 %a, %b
ret i1 %c
}

define i1 @zext_sext_ugt(i8 %x, i8 %y) {
; CHECK-LABEL: @zext_sext_ugt(
; CHECK-NEXT: [[A:%.*]] = zext i8 [[X:%.*]] to i32
Expand All @@ -143,6 +154,18 @@ define i1 @zext_sext_ugt(i8 %x, i8 %y) {
ret i1 %c
}


define i1 @zext_nneg_sext_ugt(i8 %x, i8 %y) {
; CHECK-LABEL: @zext_nneg_sext_ugt(
; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%a = zext nneg i8 %x to i32
%b = sext i8 %y to i32
%c = icmp ugt i32 %a, %b
ret i1 %c
}

define i1 @zext_sext_eq(i8 %x, i8 %y) {
; CHECK-LABEL: @zext_sext_eq(
; CHECK-NEXT: [[A:%.*]] = zext i8 [[X:%.*]] to i32
Expand All @@ -156,6 +179,18 @@ define i1 @zext_sext_eq(i8 %x, i8 %y) {
ret i1 %c
}

define i1 @zext_nneg_sext_eq(i8 %x, i8 %y) {
; CHECK-LABEL: @zext_nneg_sext_eq(
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%a = zext nneg i8 %x to i32
%b = sext i8 %y to i32
%c = icmp eq i32 %a, %b
ret i1 %c
}


define i1 @zext_sext_sle_op0_narrow(i8 %x, i16 %y) {
; CHECK-LABEL: @zext_sext_sle_op0_narrow(
; CHECK-NEXT: [[A:%.*]] = zext i8 [[X:%.*]] to i32
Expand All @@ -169,6 +204,19 @@ define i1 @zext_sext_sle_op0_narrow(i8 %x, i16 %y) {
ret i1 %c
}


define i1 @zext_nneg_sext_sle_op0_narrow(i8 %x, i16 %y) {
; CHECK-LABEL: @zext_nneg_sext_sle_op0_narrow(
; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i16
; CHECK-NEXT: [[C:%.*]] = icmp sle i16 [[TMP1]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%a = zext nneg i8 %x to i32
%b = sext i16 %y to i32
%c = icmp sle i32 %a, %b
ret i1 %c
}

define i1 @zext_sext_ule_op0_wide(i9 %x, i8 %y) {
; CHECK-LABEL: @zext_sext_ule_op0_wide(
; CHECK-NEXT: [[A:%.*]] = zext i9 [[X:%.*]] to i32
Expand All @@ -182,6 +230,18 @@ define i1 @zext_sext_ule_op0_wide(i9 %x, i8 %y) {
ret i1 %c
}

define i1 @zext_nneg_sext_ule_op0_wide(i9 %x, i8 %y) {
; CHECK-LABEL: @zext_nneg_sext_ule_op0_wide(
; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[Y:%.*]] to i9
; CHECK-NEXT: [[C:%.*]] = icmp uge i9 [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%a = zext nneg i9 %x to i32
%b = sext i8 %y to i32
%c = icmp ule i32 %a, %b
ret i1 %c
}

define i1 @sext_zext_slt(i8 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_slt(
; CHECK-NEXT: [[A:%.*]] = sext i8 [[X:%.*]] to i32
Expand All @@ -195,6 +255,18 @@ define i1 @sext_zext_slt(i8 %x, i8 %y) {
ret i1 %c
}


define i1 @sext_zext_nneg_slt(i8 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_nneg_slt(
; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%a = sext i8 %x to i32
%b = zext nneg i8 %y to i32
%c = icmp slt i32 %a, %b
ret i1 %c
}

define i1 @sext_zext_ult(i8 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_ult(
; CHECK-NEXT: [[A:%.*]] = sext i8 [[X:%.*]] to i32
Expand All @@ -208,6 +280,17 @@ define i1 @sext_zext_ult(i8 %x, i8 %y) {
ret i1 %c
}

define i1 @sext_zext_nneg_ult(i8 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_nneg_ult(
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%a = sext i8 %x to i32
%b = zext nneg i8 %y to i32
%c = icmp ult i32 %a, %b
ret i1 %c
}

define <2 x i1> @sext_zext_ne(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @sext_zext_ne(
; CHECK-NEXT: [[A:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
Expand All @@ -221,6 +304,18 @@ define <2 x i1> @sext_zext_ne(<2 x i8> %x, <2 x i8> %y) {
ret <2 x i1> %c
}


define <2 x i1> @sext_zext_nneg_ne(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @sext_zext_nneg_ne(
; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%a = sext <2 x i8> %x to <2 x i32>
%b = zext nneg <2 x i8> %y to <2 x i32>
%c = icmp ne <2 x i32> %a, %b
ret <2 x i1> %c
}

define i1 @sext_zext_sge_op0_narrow(i5 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_sge_op0_narrow(
; CHECK-NEXT: [[A:%.*]] = sext i5 [[X:%.*]] to i32
Expand All @@ -234,6 +329,19 @@ define i1 @sext_zext_sge_op0_narrow(i5 %x, i8 %y) {
ret i1 %c
}


define i1 @sext_zext_nneg_sge_op0_narrow(i5 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_nneg_sge_op0_narrow(
; CHECK-NEXT: [[TMP1:%.*]] = sext i5 [[X:%.*]] to i8
; CHECK-NEXT: [[C:%.*]] = icmp sge i8 [[TMP1]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%a = sext i5 %x to i32
%b = zext nneg i8 %y to i32
%c = icmp sge i32 %a, %b
ret i1 %c
}

define i1 @sext_zext_uge_op0_wide(i16 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_uge_op0_wide(
; CHECK-NEXT: [[A:%.*]] = sext i16 [[X:%.*]] to i32
Expand All @@ -247,6 +355,19 @@ define i1 @sext_zext_uge_op0_wide(i16 %x, i8 %y) {
ret i1 %c
}


define i1 @sext_zext_nneg_uge_op0_wide(i16 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_nneg_uge_op0_wide(
; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[Y:%.*]] to i16
; CHECK-NEXT: [[C:%.*]] = icmp ule i16 [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%a = sext i16 %x to i32
%b = zext nneg i8 %y to i32
%c = icmp uge i32 %a, %b
ret i1 %c
}

define i1 @zext_sext_sgt_known_nonneg(i8 %x, i8 %y) {
; CHECK-LABEL: @zext_sext_sgt_known_nonneg(
; CHECK-NEXT: [[N:%.*]] = udiv i8 127, [[X:%.*]]
Expand Down
111 changes: 111 additions & 0 deletions llvm/test/Transforms/PhaseOrdering/min_max_loop.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
; RUN: opt < %s -O3 -S | FileCheck %s
; See issue #55013 and PR #70845 for more details.
; This test comes from the following C program, compiled with clang
;
;; short vecreduce_smin_v2i16(int n, short* v)
;; {
;; short p = 0;
;; for (int i = 0; i < n; ++i)
;; p = p > v[i] ? v[i] : p;
;; return p;
;; }
;
;; short vecreduce_smax_v2i16(int n, short* v)
;; {
;; short p = 0;
;; for (int i = 0; i < n; ++i)
;; p = p < v[i] ? v[i] : p;
;; return p;
;; }

define i16 @vecreduce_smin_v2i16(i32 %n, ptr %v) {
; CHECK-LABEL: define i16 @vecreduce_smin_v2i16(
; CHECK: @llvm.smin.v2i16

entry:
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%cmp = icmp slt i32 %i.0, %n
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
%conv = sext i16 %p.0 to i32
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
%0 = load i16, ptr %arrayidx, align 2
%conv1 = sext i16 %0 to i32
%cmp2 = icmp sgt i32 %conv, %conv1
br i1 %cmp2, label %cond.true, label %cond.false

cond.true: ; preds = %for.body
%idxprom4 = sext i32 %i.0 to i64
%arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4
%1 = load i16, ptr %arrayidx5, align 2
%conv6 = sext i16 %1 to i32
br label %cond.end

cond.false: ; preds = %for.body
%conv7 = sext i16 %p.0 to i32
br label %cond.end

cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ]
%conv8 = trunc i32 %cond to i16
br label %for.inc

for.inc: ; preds = %cond.end
%inc = add nsw i32 %i.0, 1
br label %for.cond

for.end: ; preds = %for.cond
ret i16 %p.0
}

define i16 @vecreduce_smax_v2i16(i32 %n, ptr %v) {
; CHECK-LABEL: define i16 @vecreduce_smax_v2i16(
; CHECK: @llvm.smax.v2i16

entry:
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%cmp = icmp slt i32 %i.0, %n
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
%conv = sext i16 %p.0 to i32
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
%0 = load i16, ptr %arrayidx, align 2
%conv1 = sext i16 %0 to i32
%cmp2 = icmp slt i32 %conv, %conv1
br i1 %cmp2, label %cond.true, label %cond.false

cond.true: ; preds = %for.body
%idxprom4 = sext i32 %i.0 to i64
%arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4
%1 = load i16, ptr %arrayidx5, align 2
%conv6 = sext i16 %1 to i32
br label %cond.end

cond.false: ; preds = %for.body
%conv7 = sext i16 %p.0 to i32
br label %cond.end

cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ]
%conv8 = trunc i32 %cond to i16
br label %for.inc

for.inc: ; preds = %cond.end
%inc = add nsw i32 %i.0, 1
br label %for.cond

for.end: ; preds = %for.cond
ret i16 %p.0
}

0 comments on commit ff36411

Please sign in to comment.