-
Notifications
You must be signed in to change notification settings - Fork 10.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[InstCombine] Use zext's nneg flag for icmp folding (#70845)
This PR fixes #55013 : the max intrinsics is not generated for this simple loop case : https://godbolt.org/z/hxz1xhMPh. This is caused by a ICMP not being folded into a select, thus not generating the max intrinsics. For the story : Since LLVM 14, SCCP pass got smarter by folding sext into zext for positive ranges : https://reviews.llvm.org/D81756. After this change, InstCombine was sometimes unable to fold ICMP correctly as both of the arguments pointed to mismatched zext/sext. To fix this, @rotateright implemented this fix : https://reviews.llvm.org/D124419 that tries to resolve the mismatch by knowing if the argument of a zext is positive (in which case, it is like a sext) by using ValueTracking, however ValueTracking is not smart enough to infer that the value is positive in some cases. Recently, @nikic implemented #67982 which keeps the information that a zext is non-negative. This PR simply uses this information to do the folding accordingly. TLDR : This PR uses the recent nneg tag on zext to fold the icmp accordingly in instcombine. This PR also contains test cases for sext/zext folding with InstCombine as well as a x86 regression tests for the max/min case.
- Loading branch information
Showing
3 changed files
with
242 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
; RUN: opt < %s -O3 -S | FileCheck %s | ||
; See issue #55013 and PR #70845 for more details. | ||
; This test comes from the following C program, compiled with clang | ||
; | ||
;; short vecreduce_smin_v2i16(int n, short* v) | ||
;; { | ||
;; short p = 0; | ||
;; for (int i = 0; i < n; ++i) | ||
;; p = p > v[i] ? v[i] : p; | ||
;; return p; | ||
;; } | ||
; | ||
;; short vecreduce_smax_v2i16(int n, short* v) | ||
;; { | ||
;; short p = 0; | ||
;; for (int i = 0; i < n; ++i) | ||
;; p = p < v[i] ? v[i] : p; | ||
;; return p; | ||
;; } | ||
|
||
define i16 @vecreduce_smin_v2i16(i32 %n, ptr %v) { | ||
; CHECK-LABEL: define i16 @vecreduce_smin_v2i16( | ||
; CHECK: @llvm.smin.v2i16 | ||
|
||
entry: | ||
br label %for.cond | ||
|
||
for.cond: ; preds = %for.inc, %entry | ||
%p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ] | ||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] | ||
%cmp = icmp slt i32 %i.0, %n | ||
br i1 %cmp, label %for.body, label %for.end | ||
|
||
for.body: ; preds = %for.cond | ||
%conv = sext i16 %p.0 to i32 | ||
%idxprom = sext i32 %i.0 to i64 | ||
%arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom | ||
%0 = load i16, ptr %arrayidx, align 2 | ||
%conv1 = sext i16 %0 to i32 | ||
%cmp2 = icmp sgt i32 %conv, %conv1 | ||
br i1 %cmp2, label %cond.true, label %cond.false | ||
|
||
cond.true: ; preds = %for.body | ||
%idxprom4 = sext i32 %i.0 to i64 | ||
%arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4 | ||
%1 = load i16, ptr %arrayidx5, align 2 | ||
%conv6 = sext i16 %1 to i32 | ||
br label %cond.end | ||
|
||
cond.false: ; preds = %for.body | ||
%conv7 = sext i16 %p.0 to i32 | ||
br label %cond.end | ||
|
||
cond.end: ; preds = %cond.false, %cond.true | ||
%cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ] | ||
%conv8 = trunc i32 %cond to i16 | ||
br label %for.inc | ||
|
||
for.inc: ; preds = %cond.end | ||
%inc = add nsw i32 %i.0, 1 | ||
br label %for.cond | ||
|
||
for.end: ; preds = %for.cond | ||
ret i16 %p.0 | ||
} | ||
|
||
define i16 @vecreduce_smax_v2i16(i32 %n, ptr %v) { | ||
; CHECK-LABEL: define i16 @vecreduce_smax_v2i16( | ||
; CHECK: @llvm.smax.v2i16 | ||
|
||
entry: | ||
br label %for.cond | ||
|
||
for.cond: ; preds = %for.inc, %entry | ||
%p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ] | ||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] | ||
%cmp = icmp slt i32 %i.0, %n | ||
br i1 %cmp, label %for.body, label %for.end | ||
|
||
for.body: ; preds = %for.cond | ||
%conv = sext i16 %p.0 to i32 | ||
%idxprom = sext i32 %i.0 to i64 | ||
%arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom | ||
%0 = load i16, ptr %arrayidx, align 2 | ||
%conv1 = sext i16 %0 to i32 | ||
%cmp2 = icmp slt i32 %conv, %conv1 | ||
br i1 %cmp2, label %cond.true, label %cond.false | ||
|
||
cond.true: ; preds = %for.body | ||
%idxprom4 = sext i32 %i.0 to i64 | ||
%arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4 | ||
%1 = load i16, ptr %arrayidx5, align 2 | ||
%conv6 = sext i16 %1 to i32 | ||
br label %cond.end | ||
|
||
cond.false: ; preds = %for.body | ||
%conv7 = sext i16 %p.0 to i32 | ||
br label %cond.end | ||
|
||
cond.end: ; preds = %cond.false, %cond.true | ||
%cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ] | ||
%conv8 = trunc i32 %cond to i16 | ||
br label %for.inc | ||
|
||
for.inc: ; preds = %cond.end | ||
%inc = add nsw i32 %i.0, 1 | ||
br label %for.cond | ||
|
||
for.end: ; preds = %for.cond | ||
ret i16 %p.0 | ||
} |