Skip to content

Commit

Permalink
[InstCombine] Extend canonicalizeClampLike to handle truncated inputs
Browse files Browse the repository at this point in the history
This extends the canonicalizeClampLike function to handle cases where the
input is truncated, while still matching on the types of the ICmps. For
example:
  %t = trunc i32 %X to i8
  %a = add i32 %X, 128
  %cmp = icmp ult i32 %a, 256
  %c = icmp sgt i32 %X, -1
  %f = select i1 %c, i8 High, i8 Low
  %r = select i1 %cmp, i8 %t, i8 %f
becomes
  %c1 = icmp slt i32 %X, -128
  %c2 = icmp sge i32 %X, 128
  %s1 = select i1 %c1, i32 sext(Low), i32 %X
  %s2 = select i1 %c2, i32 sext(High), i32 %s1
  %t = trunc i32 %s2 to i8
https://alive2.llvm.org/ce/z/vPzfxH

We limit the transform to constant High and Low values, where we know
the sexts are free.

Differential Revision: https://reviews.llvm.org/D108049
  • Loading branch information
davemgreen committed Oct 28, 2021
1 parent abb2a91 commit 9358384
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 32 deletions.
33 changes: 26 additions & 7 deletions llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1289,8 +1289,8 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
// Iff -C1 s<= C2 s<= C0-C1
// The ULT predicate can also be UGT iff C0 != -1 (+invert result)
// SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.)
static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
InstCombiner::BuilderTy &Builder) {
static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
InstCombiner::BuilderTy &Builder) {
Value *X = Sel0.getTrueValue();
Value *Sel1 = Sel0.getFalseValue();

Expand Down Expand Up @@ -1344,6 +1344,11 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
if (!Sel1->hasOneUse())
return nullptr;

// If the types do not match, look through any truncs to the underlying
// instruction.
if (Cmp00->getType() != X->getType() && X->hasOneUse())
match(X, m_TruncOrSelf(m_Value(X)));

// We now can finish matching the condition of the outermost select:
// it should either be the X itself, or an addition of some constant to X.
Constant *C1;
Expand Down Expand Up @@ -1414,15 +1419,29 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
if (!match(Precond2, m_One()))
return nullptr;

// If we are matching from a truncated input, we need to sext the
// ReplacementLow and ReplacementHigh values. Only do the transform if they
// are free to extend due to being constants.
if (X->getType() != Sel0.getType()) {
Constant *LowC, *HighC;
if (!match(ReplacementLow, m_ImmConstant(LowC)) ||
!match(ReplacementHigh, m_ImmConstant(HighC)))
return nullptr;
ReplacementLow = ConstantExpr::getSExt(LowC, X->getType());
ReplacementHigh = ConstantExpr::getSExt(HighC, X->getType());
}

// All good, finally emit the new pattern.
Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl);
Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl);
Value *MaybeReplacedLow =
Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X);
Instruction *MaybeReplacedHigh =
SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);

return MaybeReplacedHigh;
// Create the final select. If we looked through a truncate above, we will
// need to retruncate the result.
Value *MaybeReplacedHigh = Builder.CreateSelect(
ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);
return Builder.CreateTrunc(MaybeReplacedHigh, Sel0.getType());
}

// If we have
Expand Down Expand Up @@ -1513,8 +1532,8 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, *this))
return NewAbs;

if (Instruction *NewAbs = canonicalizeClampLike(SI, *ICI, Builder))
return NewAbs;
if (Value *V = canonicalizeClampLike(SI, *ICI, Builder))
return replaceInstUsesWith(SI, V);

if (Instruction *NewSel =
tryToReuseConstantFromSelectInComparison(SI, *ICI, *this))
Expand Down
114 changes: 89 additions & 25 deletions llvm/test/Transforms/InstCombine/truncating-saturate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

declare void @use(i32)
declare void @use16(i16)
declare void @use1(i1)

define i8 @testi16i8(i16 %add) {
Expand Down Expand Up @@ -57,13 +58,12 @@ define i32 @testi64i32(i64 %add) {

define i16 @testi32i16i8(i32 %add) {
; CHECK-LABEL: @testi32i16i8(
; CHECK-NEXT: [[A:%.*]] = add i32 [[ADD:%.*]], 128
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A]], 256
; CHECK-NEXT: [[T:%.*]] = trunc i32 [[ADD]] to i16
; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[ADD]], -1
; CHECK-NEXT: [[F:%.*]] = select i1 [[C]], i16 127, i16 -128
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i16 [[T]], i16 [[F]]
; CHECK-NEXT: ret i16 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[ADD:%.*]], -128
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[ADD]], i32 -128
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127
; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT: ret i16 [[TMP5]]
;
%a = add i32 %add, 128
%cmp = icmp ult i32 %a, 256
Expand All @@ -76,13 +76,12 @@ define i16 @testi32i16i8(i32 %add) {

define <4 x i16> @testv4i32i16i8(<4 x i32> %add) {
; CHECK-LABEL: @testv4i32i16i8(
; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[ADD:%.*]], <i32 128, i32 128, i32 128, i32 128>
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[A]], <i32 256, i32 256, i32 256, i32 256>
; CHECK-NEXT: [[T:%.*]] = trunc <4 x i32> [[ADD]] to <4 x i16>
; CHECK-NEXT: [[C:%.*]] = icmp sgt <4 x i32> [[ADD]], <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: [[F:%.*]] = select <4 x i1> [[C]], <4 x i16> <i16 127, i16 127, i16 127, i16 127>, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[CMP]], <4 x i16> [[T]], <4 x i16> [[F]]
; CHECK-NEXT: ret <4 x i16> [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[ADD:%.*]], <i32 -128, i32 -128, i32 -128, i32 -128>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[ADD]], <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], <i32 127, i32 127, i32 127, i32 127>
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP2]], <4 x i32> <i32 127, i32 127, i32 127, i32 127>
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i16>
; CHECK-NEXT: ret <4 x i16> [[TMP5]]
;
%a = add <4 x i32> %add, <i32 128, i32 128, i32 128, i32 128>
%cmp = icmp ult <4 x i32> %a, <i32 256, i32 256, i32 256, i32 256>
Expand All @@ -98,8 +97,8 @@ define i32 @testi32i32i8(i32 %add) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[ADD:%.*]], -128
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[ADD]], i32 -128
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127
; CHECK-NEXT: ret i32 [[R]]
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127
; CHECK-NEXT: ret i32 [[TMP4]]
;
%a = add i32 %add, 128
%cmp = icmp ult i32 %a, 256
Expand All @@ -115,8 +114,8 @@ define i16 @test_truncfirst(i32 %add) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[T]], -128
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[T]], i16 -128
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127
; CHECK-NEXT: ret i16 [[R]]
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127
; CHECK-NEXT: ret i16 [[TMP4]]
;
%t = trunc i32 %add to i16
%a = add i16 %t, 128
Expand Down Expand Up @@ -310,13 +309,12 @@ define i8 @testi32i8(i32 %add) {

define i16 @differentconsts(i32 %x, i16 %replacement_low, i16 %replacement_high) {
; CHECK-LABEL: @differentconsts(
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], 128
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i16 256, i16 -1
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144
; CHECK-NEXT: [[T4:%.*]] = trunc i32 [[X]] to i16
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i16 [[T4]], i16 [[T1]]
; CHECK-NEXT: ret i16 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -16
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[X]], 127
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[X]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i16 256, i16 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i16 -1, i16 [[TMP4]]
; CHECK-NEXT: ret i16 [[TMP5]]
;
%t0 = icmp slt i32 %x, 128
%t1 = select i1 %t0, i16 256, i16 65535
Expand Down Expand Up @@ -570,6 +568,72 @@ define i32 @oneusethree(i64 %add) {
ret i32 %cond.i
}

; Negative test: the truncate %t4 has a second use (the call to @use16) in
; addition to feeding the outer select. The CHECK lines expect the original
; compare/select sequence to be left as written, i.e. the clamp-like pattern
; is not rewritten when the trunc is used more than once.
; NOTE: %replacement_low and %replacement_high are not referenced in the body;
; the replacement values are the constants 256 and 65535 (printed as i16 -1).
define i16 @differentconsts_usetrunc(i32 %x, i16 %replacement_low, i16 %replacement_high) {
; CHECK-LABEL: @differentconsts_usetrunc(
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], 128
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i16 256, i16 -1
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144
; CHECK-NEXT: [[T4:%.*]] = trunc i32 [[X]] to i16
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i16 [[T4]], i16 [[T1]]
; CHECK-NEXT: call void @use16(i16 [[T4]])
; CHECK-NEXT: ret i16 [[R]]
;
%t0 = icmp slt i32 %x, 128
%t1 = select i1 %t0, i16 256, i16 65535
%t2 = add i32 %x, 16
%t3 = icmp ult i32 %t2, 144
%t4 = trunc i32 %x to i16
%r = select i1 %t3, i16 %t4, i16 %t1
call void @use16(i16 %t4)
ret i16 %r
}

; Positive test: the add %t2 has a second use (the call to @use), but the
; truncate %t4 is used only by the outer select. The CHECK lines expect the
; pattern to be rewritten into two signed compares directly on %x
; (slt -16 and sgt 127) feeding a pair of selects, while the add is kept only
; as the argument of the call to @use.
; NOTE: %replacement_low and %replacement_high are not referenced in the body;
; the replacement values are the constants 256 and 65535 (printed as i16 -1).
define i16 @differentconsts_useadd(i32 %x, i16 %replacement_low, i16 %replacement_high) {
; CHECK-LABEL: @differentconsts_useadd(
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X:%.*]], 16
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], -16
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[X]], 127
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[X]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i16 256, i16 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i16 -1, i16 [[TMP4]]
; CHECK-NEXT: call void @use(i32 [[T2]])
; CHECK-NEXT: ret i16 [[TMP5]]
;
%t0 = icmp slt i32 %x, 128
%t1 = select i1 %t0, i16 256, i16 65535
%t2 = add i32 %x, 16
%t3 = icmp ult i32 %t2, 144
%t4 = trunc i32 %x to i16
%r = select i1 %t3, i16 %t4, i16 %t1
call void @use(i32 %t2)
ret i16 %r
}

; Negative test: both the truncate %t4 (via @use16) and the add %t2 (via @use)
; have an extra use besides the clamp-like pattern. The CHECK lines expect the
; original compare/select sequence to be left as written.
; NOTE: %replacement_low and %replacement_high are not referenced in the body;
; the replacement values are the constants 256 and 65535 (printed as i16 -1).
define i16 @differentconsts_useaddtrunc(i32 %x, i16 %replacement_low, i16 %replacement_high) {
; CHECK-LABEL: @differentconsts_useaddtrunc(
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], 128
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i16 256, i16 -1
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144
; CHECK-NEXT: [[T4:%.*]] = trunc i32 [[X]] to i16
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i16 [[T4]], i16 [[T1]]
; CHECK-NEXT: call void @use16(i16 [[T4]])
; CHECK-NEXT: call void @use(i32 [[T2]])
; CHECK-NEXT: ret i16 [[R]]
;
%t0 = icmp slt i32 %x, 128
%t1 = select i1 %t0, i16 256, i16 65535
%t2 = add i32 %x, 16
%t3 = icmp ult i32 %t2, 144
%t4 = trunc i32 %x to i16
%r = select i1 %t3, i16 %t4, i16 %t1
call void @use16(i16 %t4)
call void @use(i32 %t2)
ret i16 %r
}


define i8 @C0zero(i8 %X, i8 %y, i8 %z) {
; CHECK-LABEL: @C0zero(
; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], -10
Expand Down

0 comments on commit 9358384

Please sign in to comment.