Skip to content

Commit

Permalink
[InstCombine] reduce even more unsigned saturated add with 'not' op
Browse files Browse the repository at this point in the history
We want the icmp to use the sum (the result of the add) so that the pattern
can be matched by m_UAddWithOverflow and the 'not' op is eliminated. This is
discussed in D51929 and is another step towards solving PR14613:
https://bugs.llvm.org/show_bug.cgi?id=14613

  Name: uaddsat, -1 fval
  %notx = xor i32 %x, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %notx, %y
  %r = select i1 %c, i32 %a, i32 -1
  =>
  %a = add i32 %x, %y
  %c2 = icmp ugt i32 %y, %a
  %r = select i1 %c2, i32 -1, i32 %a

  Name: uaddsat, -1 fval + ult
  %notx = xor i32 %x, -1
  %a = add i32 %x, %y
  %c = icmp ult i32 %y, %notx
  %r = select i1 %c, i32 %a, i32 -1
  =>
  %a = add i32 %x, %y
  %c2 = icmp ugt i32 %y, %a
  %r = select i1 %c2, i32 -1, i32 %a

https://rise4fun.com/Alive/nTp

llvm-svn: 354393
  • Loading branch information
rotateright committed Feb 19, 2019
1 parent bf223e9 commit c1e0184
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 23 deletions.
18 changes: 13 additions & 5 deletions llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
Expand Up @@ -695,22 +695,30 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
return Builder.CreateSelect(NewCmp, FVal, TVal);
}

// Canonicalize to 'ULT' to simplify matching below.
// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
// There are 8 commuted variants.
// Canonicalize -1 (saturated result) to true value of the select.
if (match(FVal, m_AllOnes())) {
std::swap(TVal, FVal);
std::swap(Cmp0, Cmp1);
}
if (!match(TVal, m_AllOnes()))
return nullptr;

// Canonicalize predicate to 'ULT'.
if (Pred == ICmpInst::ICMP_UGT) {
Pred = ICmpInst::ICMP_ULT;
std::swap(Cmp0, Cmp1);
}

if (Pred != ICmpInst::ICMP_ULT)
return nullptr;

// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
// TODO: There are more variations of this pattern.
Value *Y;
if (match(TVal, m_AllOnes()) && match(Cmp0, m_Not(m_Value(X))) &&
if (match(Cmp0, m_Not(m_Value(X))) &&
match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
// Change the comparison to use the sum (false value of the select). That is
// the canonical pattern match form for uadd.with.overflow and eliminates a
// a canonical pattern match form for uadd.with.overflow and eliminates a
// use of the 'not' op:
// (~X u< Y) ? -1 : (X + Y) --> ((X + Y) u< Y) ? -1 : (X + Y)
// (~X u< Y) ? -1 : (Y + X) --> ((Y + X) u< Y) ? -1 : (Y + X)
Expand Down
32 changes: 14 additions & 18 deletions llvm/test/Transforms/InstCombine/saturating-add-sub.ll
Expand Up @@ -706,11 +706,10 @@ define <2 x i32> @uadd_sat_ugt_commute_add(<2 x i32> %xp, <2 x i32> %yp) {
define i32 @uadd_sat_commute_select(i32 %x, i32 %yp) {
; CHECK-LABEL: @uadd_sat_commute_select(
; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X]]
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[Y]], [[NOTX]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
; CHECK-NEXT: ret i32 [[R]]
; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
; CHECK-NEXT: ret i32 [[TMP2]]
;
%y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
%notx = xor i32 %x, -1
Expand All @@ -724,11 +723,10 @@ define i32 @uadd_sat_commute_select_commute_add(i32 %xp, i32 %yp) {
; CHECK-LABEL: @uadd_sat_commute_select_commute_add(
; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[XP:%.*]]
; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1
; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X]], [[Y]]
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[Y]], [[NOTX]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
; CHECK-NEXT: ret i32 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
; CHECK-NEXT: ret i32 [[TMP2]]
;
%x = urem i32 42, %xp ; thwart complexity-based-canonicalization
%y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
Expand All @@ -741,11 +739,10 @@ define i32 @uadd_sat_commute_select_commute_add(i32 %xp, i32 %yp) {

define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @uadd_sat_commute_select_ugt(
; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X]]
; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[NOTX]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[A]], <2 x i32> <i32 -1, i32 -1>
; CHECK-NEXT: ret <2 x i32> [[R]]
; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
;
%notx = xor <2 x i32> %x, <i32 -1, i32 -1>
%a = add <2 x i32> %y, %x
Expand All @@ -757,11 +754,10 @@ define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
define i32 @uadd_sat_commute_select_ugt_commute_add(i32 %xp, i32 %y) {
; CHECK-LABEL: @uadd_sat_commute_select_ugt_commute_add(
; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[XP:%.*]]
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1
; CHECK-NEXT: [[A:%.*]] = add i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTX]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
; CHECK-NEXT: ret i32 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
; CHECK-NEXT: ret i32 [[TMP2]]
;
%x = srem i32 42, %xp ; thwart complexity-based-canonicalization
%notx = xor i32 %x, -1
Expand Down

0 comments on commit c1e0184

Please sign in to comment.