Skip to content

Commit

Permalink
[InstCombine] fold sub of min/max intrinsics with invertible ops
Browse files Browse the repository at this point in the history
This is a translation of the existing code to handle the intrinsics
and another step towards D98152.

https://alive2.llvm.org/ce/z/jA7eBC

This pattern is already handled by underlying folds if there are
fewer uses, so the minimal tests in this case have extra uses.

The larger cmyk tests show the motivation - when combined with
other folds, we invert a larger sequence and eliminate 'not' ops.
  • Loading branch information
rotateright committed Sep 11, 2021
1 parent c55e021 commit 28afaed
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 117 deletions.
33 changes: 25 additions & 8 deletions llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Expand Up @@ -2057,12 +2057,31 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAnd(
Op0, Builder.CreateNot(Y, Y->getName() + ".not"));

// ~X - Min/Max(~X, O) -> Max/Min(X, ~O) - X
// ~X - Min/Max(O, ~X) -> Max/Min(X, ~O) - X
// Min/Max(~X, O) - ~X -> X - Max/Min(X, ~O)
// Min/Max(O, ~X) - ~X -> X - Max/Min(X, ~O)
// So long as O here is freely invertible, this will be neutral or a win.
// Note: We don't generate the inverse max/min, just create the 'not' of
// it and let other folds do the rest.
if (match(Op0, m_Not(m_Value(X))) &&
match(Op1, m_c_MaxOrMin(m_Specific(Op0), m_Value(Y))) &&
!Op0->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
Value *Not = Builder.CreateNot(Op1);
return BinaryOperator::CreateSub(Not, X);
}
if (match(Op1, m_Not(m_Value(X))) &&
match(Op0, m_c_MaxOrMin(m_Specific(Op1), m_Value(Y))) &&
!Op1->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
Value *Not = Builder.CreateNot(Op0);
return BinaryOperator::CreateSub(X, Not);
}

// TODO: This is the same logic as above but handles the cmp-select idioms
// for min/max, so the use checks are increased to account for the
// extra instructions. If we canonicalize to intrinsics, this block
// can likely be removed.
{
// ~A - Min/Max(~A, O) -> Max/Min(A, ~O) - A
// ~A - Min/Max(O, ~A) -> Max/Min(A, ~O) - A
// Min/Max(~A, O) - ~A -> A - Max/Min(A, ~O)
// Min/Max(O, ~A) - ~A -> A - Max/Min(A, ~O)
// So long as O here is freely invertible, this will be neutral or a win.
Value *LHS, *RHS, *A;
Value *NotA = Op0, *MinMax = Op1;
SelectPatternFlavor SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
Expand All @@ -2076,11 +2095,9 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (NotA == LHS)
std::swap(LHS, RHS);
// LHS is now O above and expected to have at least 2 uses (the min/max)
// NotA is epected to have 2 uses from the min/max and 1 from the sub.
// NotA is expected to have 2 uses from the min/max and 1 from the sub.
if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
!NotA->hasNUsesOrMore(4)) {
// Note: We don't generate the inverse max/min, just create the not of
// it and let other folds do the rest.
Value *Not = Builder.CreateNot(MinMax);
if (NotA == Op0)
return BinaryOperator::CreateSub(Not, A);
Expand Down
168 changes: 72 additions & 96 deletions llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
Expand Up @@ -1569,14 +1569,12 @@ declare void @use4(i8, i8, i8, i8)

define void @cmyk(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[NOTG]], [[K]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[NOTB]], [[K]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1596,14 +1594,12 @@ define void @cmyk(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute1(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute1(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTB]], i8 [[M]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[NOTG]], [[K]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[NOTB]], [[K]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1623,14 +1619,12 @@ define void @cmyk_commute1(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute2(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute2(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTG]], i8 [[NOTR]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTB]], i8 [[M]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[NOTG]], [[K]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[NOTB]], [[K]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[G:%.*]], i8 [[R:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1650,14 +1644,12 @@ define void @cmyk_commute2(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute3(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute3(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTG]], i8 [[NOTR]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[NOTG]], [[K]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[NOTB]], [[K]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[G:%.*]], i8 [[R:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1678,14 +1670,12 @@ define void @cmyk_commute3(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute4(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute4(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTG]], i8 [[NOTR]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 [[NOTB]])
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[NOTB]], [[K]]
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[NOTG]], [[K]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[G:%.*]], i8 [[R:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.umax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]]
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1706,14 +1696,12 @@ define void @cmyk_commute4(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute5(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute5(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[NOTG]], i8 [[NOTR]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[NOTB]], [[K]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[NOTG]], [[K]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[G:%.*]], i8 [[R:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1731,14 +1719,12 @@ define void @cmyk_commute5(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute6(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute6(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K]], [[NOTR]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K]], [[NOTG]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K]], [[NOTB]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[R]], [[TMP2]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[G]], [[TMP2]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[B]], [[TMP2]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1758,14 +1744,12 @@ define void @cmyk_commute6(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute7(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute7(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTB]], i8 [[M]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K]], [[NOTR]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K]], [[NOTG]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K]], [[NOTB]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[R]], [[TMP2]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[G]], [[TMP2]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[B]], [[TMP2]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1785,14 +1769,12 @@ define void @cmyk_commute7(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute8(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute8(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTG]], i8 [[NOTR]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTB]], i8 [[M]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K]], [[NOTR]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K]], [[NOTG]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K]], [[NOTB]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[G:%.*]], i8 [[R:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[R]], [[TMP2]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[G]], [[TMP2]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[B]], [[TMP2]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1812,14 +1794,12 @@ define void @cmyk_commute8(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute9(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute9(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTG]], i8 [[NOTR]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K]], [[NOTR]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K]], [[NOTG]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K]], [[NOTB]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[G:%.*]], i8 [[R:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[R]], [[TMP2]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[G]], [[TMP2]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[B]], [[TMP2]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1840,14 +1820,12 @@ define void @cmyk_commute9(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute10(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute10(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTG]], i8 [[NOTR]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 [[NOTB]])
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[NOTB]], [[K]]
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K]], [[NOTR]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[NOTG]], [[K]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[G:%.*]], i8 [[R:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.umax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]]
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[R]], [[TMP2]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand All @@ -1868,14 +1846,12 @@ define void @cmyk_commute10(i8 %r, i8 %g, i8 %b) {

define void @cmyk_commute11(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk_commute11(
; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[NOTG]], i8 [[NOTR]])
; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K]], [[NOTB]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K]], [[NOTG]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[G:%.*]], i8 [[R:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
; CHECK-NEXT: [[YK:%.*]] = sub i8 [[B]], [[TMP2]]
; CHECK-NEXT: [[MK:%.*]] = sub i8 [[G]], [[TMP2]]
; CHECK-NEXT: call void @use4(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]])
; CHECK-NEXT: ret void
;
Expand Down
28 changes: 15 additions & 13 deletions llvm/test/Transforms/InstCombine/sub-minmax.ll
Expand Up @@ -393,20 +393,20 @@ define void @umin3_not_all_ops_extra_uses_invert_subs(i8 %x, i8 %y, i8 %z) {
ret void
}

; TODO: Handle this pattern with extra uses because it shows up in benchmarks.
; Handle this pattern with extra uses because it shows up in benchmarks.
; ~X - Min/Max(~X, O) -> Max/Min(X, ~O) - X
; ~X - Min/Max(O, ~X) -> Max/Min(X, ~O) - X
; Min/Max(~X, O) - ~X -> X - Max/Min(X, ~O)
; Min/Max(O, ~X) - ~X -> X - Max/Min(X, ~O)

define i8 @umin_not_sub_intrinsic_commute0(i8 %x, i8 %y) {
; CHECK-LABEL: @umin_not_sub_intrinsic_commute0(
; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1
; CHECK-NEXT: call void @use8(i8 [[NY]])
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[NX]], i8 [[NY]])
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y]])
; CHECK-NEXT: [[M:%.*]] = xor i8 [[TMP1]], -1
; CHECK-NEXT: call void @use8(i8 [[M]])
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[NX]], [[M]]
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[TMP1]], [[X]]
; CHECK-NEXT: ret i8 [[SUBX]]
;
%nx = xor i8 %x, -1
Expand All @@ -420,12 +420,12 @@ define i8 @umin_not_sub_intrinsic_commute0(i8 %x, i8 %y) {

define i8 @umax_not_sub_intrinsic_commute1(i8 %x, i8 %y) {
; CHECK-LABEL: @umax_not_sub_intrinsic_commute1(
; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1
; CHECK-NEXT: call void @use8(i8 [[NY]])
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[NY]], i8 [[NX]])
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 [[Y]])
; CHECK-NEXT: [[M:%.*]] = xor i8 [[TMP1]], -1
; CHECK-NEXT: call void @use8(i8 [[M]])
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[NX]], [[M]]
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[TMP1]], [[X]]
; CHECK-NEXT: ret i8 [[SUBX]]
;
%nx = xor i8 %x, -1
Expand All @@ -439,12 +439,12 @@ define i8 @umax_not_sub_intrinsic_commute1(i8 %x, i8 %y) {

define i8 @smin_not_sub_intrinsic_commute2(i8 %x, i8 %y) {
; CHECK-LABEL: @smin_not_sub_intrinsic_commute2(
; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1
; CHECK-NEXT: call void @use8(i8 [[NY]])
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NX]], i8 [[NY]])
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y]])
; CHECK-NEXT: [[M:%.*]] = xor i8 [[TMP1]], -1
; CHECK-NEXT: call void @use8(i8 [[M]])
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[M]], [[NX]]
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[X]], [[TMP1]]
; CHECK-NEXT: ret i8 [[SUBX]]
;
%nx = xor i8 %x, -1
Expand All @@ -458,12 +458,12 @@ define i8 @smin_not_sub_intrinsic_commute2(i8 %x, i8 %y) {

define i8 @smax_not_sub_intrinsic_commute3(i8 %x, i8 %y) {
; CHECK-LABEL: @smax_not_sub_intrinsic_commute3(
; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1
; CHECK-NEXT: call void @use8(i8 [[NY]])
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[NY]], i8 [[NX]])
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 [[Y]])
; CHECK-NEXT: [[M:%.*]] = xor i8 [[TMP1]], -1
; CHECK-NEXT: call void @use8(i8 [[M]])
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[M]], [[NX]]
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[X]], [[TMP1]]
; CHECK-NEXT: ret i8 [[SUBX]]
;
%nx = xor i8 %x, -1
Expand All @@ -475,6 +475,8 @@ define i8 @smax_not_sub_intrinsic_commute3(i8 %x, i8 %y) {
ret i8 %subx
}

; negative test - don't increase instruction count

define i8 @umin_not_sub_intrinsic_uses(i8 %x, i8 %y) {
; CHECK-LABEL: @umin_not_sub_intrinsic_uses(
; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
Expand Down

0 comments on commit 28afaed

Please sign in to comment.