[InstCombine] generalize subtract with 'not' operands; 2nd try
This is a re-try of 3aa009c, which was reverted at
9577fac because it caused an infinite loop.

For the extra test case, either re-ordering the transforms
or adding the extra clause to avoid sub-of-sub is enough
to prevent the infinite compile loop, but I'm doing both to be
safer.
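
(As a sketch of the excluded pattern, with an invented test name: the guard
!match(Op0, m_Sub(m_ImmConstant(), m_Value())) in the diff below declines IR like

  define i8 @sub_of_sub(i8 %x, i8 %y) {
    %s  = sub i8 42, %x      ; free to invert: ~(42 - %x) == %x - 43
    %ny = xor i8 %y, -1      ; free to invert: a literal 'not'
    %r  = sub i8 %s, %ny     ; both operands invert freely, but Op0 is a
                             ; constant-minus-value, so the new clause bails
    ret i8 %r
  }

Inverting a constant-minus-value yields an add-with-constant, and later
reassociations can rebuild the constant-minus-value, so without the clause
the transforms could cycle.)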

Original commit message:
The motivation was to get min/max intrinsics to parity
with cmp+select idioms, but this unlocks a few more
folds because isFreeToInvert recognizes add/sub with
constants too.

In the min/max example, we have too many extra uses
for smaller folds to improve things, but this fold
is able to eliminate uses even though we can't reduce
the number of instructions.
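
As a minimal illustration of the base identity (hypothetical IR, not one of
the tests in this patch):

  define i8 @not_not_sub(i8 %x, i8 %y) {
    %nx = xor i8 %x, -1      ; ~x
    %ny = xor i8 %y, -1      ; ~y
    %r  = sub i8 %nx, %ny    ; (~x) - (~y) == (-x - 1) - (-y - 1) == y - x
    ret i8 %r
  }

Matching the operands with isFreeToInvert instead of a literal
xor-with-minus-one lets values whose inversion folds away (constants,
add/sub with a constant) take the same path.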
rotateright committed Aug 23, 2021
1 parent ba6e15d commit cc9c545
Showing 4 changed files with 18 additions and 17 deletions.
17 changes: 12 additions & 5 deletions llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1828,12 +1828,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
   if (match(Op0, m_AllOnes()))
     return BinaryOperator::CreateNot(Op1);
 
-  // (~X) - (~Y) --> Y - X
-  Value *X, *Y;
-  if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y))))
-    return BinaryOperator::CreateSub(Y, X);
-
   // (X + -1) - Y --> ~Y + X
+  Value *X, *Y;
   if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
     return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);
 
@@ -1854,6 +1850,17 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     return BinaryOperator::CreateSub(X, Add);
   }
 
+  // (~X) - (~Y) --> Y - X
+  // This is placed after the other reassociations and explicitly excludes a
+  // sub-of-sub pattern to avoid infinite looping.
+  if (isFreeToInvert(Op0, Op0->hasOneUse()) &&
+      isFreeToInvert(Op1, Op1->hasOneUse()) &&
+      !match(Op0, m_Sub(m_ImmConstant(), m_Value()))) {
+    Value *NotOp0 = Builder.CreateNot(Op0);
+    Value *NotOp1 = Builder.CreateNot(Op1);
+    return BinaryOperator::CreateSub(NotOp1, NotOp0);
+  }
+
   auto m_AddRdx = [](Value *&Vec) {
     return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec)));
   };
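A note on the block added above: using ~a == -a - 1, if Op0 == ~NotOp0 and
Op1 == ~NotOp1, then Op0 - Op1 == NotOp1 - NotOp0, which is why the operands
swap in the final CreateSub. The hasOneUse() arguments tell isFreeToInvert
whether every use of the operand will see the inversion, so the CreateNot
calls are expected to fold away rather than add instructions.
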
4 changes: 2 additions & 2 deletions llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -1157,8 +1157,8 @@ define i8 @freeToInvertSub(i8 %x, i8 %y, i8 %z) {
 ; CHECK-NEXT:    call void @use(i8 [[NX]])
 ; CHECK-NEXT:    call void @use(i8 [[NY]])
 ; CHECK-NEXT:    call void @use(i8 [[NZ]])
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[NX]], i8 [[NY]])
-; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[NZ]], [[M]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[TMP1]], [[Z]]
 ; CHECK-NEXT:    ret i8 [[SUB]]
 ;
   %nx = xor i8 %x, -1
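Worked algebra for the change above, using ~a == -a - 1: bitwise-not reverses
unsigned order, so umax(~x, ~y) == ~umin(x, y), and the original
~z - umax(~x, ~y) == ~z - ~umin(x, y) == umin(x, y) - z, which is the new
expected output. The xors survive only because of the extra @use calls; the
subtract no longer depends on them.
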
5 changes: 2 additions & 3 deletions llvm/test/Transforms/InstCombine/reassociate-nuw.ll
@@ -79,9 +79,8 @@ define i32 @reassoc_x2_mul_nuw(i32 %x, i32 %y) {
 
 define i32 @reassoc_x2_sub_nuw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @reassoc_x2_sub_nuw(
-; CHECK-NEXT:    [[SUB0:%.*]] = add i32 [[X:%.*]], -4
-; CHECK-NEXT:    [[SUB1:%.*]] = add i32 [[Y:%.*]], -8
-; CHECK-NEXT:    [[SUB2:%.*]] = sub nuw i32 [[SUB0]], [[SUB1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[SUB2:%.*]] = add i32 [[TMP1]], 4
 ; CHECK-NEXT:    ret i32 [[SUB2]]
 ;
   %sub0 = sub nuw i32 %x, 4
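Arithmetic check for the new expected output: (x - 4) - (y - 8) == (x - y) + 4,
i.e. the sub followed by add 4 in the CHECK lines. The nuw from the input is
conservatively dropped, since the rewrite goes through inverted intermediate
values for which unsigned no-wrap is not guaranteed.
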
9 changes: 2 additions & 7 deletions llvm/test/Transforms/InstCombine/sub.ll
@@ -1109,14 +1109,9 @@ define i32 @test57(i32 %A, i32 %B) {
 @dummy_global2 = external global i8*
 
 define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) {
-; Note the reassociate pass and another instcombine pass will further optimize this to
-; "%sub = i64 %i, %j, ret i64 %sub"
-; gep1 and gep2 have only one use
 ; CHECK-LABEL: @test58(
-; CHECK-NEXT:    [[GEP1_OFFS:%.*]] = add nsw i64 [[I:%.*]], 4200
-; CHECK-NEXT:    [[GEP2_OFFS:%.*]] = add nsw i64 [[J:%.*]], 4200
-; CHECK-NEXT:    [[GEPDIFF:%.*]] = sub nsw i64 [[GEP1_OFFS]], [[GEP2_OFFS]]
-; CHECK-NEXT:    ret i64 [[GEPDIFF]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i
   %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %j
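Both geps index the same array at a fixed 42 * 100 == 4200 byte offset, so the
pointer difference is just i - j. The offset adds are add-with-constant, which
isFreeToInvert now recognizes, so instcombine alone produces the direct
subtract and the stale note about needing reassociate plus a second
instcombine run is gone.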
