[InstCombine] Change order of canonicalization of ADD and AND
Canonicalize ((x + C1) & C2) --> ((x & C2) + C1) for suitable constants
C1 and C2, instead of the other way round. This should allow more
constant ADDs to be matched as part of addressing modes for loads and
stores.

Differential Revision: https://reviews.llvm.org/D130080
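
As a quick illustration (a sketch based on the masked_add test updated below; the function name is hypothetical), the commit flips which of these two equivalent forms instcombine produces:

; Old canonical form (before this commit):
;   %tmp = add i8 %x, 96
;   %r   = and i8 %tmp, -16
; New canonical form, with the mask applied first so the trailing
; constant add stays outermost, where addressing-mode matching can see it:
define i8 @masked_add_example(i8 %x) {
  %and = and i8 %x, -16
  %r = add i8 %and, 96
  ret i8 %r
}
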
jayfoad committed Aug 22, 2022
1 parent 2754ff8 commit f82c55f
Showing 7 changed files with 46 additions and 39 deletions.
9 changes: 0 additions & 9 deletions llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -969,15 +969,6 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
}
}

-  // If all bits affected by the add are included in a high-bit-mask, do the
-  // add before the mask op:
-  // (X & 0xFF00) + xx00 --> (X + xx00) & 0xFF00
-  if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C2)))) &&
-      C2->isNegative() && C2->isShiftedMask() && *C == (*C & *C2)) {
-    Value *NewAdd = Builder.CreateAdd(X, ConstantInt::get(Ty, *C));
-    return BinaryOperator::CreateAnd(NewAdd, ConstantInt::get(Ty, *C2));
-  }

return nullptr;
}

8 changes: 8 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1888,6 +1888,14 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
Value *NewAnd = Builder.CreateAnd(X, Op1);
return BinaryOperator::CreateXor(NewAnd, Op1);
}

+    // If all bits affected by the add are included in a high-bit-mask, do the
+    // mask op before the add. Example:
+    // (X + 16) & -4 --> (X & -4) + 16
+    if (Op0->hasOneUse() && C->isNegatedPowerOf2() && *AddC == (*AddC & *C)) {
+      Value *NewAnd = Builder.CreateAnd(X, Op1);
+      return BinaryOperator::CreateAdd(NewAnd, ConstantInt::get(Ty, *AddC));
+    }
}

// ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the
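
A note on why the new visitAnd fold above is sound (reasoning not spelled out in the patch itself): C is a negated power of two, i.e. all-ones above some bit and all-zeros below it, and every set bit of AddC must lie inside C. The add can therefore neither change nor take a carry from the low bits the mask discards, so masking before or after the add yields the same value. A minimal sketch using the constants from the code comment:

define i8 @mask_commutes_with_add(i8 %x) {
  ; (%x + 16) & -4  ==  (%x & -4) + 16 for every %x: 16 has zeros in
  ; the two low bits, so those bits pass through the add untouched and
  ; are cleared by -4 either way.
  %m = and i8 %x, -4
  %r = add i8 %m, 16
  ret i8 %r
}
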
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2602,10 +2602,14 @@ foldRoundUpIntegerWithPow2Alignment(SelectInst &SI,
if (!match(XLowBits, m_And(m_Specific(X), m_APIntAllowUndef(LowBitMaskCst))))
return nullptr;

+  // Match even if the AND and ADD are swapped.
const APInt *BiasCst, *HighBitMaskCst;
if (!match(XBiasedHighBits,
m_And(m_Add(m_Specific(X), m_APIntAllowUndef(BiasCst)),
-                   m_APIntAllowUndef(HighBitMaskCst))))
+                   m_APIntAllowUndef(HighBitMaskCst))) &&
+      !match(XBiasedHighBits,
+             m_Add(m_And(m_Specific(X), m_APIntAllowUndef(HighBitMaskCst)),
+                   m_APIntAllowUndef(BiasCst))))
return nullptr;

if (!LowBitMaskCst->isMask())
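
For context, foldRoundUpIntegerWithPow2Alignment matches the round-up-to-alignment idiom sketched below (hypothetical function name; constants follow the tests in this commit). Since instcombine now canonicalizes the biased value to mask-then-add, the matcher must accept both operand orders:

define i8 @round_up_to_multiple_of_16(i8 %x) {
  %x.lowbits = and i8 %x, 15
  %x.lowbits.are.zero = icmp eq i8 %x.lowbits, 0
  %x.masked = and i8 %x, -16                 ; new canonical order
  %x.biased.highbits = add i8 %x.masked, 16
  %x.roundedup = select i1 %x.lowbits.are.zero, i8 %x, i8 %x.biased.highbits
  ret i8 %x.roundedup
}
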
18 changes: 9 additions & 9 deletions llvm/test/Transforms/InstCombine/add.ll
@@ -724,12 +724,12 @@ define i8 @test34(i8 %A) {
}

; If all bits affected by the add are included
-; in the mask, do the add before the mask op.
+; in the mask, do the mask op before the add.

define i8 @masked_add(i8 %x) {
; CHECK-LABEL: @masked_add(
-; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 96
-; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP1]], -16
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], -16
+; CHECK-NEXT: [[R:%.*]] = add i8 [[AND]], 96
; CHECK-NEXT: ret i8 [[R]]
;
%and = and i8 %x, 240 ; 0xf0
@@ -739,8 +739,8 @@ define i8 @masked_add(i8 %x) {

define <2 x i8> @masked_add_splat(<2 x i8> %x) {
; CHECK-LABEL: @masked_add_splat(
-; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 64, i8 64>
-; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], <i8 -64, i8 -64>
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 -64, i8 -64>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[AND]], <i8 64, i8 64>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%and = and <2 x i8> %x, <i8 192, i8 192> ; 0xc0
@@ -761,14 +761,14 @@ define i8 @not_masked_add(i8 %x) {

define i8 @masked_add_multi_use(i8 %x) {
; CHECK-LABEL: @masked_add_multi_use(
-; CHECK-NEXT: [[TMP:%.*]] = add i8 [[X:%.*]], 96
-; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP:%.*]], -16
-; CHECK-NEXT: call void @use(i8 [[X]])
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], -16
+; CHECK-NEXT: [[R:%.*]] = add i8 [[AND]], 96
+; CHECK-NEXT: call void @use(i8 [[AND]])
; CHECK-NEXT: ret i8 [[R]]
;
%and = and i8 %x, -16 ; 0xf0
%r = add i8 %and, 96 ; 0x60
-call void @use(i8 %x) ; extra use
+call void @use(i8 %and) ; extra use
ret i8 %r
}

32 changes: 16 additions & 16 deletions llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll
@@ -247,8 +247,8 @@ define i8 @n9_wrong_x0(i8 %x.0, i8 %x.1) {
; CHECK-LABEL: @n9_wrong_x0(
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and i8 [[X_0:%.*]], 15
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT: [[X_BIASED:%.*]] = add i8 [[X_0]], 16
-; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X_0]], -16
+; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = add i8 [[TMP1]], 16
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X_1:%.*]], i8 [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret i8 [[X_ROUNDEDUP]]
;
@@ -263,8 +263,8 @@ define i8 @n9_wrong_x1(i8 %x.0, i8 %x.1) {
; CHECK-LABEL: @n9_wrong_x1(
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and i8 [[X_0:%.*]], 15
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT: [[X_BIASED:%.*]] = add i8 [[X_1:%.*]], 16
-; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X_1:%.*]], -16
+; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = add i8 [[TMP1]], 16
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X_0]], i8 [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret i8 [[X_ROUNDEDUP]]
;
@@ -279,8 +279,8 @@ define i8 @n9_wrong_x2(i8 %x.0, i8 %x.1) {
; CHECK-LABEL: @n9_wrong_x2(
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and i8 [[X_1:%.*]], 15
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT: [[X_BIASED:%.*]] = add i8 [[X_0:%.*]], 16
-; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X_0:%.*]], -16
+; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = add i8 [[TMP1]], 16
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X_0]], i8 [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret i8 [[X_ROUNDEDUP]]
;
@@ -297,8 +297,8 @@ define i8 @n10_wrong_low_bit_mask(i8 %x) {
; CHECK-LABEL: @n10_wrong_low_bit_mask(
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and i8 [[X:%.*]], 31
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT: [[X_BIASED:%.*]] = add i8 [[X]], 16
-; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -16
+; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = add i8 [[TMP1]], 16
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X]], i8 [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret i8 [[X_ROUNDEDUP]]
;
@@ -333,8 +333,8 @@ define i8 @n12_wrong_bias(i8 %x) {
; CHECK-LABEL: @n12_wrong_bias(
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and i8 [[X:%.*]], 15
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT: [[X_BIASED:%.*]] = add i8 [[X]], 32
-; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -16
+; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = add i8 [[TMP1]], 32
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X]], i8 [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret i8 [[X_ROUNDEDUP]]
;
@@ -369,8 +369,8 @@ define i8 @n14_wrong_comparison_constant(i8 %x) {
; CHECK-LABEL: @n14_wrong_comparison_constant(
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and i8 [[X:%.*]], 15
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 1
-; CHECK-NEXT: [[X_BIASED:%.*]] = add i8 [[X]], 16
-; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -16
+; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = add i8 [[TMP1]], 16
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X]], i8 [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret i8 [[X_ROUNDEDUP]]
;
@@ -387,8 +387,8 @@ define i8 @n15_wrong_comparison_predicate_and_constant(i8 %x) {
; CHECK-LABEL: @n15_wrong_comparison_predicate_and_constant(
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and i8 [[X:%.*]], 14
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT: [[X_BIASED:%.*]] = add i8 [[X]], 16
-; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -16
+; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = add i8 [[TMP1]], 16
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X]], i8 [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret i8 [[X_ROUNDEDUP]]
;
@@ -405,8 +405,8 @@ define i8 @n16_oneuse(i8 %x) {
; CHECK-LABEL: @n16_oneuse(
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and i8 [[X:%.*]], 15
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT: [[X_BIASED:%.*]] = add i8 [[X]], 16
-; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -16
+; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = add i8 [[TMP1]], 16
; CHECK-NEXT: call void @use.i8(i8 [[X_BIASED_HIGHBITS]])
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X]], i8 [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret i8 [[X_ROUNDEDUP]]
8 changes: 6 additions & 2 deletions llvm/test/Transforms/InstCombine/or.ll
@@ -154,10 +154,14 @@ define i32 @test20(i32 %x) {
ret i32 %z
}

+; TODO: This should combine to t1 + 2.
define i32 @test21(i32 %t1) {
; CHECK-LABEL: @test21(
-; CHECK-NEXT: [[T1_MASK1:%.*]] = add i32 [[T1:%.*]], 2
-; CHECK-NEXT: ret i32 [[T1_MASK1]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[T1:%.*]], -2
+; CHECK-NEXT: [[T3:%.*]] = add i32 [[TMP1]], 2
+; CHECK-NEXT: [[T5:%.*]] = and i32 [[T1]], 1
+; CHECK-NEXT: [[T6:%.*]] = or i32 [[T5]], [[T3]]
+; CHECK-NEXT: ret i32 [[T6]]
;
%t1.mask1 = add i32 %t1, 2
%t3 = and i32 %t1.mask1, -2
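
The TODO records a small regression: under the old canonicalization the whole of test21 collapsed to a single add, because bit 0 of t1 + 2 equals bit 0 of t1, so ((t1 + 2) & -2) | (t1 & 1) == t1 + 2. A sketch of the output the TODO asks instcombine to recover (hypothetical function name):

define i32 @test21_goal(i32 %t1) {
  ; Adding 2 leaves bit 0 of %t1 unchanged, so re-inserting that bit
  ; with the or is a no-op and the whole sequence is just %t1 + 2.
  %r = add i32 %t1, 2
  ret i32 %r
}
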
4 changes: 2 additions & 2 deletions llvm/test/Transforms/InstCombine/sub.ll
@@ -1490,8 +1490,8 @@ define i8 @sub_add_sub_reassoc_use2(i8 %w, i8 %x, i8 %y, i8 %z) {

define i8 @sub_mask_lowbits(i8 %x) {
; CHECK-LABEL: @sub_mask_lowbits(
-; CHECK-NEXT: [[A1:%.*]] = add i8 [[X:%.*]], -108
-; CHECK-NEXT: [[R:%.*]] = and i8 [[A1]], -4
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -4
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP1]], -108
; CHECK-NEXT: ret i8 [[R]]
;
%a1 = add i8 %x, 148 ; 0x94
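
The same fold fires for a negative bias: the only requirement is that every set bit of the add constant lies inside the mask, and -108 (0x94) has zeros in the two bits that -4 clears. A minimal sketch mirroring sub_mask_lowbits:

define i8 @negative_bias(i8 %x) {
  ; (%x + (-108)) & -4  -->  (%x & -4) + (-108)
  %m = and i8 %x, -4
  %r = add i8 %m, -108
  ret i8 %r
}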
