[PATCH] [llvm] [InstCombine] Canonicalise ADD+GEP

This patch canonicalises add + gep to gep + gep: a single-index GEP whose index is a one-use add is rewritten as a chain of two GEPs, one per add operand.

Co-authored-by: Paul Walker <paul.walker@arm.com>
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D155688
llvm · Oct 6, 2023 · e13bed4
1 parent a16f646
commit e13bed4
Show file tree

Hide file tree

Showing 14 changed files with 266 additions and 247 deletions.
diff --git a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp
@@ -94,9 +94,9 @@ void* test9(B* x) { return dynamic_cast<void*>(x); }
 // CHECK-NEXT:   [[VBTBL:%.*]] = load ptr, ptr [[VBPTR]], align 4
 // CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds i32, ptr [[VBTBL]], i32 1
 // CHECK-NEXT:   [[VBOFFS:%.*]] = load i32, ptr [[VBOFFP]], align 4
-// CHECK-NEXT:   [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4
-// CHECK-NEXT:   [[ADJ:%.*]] = getelementptr inbounds i8, ptr %x, i32 [[DELTA]]
-// CHECK-NEXT:   [[CALL:%.*]] = tail call ptr @__RTCastToVoid(ptr nonnull [[ADJ]])
+// CHECK-NEXT:   [[BASE:%.*]] = getelementptr i8, ptr %x, i32 [[VBOFFS]]
+// CHECK-NEXT:   [[ADJ:%.*]] = getelementptr i8, ptr [[BASE]], i32 4
+// CHECK-NEXT:   [[CALL:%.*]] = tail call ptr @__RTCastToVoid(ptr [[ADJ]])
 // CHECK-NEXT:   br label
 // CHECK:        [[RET:%.*]] = phi ptr
 // CHECK-NEXT:   ret ptr [[RET]]

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2316,11 +2316,27 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
     }
   }
-
   // We do not handle pointer-vector geps here.
   if (GEPType->isVectorTy())
     return nullptr;
 
+  if (GEP.getNumIndices() == 1) {
+    // Try to replace ADD + GEP with GEP + GEP.
+    Value *Idx1, *Idx2;
+    if (match(GEP.getOperand(1),
+              m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
+      //   %idx = add i64 %idx1, %idx2
+      //   %gep = getelementptr i32, ptr %ptr, i64 %idx
+      // is rewritten as:
+      //   %newptr = getelementptr i32, ptr %ptr, i64 %idx1
+      //   %newgep = getelementptr i32, ptr %newptr, i64 %idx2
+      auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(),
+                                       GEP.getPointerOperand(), Idx1);
+      return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
+                                       Idx2);
+    }
+  }
+
   if (!GEP.isInBounds()) {
     unsigned IdxWidth =
         DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());

diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -2,6 +2,9 @@
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
+; Instcombine should be able to prove vector alignment in the
+; presence of a few mild address computation tricks.
+
 define void @test0(ptr %b, i64 %n, i64 %u, i64 %y) nounwind  {
 ; CHECK-LABEL: @test0(
 ; CHECK-NEXT:  entry:
@@ -15,8 +18,8 @@ define void @test0(ptr %b, i64 %n, i64 %u, i64 %y) nounwind  {
 ; CHECK:       bb:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BB]] ], [ 20, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[J:%.*]] = mul i64 [[I]], [[V]]
-; CHECK-NEXT:    [[H:%.*]] = add i64 [[J]], [[Z]]
-; CHECK-NEXT:    [[T8:%.*]] = getelementptr double, ptr [[E]], i64 [[H]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[E]], i64 [[J]]
+; CHECK-NEXT:    [[T8:%.*]] = getelementptr double, ptr [[TMP0]], i64 [[Z]]
 ; CHECK-NEXT:    store <2 x double> zeroinitializer, ptr [[T8]], align 8
 ; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[I]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[N]]

diff --git a/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll b/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
@@ -23,10 +23,10 @@ define void @_Z4testPcl(ptr %out, i64 %size) {
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[OUT:%.*]], i64 [[I_0]]
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[I_0]], [[SIZE]]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[OUT]], i64 [[ADD]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX1]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]
-; CHECK-NEXT:    store i16 [[TMP0]], ptr [[ARRAYIDX]], align 1, !llvm.access.group [[ACC_GRP0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[I_0]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[SIZE]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]
+; CHECK-NEXT:    store i16 [[TMP1]], ptr [[ARRAYIDX]], align 1, !llvm.access.group [[ACC_GRP0]]
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[ADD2]] = add nuw nsw i64 [[I_0]], 2

diff --git a/llvm/test/Transforms/InstCombine/memrchr-4.ll b/llvm/test/Transforms/InstCombine/memrchr-4.ll
@@ -34,8 +34,8 @@ define ptr @fold_memrchr_a11111_c_n(i32 %C, i64 %N) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP1]], i1 [[TMP3]], i1 false
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[N]], -1
-; CHECK-NEXT:    [[MEMRCHR_PTR_PLUS:%.*]] = getelementptr inbounds i8, ptr @a11111, i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr @a11111, i64 [[N]]
+; CHECK-NEXT:    [[MEMRCHR_PTR_PLUS:%.*]] = getelementptr i8, ptr [[TMP5]], i64 -1
 ; CHECK-NEXT:    [[MEMRCHR_SEL:%.*]] = select i1 [[TMP4]], ptr [[MEMRCHR_PTR_PLUS]], ptr null
 ; CHECK-NEXT:    ret ptr [[MEMRCHR_SEL]]
 ;

diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
@@ -1755,8 +1755,8 @@ define void @ashr_out_of_range_1(ptr %A) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i177 [[L_FROZEN]], -1
 ; CHECK-NEXT:    [[B:%.*]] = select i1 [[TMP1]], i177 0, i177 [[L_FROZEN]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i177 [[B]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], -1
-; CHECK-NEXT:    [[G11:%.*]] = getelementptr i177, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i177, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT:    [[G11:%.*]] = getelementptr i177, ptr [[TMP3]], i64 -1
 ; CHECK-NEXT:    [[C17:%.*]] = icmp sgt i177 [[B]], [[L_FROZEN]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = sext i1 [[C17]] to i64
 ; CHECK-NEXT:    [[G62:%.*]] = getelementptr i177, ptr [[G11]], i64 [[TMP4]]

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -1402,11 +1402,11 @@ define void @PR27626_5(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[I_MINUS_1:%.*]] = add i64 [[I]], -1
-; CHECK-NEXT:    [[I_MINUS_3:%.*]] = add i64 [[I]], -3
 ; CHECK-NEXT:    [[A_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
-; CHECK-NEXT:    [[A_I_MINUS_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_MINUS_1]]
-; CHECK-NEXT:    [[A_I_MINUS_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_MINUS_3]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    [[A_I_MINUS_1:%.*]] = getelementptr i32, ptr [[TMP19]], i64 -1
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    [[A_I_MINUS_3:%.*]] = getelementptr i32, ptr [[TMP20]], i64 -3
 ; CHECK-NEXT:    store i32 [[X]], ptr [[A_I_MINUS_1]], align 4
 ; CHECK-NEXT:    store i32 [[Y]], ptr [[A_I_MINUS_3]], align 4
 ; CHECK-NEXT:    store i32 [[Z]], ptr [[A_I]], align 4
@@ -1459,11 +1459,11 @@ define void @PR34743(i16* %a, i32* %b, i64 %n) #1 {
 ; CHECK:       vector.memcheck:
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[N]], 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -4
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 4
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP5]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP6]], i64 4
 ; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 2
-; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP5]], 6
-; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[TMP7]], i64 6
 ; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP2]], [[B]]
 ; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP1]], [[SCEVGEP]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -35,8 +35,8 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 5
 ; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP6]], [[TMP7]]
-; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP6]]
+; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP7]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[WIDE_VEC3:%.*]] = load <vscale x 8 x i32>, ptr [[NEXT_GEP2]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])