[InstCombine] Canonicalise SextADD + GEP #69581

LiqinWeng · 2023-10-19T10:03:11Z

For a two-dimensional array, the base address does not need to be recalculated for the second access. It can be obtained through offset. I will submit it later: fold (mul (sext (add_nsw x, c1)), c2) -> (add (mul (sext x), c2), c1*c2)
The code scenario is as follows

typedef int array1[50];
void test(int array2[50][50], int a, int b) {
    int loc;
    loc = a + 5;
    array2[loc][loc] += 1;
    array2[loc + 20][loc] = loc;
}

form dhrystone.

llvmbot · 2023-10-19T10:04:19Z

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: LiqinWeng (LiqinWeng)

Changes

ref to https://reviews.llvm.org/D155688

Patch is 34.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69581.diff

5 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+23)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll (+1-1)
(added) llvm/test/Transforms/InstCombine/array.ll (+38)
(modified) llvm/test/Transforms/LoopVectorize/induction.ll (+105-90)
(modified) llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll (+20-11)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 8a6f66e36bd80e9..017989c3c186769 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2335,6 +2335,29 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
                                        Idx2);
     }
+
+    Value *SIdx;
+    auto SextIntrinsic = dyn_cast<SExtInst>(GEP.getOperand(1));
+    if (match(GEP.getOperand(1), m_OneUse(m_SExt(m_Value(SIdx)))) &&
+        match(SextIntrinsic->getOperand(0),
+              m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
+      // %idx = add nsw i32 %idx1, %idx2
+      // %sidx = sext i32 %idx to i64
+      // %gep = getelementptr inbounds [50 x i32], ptr %array2, i64 %idx_sext
+      // as :
+      // %sidx1 = sext i32 %idx1 to i64
+      // %sidx2 = sext i32 %idx2 to i64
+      // %newptr = getelementptr i32, i32* %ptr, i64 %sidx1
+      // %newgep = getelementptr i32, i32* %newptr, i64 %sidx2
+      auto SIdx1 = Builder.CreateSExt(Idx1, // Add->getOperand(0),
+                                      GEP.getOperand(1)->getType());
+      auto SIdx2 = Builder.CreateSExt(Idx2, // Add->getOperand(1),
+                                      GEP.getOperand(1)->getType());
+      auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(),
+                                       GEP.getPointerOperand(), SIdx1);
+      return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
+                                       SIdx2);
+    }
   }
 
   if (!GEP.isInBounds()) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
index b34df3ffca26420..ae934678bbec729 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
@@ -61,7 +61,7 @@ entry:
 
 ; GCN: define amdgpu_kernel void @test_inliner(
 ; GCN-INL1:     %c1 = tail call coldcc float @foo(
-; GCN-INLDEF:   %cmp.i = fcmp ogt float %tmp2, 0.000000e+00
+; GCN-INLDEF:   %cmp.i = fcmp ogt float %2, 0.000000e+00
 ; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 1.000000e+00, %c
 ; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
 ; GCN-MAXBB1:   call coldcc void @foo_private_ptr
diff --git a/llvm/test/Transforms/InstCombine/array.ll b/llvm/test/Transforms/InstCombine/array.ll
new file mode 100644
index 000000000000000..bf768d1f8d9173c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/array.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define void @test(ptr noundef %array2, i32 noundef signext %a, i32 noundef signext %b) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr noundef [[ARRAY2:%.*]], i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[A]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[A]], 4
+; CHECK-NEXT:    [[IDXPROM1:%.*]] = sext i32 [[SUB]] to i64
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [50 x i32], ptr [[ARRAY2]], i64 [[IDXPROM]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [50 x i32], ptr [[ARRAY2]], i64 [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr [50 x i32], ptr [[TMP2]], i64 25, i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add = add nsw i32 %a, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [50 x i32], ptr %array2, i64 %idxprom
+  %sub = sub nsw i32 %add, 1
+  %idxprom1 = sext i32 %sub to i64
+  %arrayidx2 = getelementptr inbounds [50 x i32], ptr %arrayidx, i64 0, i64 %idxprom1
+  %0 = load i32, ptr %arrayidx2, align 4
+  %add3 = add nsw i32 %0, 1
+  store i32 %add3, ptr %arrayidx2, align 4
+  %add4 = add nsw i32 %add, 20
+  %idxprom5 = sext i32 %add4 to i64
+  %arrayidx6 = getelementptr inbounds [50 x i32], ptr %array2, i64 %idxprom5
+  %arrayidx8 = getelementptr inbounds [50 x i32], ptr %arrayidx6, i64 0, i64 %idxprom
+  store i32 %add, ptr %arrayidx8, align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 90ad054c5a22e51..a0cd4a9a98c5a7c 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -3536,15 +3536,17 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; IND:       vector.body:
 ; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IND-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IND-NEXT:    [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
-; IND-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST4]], [[T]]
-; IND-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
-; IND-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
+; IND-NEXT:    [[DOTCAST4:%.*]] = zext i32 [[INDEX]] to i64
+; IND-NEXT:    [[SEXT:%.*]] = shl i64 [[DOTCAST4]], 56
+; IND-NEXT:    [[TMP10:%.*]] = ashr exact i64 [[SEXT]], 56
+; IND-NEXT:    [[TMP11:%.*]] = sext i8 [[T]] to i64
+; IND-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]]
+; IND-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 [[TMP11]]
+; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP13]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
-; IND-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; IND-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IND-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; IND:       middle.block:
 ; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3557,8 +3559,8 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; IND-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; IND-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; IND-NEXT:    [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; IND-NEXT:    [[TMP13:%.*]] = sext i8 [[IDX]] to i64
-; IND-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; IND-NEXT:    [[TMP15:%.*]] = sext i8 [[IDX]] to i64
+; IND-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
 ; IND-NEXT:    store i32 [[SPHI]], ptr [[PTR]], align 4
 ; IND-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
 ; IND-NEXT:    [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
@@ -3603,17 +3605,19 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
-; UNROLL-NEXT:    [[DOTCAST5:%.*]] = trunc i32 [[INDEX]] to i8
-; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST5]], [[T]]
-; UNROLL-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
-; UNROLL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
-; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 2
-; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
+; UNROLL-NEXT:    [[DOTCAST5:%.*]] = zext i32 [[INDEX]] to i64
+; UNROLL-NEXT:    [[SEXT:%.*]] = shl i64 [[DOTCAST5]], 56
+; UNROLL-NEXT:    [[TMP10:%.*]] = ashr exact i64 [[SEXT]], 56
+; UNROLL-NEXT:    [[TMP11:%.*]] = sext i8 [[T]] to i64
+; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]]
+; UNROLL-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 [[TMP11]]
+; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP13]], align 4
+; UNROLL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 2
+; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP14]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
-; UNROLL-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; UNROLL-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; UNROLL:       middle.block:
 ; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3626,8 +3630,8 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; UNROLL-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; UNROLL-NEXT:    [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; UNROLL-NEXT:    [[TMP14:%.*]] = sext i8 [[IDX]] to i64
-; UNROLL-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
+; UNROLL-NEXT:    [[TMP16:%.*]] = sext i8 [[IDX]] to i64
+; UNROLL-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]]
 ; UNROLL-NEXT:    store i32 [[SPHI]], ptr [[PTR]], align 4
 ; UNROLL-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
 ; UNROLL-NEXT:    [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
@@ -3747,17 +3751,19 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
-; INTERLEAVE-NEXT:    [[DOTCAST5:%.*]] = trunc i32 [[INDEX]] to i8
-; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST5]], [[T]]
-; INTERLEAVE-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
-; INTERLEAVE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
-; INTERLEAVE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 4
-; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
+; INTERLEAVE-NEXT:    [[DOTCAST5:%.*]] = zext i32 [[INDEX]] to i64
+; INTERLEAVE-NEXT:    [[SEXT:%.*]] = shl i64 [[DOTCAST5]], 56
+; INTERLEAVE-NEXT:    [[TMP10:%.*]] = ashr exact i64 [[SEXT]], 56
+; INTERLEAVE-NEXT:    [[TMP11:%.*]] = sext i8 [[T]] to i64
+; INTERLEAVE-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]]
+; INTERLEAVE-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 [[TMP11]]
+; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP13]], align 4
+; INTERLEAVE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 4
+; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP14]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
-; INTERLEAVE-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; INTERLEAVE-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; INTERLEAVE-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; INTERLEAVE:       middle.block:
 ; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
 ; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3770,8 +3776,8 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; INTERLEAVE-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; INTERLEAVE-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; INTERLEAVE-NEXT:    [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; INTERLEAVE-NEXT:    [[TMP14:%.*]] = sext i8 [[IDX]] to i64
-; INTERLEAVE-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
+; INTERLEAVE-NEXT:    [[TMP16:%.*]] = sext i8 [[IDX]] to i64
+; INTERLEAVE-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]]
 ; INTERLEAVE-NEXT:    store i32 [[SPHI]], ptr [[PTR]], align 4
 ; INTERLEAVE-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
 ; INTERLEAVE-NEXT:    [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
@@ -3921,15 +3927,17 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; IND:       vector.body:
 ; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IND-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IND-NEXT:    [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
-; IND-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST4]], [[T]]
-; IND-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
-; IND-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
+; IND-NEXT:    [[DOTCAST4:%.*]] = zext i32 [[INDEX]] to i64
+; IND-NEXT:    [[SEXT:%.*]] = shl i64 [[DOTCAST4]], 56
+; IND-NEXT:    [[TMP10:%.*]] = ashr exact i64 [[SEXT]], 56
+; IND-NEXT:    [[TMP11:%.*]] = sext i8 [[T]] to i64
+; IND-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]]
+; IND-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 [[TMP11]]
+; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP13]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
-; IND-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; IND-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IND-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
 ; IND:       middle.block:
 ; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3942,8 +3950,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; IND-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; IND-NEXT:    [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
 ; IND-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; IND-NEXT:    [[TMP13:%.*]] = sext i8 [[IDX]] to i64
-; IND-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; IND-NEXT:    [[TMP15:%.*]] = sext i8 [[IDX]] to i64
+; IND-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
 ; IND-NEXT:    store i32 [[SPHI]], ptr [[PTR]], align 4
 ; IND-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
 ; IND-NEXT:    [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
@@ -3991,17 +3999,19 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
-; UNROLL-NEXT:    [[DOTCAST5:%.*]] = trunc i32 [[INDEX]] to i8
-; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST5]], [[T]]
-; UNROLL-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
-; UNROLL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
-; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 2
-; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
+; UNROLL-NEXT:    [[DOTCAST5:%.*]] = zext i32 [[INDEX]] to i64
+; UNROLL-NEXT:    [[SEXT:%.*]] = shl i64 [[DOTCAST5]], 56
+; UNROLL-NEXT:    [[TMP10:%.*]] = ashr exact i64 [[SEXT]], 56
+; UNROLL-NEXT:    [[TMP11:%.*]] = sext i8 [[T]] to i64
+; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]]
+; UNROLL-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 [[TMP11]]
+; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP13]], align 4
+; UNROLL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 2
+; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP14]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 16, i32 16>
-; UNROLL-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; UNROLL-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
 ; UNROLL:       middle.block:
 ; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -4014,8 +4024,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; UNROLL-NEXT:    [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
 ; UNROLL-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; UNROLL-NEXT:    [[TMP14:%.*]] = sext i8 [[IDX]] to i64
-; UNROLL-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
+; UNROLL-NEXT:    [[TMP16:%.*]] = sext i8 [[IDX]] to i64
+; UNROLL-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]]
 ; UNROLL-NEXT:    store i32 [[SPHI]], ptr [[PTR]], align 4
 ; UNROLL-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
 ; UNROLL-NEXT:    [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
@@ -4141,17 +4151,19 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 16, i32 16, i32 16, i32 16>
-; INTERLEAVE-NEXT:    [[DOTCAST5:%.*]] = trunc i32 [[INDEX]] to i8
-; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST5]], [[T]]
-; INTERLEAVE-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
-; INTERLEAVE-NEXT:    [[TMP11:%.*]] ...
[truncated]

goldsteinn · 2023-10-19T17:46:06Z

Can you add alive2 proof?

LiqinWeng · 2023-10-20T00:58:33Z

prepare for #69667

LiqinWeng · 2023-10-20T06:16:44Z

Can you add alive2 proof?

https://alive2.llvm.org/ce/z/epfrzW

nikic

It's not at all clear to me that this is a generally profitable transform. Just based on the test changes here, loopflatten.ll is an obvious regression (loop no longer flattened), while induction.ll also looks like a regression to me.

This is also a regression on the simple instruction count heuristic, as we go from add+sext+gep to 2sext+2gep. Possibly what you want to do is limit this to the case where the RHS of the add is a constant, so the sext folds away?

LiqinWeng · 2023-10-20T12:56:25Z

It's not at all clear to me that this is a generally profitable transform. Just based on the test changes here, loopflatten.ll is an obvious regression (loop no longer flattened), while induction.ll also looks like a regression to me.

This is also a regression on the simple instruction count heuristic, as we go from add+sext+gep to 2_sext+2_gep. Possibly what you want to do is limit this to the case where the RHS of the add is a constant, so the sext folds away?

Extracting constants for memory offset operations in advance can be helpful for subsequent related optimizations. like：

void test(int array1[50], int a, int b) {
    int loc;
    loc = a + 5;

    array1[loc] = b;
    array1[loc + 1] = array1[loc];
    array1[loc + 30] = loc;
}

DAGTODAG has creating constant, do not wait until the combine phase to perform the transformation of '(shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)'

LiqinWeng · 2023-10-21T02:32:01Z

It's not at all clear to me that this is a generally profitable transform. Just based on the test changes here, loopflatten.ll is an obvious regression (loop no longer flattened), while induction.ll also looks like a regression to me.

This is also a regression on the simple instruction count heuristic, as we go from add+sext+gep to 2_sext+2_gep. Possibly what you want to do is limit this to the case where the RHS of the add is a constant, so the sext folds away?

yes, I have fix it , thanks

nikic · 2023-10-21T08:08:37Z

Please, can you take a bit more care? Your proof is very obviously broken if you just look at the result.

Here is a fixed version: https://alive2.llvm.org/ce/z/FSNnoo Note that the nsw on the add is required for correctness, which you don't check.

LiqinWeng · 2023-10-22T03:48:45Z

Please, can you take a bit more care? Your proof is very obviously broken if you just look at the result.

Here is a fixed version: https://alive2.llvm.org/ce/z/FSNnoo Note that the nsw on the add is required for correctness, which you don't check.

Pls see: https://alive2.llvm.org/ce/z/vkCK92. The transformation only occurs when the second operand of the add is a constant.
I will add the nsw on the add

goldsteinn · 2023-10-22T05:21:31Z

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

+      // %sidx = sext i32 %idx to i64
+      // %newptr = getelementptr i32, i32* %ptr, i64 %sidx
+      // %newgep = getelementptr i32, i32* %newptr, i64 C
+      auto SIdx1 = Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType());


Do you even need the new sext?
https://alive2.llvm.org/ce/z/dgfX5w

Yes, in the sense that it would just get canonicalized to add the sext anyway.

the test: loopflatten.ll is regression? Should this implementation be placed in licm？

goldsteinn · 2023-10-22T05:22:21Z

Please, can you take a bit more care? Your proof is very obviously broken if you just look at the result.
Here is a fixed version: https://alive2.llvm.org/ce/z/FSNnoo Note that the nsw on the add is required for correctness, which you don't check.

Pls see: https://alive2.llvm.org/ce/z/vkCK92. The transformation only occurs when the second operand of the add is a constant. I will add the nsw on the add

The proof is only for constant 5. In general you should have no constants and impl any constraints with assume:
https://alive2.llvm.org/ce/z/dgfX5w

LiqinWeng · 2023-10-25T12:55:06Z

ping

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

polly/test/Support/dumpmodule.ll

llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll

LiqinWeng · 2023-10-29T11:05:29Z

Also, why did you drop the limitation to having one constant operand again?

Sorry，I misunderstood what you meant, and I have corrected it

LiqinWeng · 2023-10-30T03:21:58Z

Also, why did you drop the limitation to having one constant operand again?

I have a question: Why is there no nuw flag on test1, but test2 has nsw flag.
command: clang --target=riscv64 shadd.c -O3 -emit-llvm -S -o -
code：

void test1(unsigned int array1[50], unsigned int a, unsigned int b) {
    unsigned int loc;
    loc = a + 5;
    array1[loc] = b;
    array1[loc + 1] = array1[loc];
    array1[loc + 30] = loc;
}

void test2(int array1[50], int a, int b) {
    int loc;
    loc = a + 5;

    array1[loc] = b;
    array1[loc + 1] = array1[loc];
    array1[loc + 30] = loc;
}

LiqinWeng · 2023-10-30T03:22:16Z

Also, why did you drop the limitation to having one constant operand again?

I have a question: Why is there no nuw flag on test1, but test2 has nsw flag.
command: clang --target=riscv64 shadd.c -O3 -emit-llvm -S -o -
code：

void test1(unsigned int array1[50], unsigned int a, unsigned int b) {
    unsigned int loc;
    loc = a + 5;
    array1[loc] = b;
    array1[loc + 1] = array1[loc];
    array1[loc + 30] = loc;
}

void test2(int array1[50], int a, int b) {
    int loc;
    loc = a + 5;

    array1[loc] = b;
    array1[loc + 1] = array1[loc];
    array1[loc + 30] = loc;
}

LiqinWeng · 2023-10-31T00:25:56Z

Also, why did you drop the limitation to having one constant operand again?
For this code scenario, if the constants are not extracted in the instcombine stage, the base address needs to be recalculated the second time the array is accessed. How transform to this scenario
code：
command: clang --target=riscv64 shadd.c -O3 -emit-llvm -S -o -

void test1(unsigned int array1[50], unsigned int a, unsigned int b) {
    unsigned int loc;
    loc = a + 5;
    array1[loc] = b;
    array1[loc + 1] = array1[loc];
    array1[loc + 30] = loc;
}

@goldsteinn @nikic

LiqinWeng · 2023-11-02T09:49:37Z

ping

LiqinWeng · 2023-11-16T08:50:08Z

Are there any issues with this patch? Can it be merge？
@nikic

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

llvm/test/Transforms/InstCombine/array.ll

polly/test/Support/dumpmodule.ll

LiqinWeng · 2023-11-24T07:10:55Z

friendly ping @nikic

nikic

This looks fine to me. @topperc Any concerns about this? I think you reported some RISCV regressions for the main add->gep change, so this might make things worse...

llvm/test/Transforms/InstCombine/array.ll

LiqinWeng · 2023-11-28T08:11:29Z

friendly ping O(∩_∩)O @nikic

nikic

LGTM

brunodf-snps · 2024-04-22T09:24:33Z

We observe that the add + gep -> gep + gep rewriting introduced in D155688 (by @d-smirnov and @paulwalker-arm) and continued in #69581 here, breaks the LoopFlatten pass on loops with the following pattern from coremark:

for (int i = 0; i < n; i++)
  for (int j = 0; j < n; j++)
     ... read/write A[i*n + j] ...

Godbolt link: https://godbolt.org/z/j3dKqj757 where clang/LLVM 18.1.0 has no loop flattening on 32 bit (bottom right output).

(See similar report from @DragonDisciple which explains that the rewrite requires dropping inbounds from the original gep which blocks the optimization of the loop.)

It would seem from bug 40581 and from test pr40581.ll that @LebedevRI @sjoerdmeijer explicitly intended LoopFlatten to support this loop pattern, but unfortunately, there is no test with a complete optimization pipeline for such a loop, except for llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll.

But since the latter test is only for a 64 bit target (where there is a sext to convert an int to the pointer index type), it did not fire at the time of D155688. Based on the discussion above, it seemed that the test did fire here, but it was fixed (inadvertently?) when @nikic requested that the rewriting here be limited to the case where the RHS of the add is a constant (so the sext folds away).

That explains why loop flattening is not broken on 64 bit in the godbolt example above (j is not constant so the rewrite does not trigger), but D155688 did not have this restriction, and this gives rise to the situation that add + gep is only canonicalized to gep + gep based on a complex combination of conditions. Or it is at least very hard to explain under what conditions LLVM can still perform loop flattening of the above loop.

nikic · 2024-04-25T06:36:38Z

@brunodf-snps Assuming this is also an inbounds preservation problem as in the other reports, I think it should be possible to retain it in this case based on https://alive2.llvm.org/ce/z/wVFHGk. As far as I can see we do know that the add operands are non-negative here. I'll take a look at this.

Edit: Basically what nikic@a7b153c does.

brunodf-snps · 2024-04-25T20:24:21Z

@nikic Thanks! When the geps would retain the inbounds property, the loop flatten pass would indeed succeed:
https://godbolt.org/z/K64q46ah1
I shortly tried your patch with my C loop as input, but did not get it to retain the inbounds, even when the loop variables are unsigned. What is a scenario where you expect isKnownNonNegative to be derived?

nikic · 2024-04-26T03:23:42Z

Patch for the inbounds preservation: #90160

@nikic Thanks! When the geps would retain the inbounds property, the loop flatten pass would indeed succeed: https://godbolt.org/z/K64q46ah1 I shortly tried your patch with my C loop as input, but did not get it to retain the inbounds, even when the loop variables are unsigned. What is a scenario where you expect isKnownNonNegative to be derived?

The problem here is that the IR at the time of the first InstCombine run is in non-rotated form (https://gist.github.com/nikic/e634b018bf63ae01c648516326f89faa). Lateron, we can very easily see that %n is non-negative, because there is an explicit condition for it in IR. This early, it's theoretically possible to derive this, but likely isn't feasible for compile-time reasons. So this may be a dead end for this example.

Worth noting that since #78576 LoopFlatten will still apply without inbounds via loop versioning, so this is not a question of whether flattening occurs, just of whether it requires versioning or not.

brunodf-snps · 2024-04-26T10:43:11Z

@nikic Thanks for the clarification. If I first perform loop rotation (https://gist.github.com/brunodf-snps/4a045f596a4c568fdeda86f691d9d118), I indeed see that inbounds is retained when instcombine rewrites the gep.

Worth noting that since #78576 LoopFlatten will still apply without inbounds via loop versioning, so this is not a question of whether flattening occurs, just of whether it requires versioning or not.

Thanks for pointing this out. The loop versioning was added after LLVM 18 (see https://godbolt.org/z/xdYdYs3nn) so I had not noticed it yet.

LiqinWeng requested a review from david-salinas October 19, 2023 10:03

LiqinWeng requested a review from nikic as a code owner October 19, 2023 10:03

llvmbot added backend:AMDGPU llvm:transforms labels Oct 19, 2023

LiqinWeng force-pushed the addgep-to-gepandgep branch from 12a4f4c to d8160f8 Compare October 20, 2023 03:37

LiqinWeng requested review from Meinersbur and goldsteinn October 20, 2023 03:38

nikic reviewed Oct 20, 2023

View reviewed changes

LiqinWeng force-pushed the addgep-to-gepandgep branch 2 times, most recently from 91304e5 to 7ce04ce Compare October 20, 2023 12:32

LiqinWeng force-pushed the addgep-to-gepandgep branch 2 times, most recently from b4135f1 to feee18a Compare October 20, 2023 13:20

LiqinWeng force-pushed the addgep-to-gepandgep branch from feee18a to e41ce7f Compare October 21, 2023 02:33

LiqinWeng force-pushed the addgep-to-gepandgep branch from e41ce7f to acc62e5 Compare October 22, 2023 04:56

goldsteinn reviewed Oct 22, 2023

View reviewed changes

LiqinWeng force-pushed the addgep-to-gepandgep branch 2 times, most recently from 3053d0b to 886815c Compare October 23, 2023 14:08

nikic reviewed Oct 26, 2023

View reviewed changes

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp Outdated Show resolved Hide resolved

nikic reviewed Oct 26, 2023

View reviewed changes

polly/test/Support/dumpmodule.ll Outdated Show resolved Hide resolved

nikic reviewed Oct 26, 2023

View reviewed changes

llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll Outdated Show resolved Hide resolved

LiqinWeng force-pushed the addgep-to-gepandgep branch 2 times, most recently from 6d0d41b to c75ad5c Compare October 29, 2023 10:57

LiqinWeng force-pushed the addgep-to-gepandgep branch 2 times, most recently from f50b93c to b5651df Compare October 30, 2023 02:52

LiqinWeng closed this Oct 30, 2023

LiqinWeng reopened this Oct 30, 2023

LiqinWeng force-pushed the addgep-to-gepandgep branch from b5651df to 844f1e7 Compare November 7, 2023 01:19

LiqinWeng requested a review from topperc November 7, 2023 01:46

nikic reviewed Nov 16, 2023

View reviewed changes

LiqinWeng force-pushed the addgep-to-gepandgep branch from 844f1e7 to 063aa73 Compare November 19, 2023 08:23

nikic reviewed Nov 24, 2023

View reviewed changes

llvm/test/Transforms/InstCombine/array.ll Outdated Show resolved Hide resolved

llvm/test/Transforms/InstCombine/array.ll Outdated Show resolved Hide resolved

llvm/test/Transforms/InstCombine/array.ll Outdated Show resolved Hide resolved

[InstCombine] Canonicalise SextADD + GEP

93b460b

LiqinWeng force-pushed the addgep-to-gepandgep branch from 063aa73 to 93b460b Compare November 28, 2023 08:08

nikic approved these changes Nov 28, 2023

View reviewed changes

LiqinWeng merged commit f7247d5 into llvm:main Nov 29, 2023
3 checks passed

kiranchandramohan mentioned this pull request Apr 19, 2024

missed optimization : gep + gep -> add + gep #78214

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[InstCombine] Canonicalise SextADD + GEP #69581

[InstCombine] Canonicalise SextADD + GEP #69581

LiqinWeng commented Oct 19, 2023 •

edited

llvmbot commented Oct 19, 2023 •

edited

goldsteinn commented Oct 19, 2023

LiqinWeng commented Oct 20, 2023

LiqinWeng commented Oct 20, 2023 •

edited

nikic left a comment

LiqinWeng commented Oct 20, 2023 •

edited

LiqinWeng commented Oct 21, 2023 •

edited

nikic commented Oct 21, 2023

LiqinWeng commented Oct 22, 2023

goldsteinn Oct 22, 2023

nikic Oct 22, 2023

LiqinWeng Oct 23, 2023 •

edited

goldsteinn commented Oct 22, 2023

LiqinWeng commented Oct 25, 2023

LiqinWeng commented Oct 29, 2023

LiqinWeng commented Oct 30, 2023

LiqinWeng commented Oct 30, 2023

LiqinWeng commented Oct 31, 2023 •

edited

LiqinWeng commented Nov 2, 2023

LiqinWeng commented Nov 16, 2023

LiqinWeng commented Nov 24, 2023

nikic left a comment

LiqinWeng commented Nov 28, 2023

nikic left a comment

brunodf-snps commented Apr 22, 2024

nikic commented Apr 25, 2024 •

edited

brunodf-snps commented Apr 25, 2024

nikic commented Apr 26, 2024

brunodf-snps commented Apr 26, 2024

[InstCombine] Canonicalise SextADD + GEP #69581

[InstCombine] Canonicalise SextADD + GEP #69581

Conversation

LiqinWeng commented Oct 19, 2023 • edited

llvmbot commented Oct 19, 2023 • edited

goldsteinn commented Oct 19, 2023

LiqinWeng commented Oct 20, 2023

LiqinWeng commented Oct 20, 2023 • edited

nikic left a comment

Choose a reason for hiding this comment

LiqinWeng commented Oct 20, 2023 • edited

LiqinWeng commented Oct 21, 2023 • edited

nikic commented Oct 21, 2023

LiqinWeng commented Oct 22, 2023

goldsteinn Oct 22, 2023

Choose a reason for hiding this comment

nikic Oct 22, 2023

Choose a reason for hiding this comment

LiqinWeng Oct 23, 2023 • edited

Choose a reason for hiding this comment

goldsteinn commented Oct 22, 2023

LiqinWeng commented Oct 25, 2023

LiqinWeng commented Oct 29, 2023

LiqinWeng commented Oct 30, 2023

LiqinWeng commented Oct 30, 2023

LiqinWeng commented Oct 31, 2023 • edited

LiqinWeng commented Nov 2, 2023

LiqinWeng commented Nov 16, 2023

LiqinWeng commented Nov 24, 2023

nikic left a comment

Choose a reason for hiding this comment

LiqinWeng commented Nov 28, 2023

nikic left a comment

Choose a reason for hiding this comment

brunodf-snps commented Apr 22, 2024

nikic commented Apr 25, 2024 • edited

brunodf-snps commented Apr 25, 2024

nikic commented Apr 26, 2024

brunodf-snps commented Apr 26, 2024

LiqinWeng commented Oct 19, 2023 •

edited

llvmbot commented Oct 19, 2023 •

edited

LiqinWeng commented Oct 20, 2023 •

edited

LiqinWeng commented Oct 20, 2023 •

edited

LiqinWeng commented Oct 21, 2023 •

edited

LiqinWeng Oct 23, 2023 •

edited

LiqinWeng commented Oct 31, 2023 •

edited

nikic commented Apr 25, 2024 •

edited