diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 400cb1ecb5e03..41149bcc25b35 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -164,6 +164,9 @@ class IndVarSimplify {
 
   bool sinkUnusedInvariants(Loop *L);
 
+  bool rewritePtrIncrementWithOffsetAddressing(
+      Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts);
+
 public:
   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                  const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -2039,6 +2042,106 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
   return Changed;
 }
 
+/// Replace loop-carried pointer increments with offset addressing based on
+/// the canonical induction variable.
+///
+/// A header PHI of the form
+///   %p      = phi ptr [ %base, %preheader ], [ %p.next, %latch ]
+///   %p.next = getelementptr T, ptr %p, iN %stride   ; %stride loop-invariant
+/// is replaced by
+///   %p = getelementptr T, ptr %base, iM (%iv * %stride)
+/// where %iv is the canonical IV and iM is the pointer's index type.
+///
+/// \param L         Loop whose header PHIs are inspected.
+/// \param DeadInsts Receives every PHI whose uses were replaced; the PHI
+///                  itself is left in place for the caller to clean up.
+/// \returns true if at least one PHI was rewritten.
+bool IndVarSimplify::rewritePtrIncrementWithOffsetAddressing(
+    Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+  // Only loops whose latch branches on a compare are considered.
+  if (!L->getLatchCmpInst())
+    return false;
+
+  // Every rewritten pointer is expressed in terms of the canonical IV
+  // (starts at 0, steps by 1), so look it up once instead of once per PHI.
+  PHINode *CanonicalIV = L->getCanonicalInductionVariable();
+  if (!CanonicalIV)
+    return false;
+
+  // Extending the IV to a wider index type only yields the iteration count if
+  // the IV itself provably never wraps.
+  const auto *IVRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(CanonicalIV));
+  const bool IVNeverWraps =
+      IVRec && (IVRec->hasNoUnsignedWrap() || IVRec->hasNoSignedWrap());
+  const unsigned IVBits = CanonicalIV->getType()->getIntegerBitWidth();
+
+  // Snapshot the PHIs: new instructions are inserted into the header below.
+  SmallVector<PHINode *, 8> LoopPhis(
+      llvm::make_pointer_range(L->getHeader()->phis()));
+
+  IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
+  bool Changed = false;
+  for (PHINode *PHI : LoopPhis) {
+    // Only simple, live, two-input pointer PHIs are candidates.
+    if (!PHI->getType()->isPointerTy() || PHI->getNumIncomingValues() != 2 ||
+        PHI->user_empty() || PHI->hasConstantValue())
+      continue;
+
+    // Exactly one incoming value must be loop-invariant (the base pointer);
+    // the other one is the per-iteration increment.
+    Value *Op0 = PHI->getIncomingValue(0);
+    Value *Op1 = PHI->getIncomingValue(1);
+    const bool Op0IsInvariant = L->isLoopInvariant(Op0);
+    if (Op0IsInvariant == L->isLoopInvariant(Op1))
+      continue;
+    Value *Base = Op0IsInvariant ? Op0 : Op1;
+    auto *StepGEP = dyn_cast<GetElementPtrInst>(Op0IsInvariant ? Op1 : Op0);
+
+    // The increment must advance *this* PHI by a single index. A GEP based on
+    // any other pointer is not a recurrence of the PHI and must not be
+    // rewritten.
+    if (!StepGEP || StepGEP->getPointerOperand() != PHI ||
+        StepGEP->getNumIndices() != 1)
+      continue;
+
+    // Only rewrite when the base is itself the result of a GEP.
+    if (!isa<GetElementPtrInst>(Base))
+      continue;
+
+    // The stride has to be a loop-invariant scalar integer (constants are
+    // trivially invariant).
+    Value *Stride = StepGEP->getOperand(1);
+    if (!Stride->getType()->isIntegerTy() || !L->isLoopInvariant(Stride))
+      continue;
+
+    // Do the offset arithmetic in the pointer's index type, exactly like the
+    // chain of GEPs it replaces; multiplying in a narrower type could wrap
+    // where the original pointer arithmetic did not.
+    const unsigned IdxBits = DL.getIndexTypeSizeInBits(PHI->getType());
+    if (IVBits < IdxBits && !IVNeverWraps)
+      continue;
+    Type *IdxTy = Builder.getIntNTy(IdxBits);
+
+    // The IV counts iterations from zero, so it is zero-extended; GEP indices
+    // are signed, so the stride is sign-extended.
+    Value *Count = Builder.CreateZExtOrTrunc(CanonicalIV, IdxTy);
+    Value *Step = Builder.CreateSExtOrTrunc(Stride, IdxTy);
+    Value *Offset = Builder.CreateMul(Count, Step);
+
+    // Keep 'inbounds' only if the original increment had it; adding it would
+    // introduce poison the source program did not have.
+    Type *ElemTy = StepGEP->getSourceElementType();
+    Value *NewGEP = StepGEP->isInBounds()
+                        ? Builder.CreateInBoundsGEP(ElemTy, Base, {Offset})
+                        : Builder.CreateGEP(ElemTy, Base, {Offset});
+
+    PHI->replaceAllUsesWith(NewGEP);
+    DeadInsts.emplace_back(PHI);
+    Changed = true;
+  }
+  return Changed;
+}
+
 //===----------------------------------------------------------------------===//
 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
 //===----------------------------------------------------------------------===//
@@ -2116,6 +2219,9 @@ bool IndVarSimplify::run(Loop *L) {
     SE->forgetLoop(L);
   }
 
+  // Try to rewrite pointer increments as offset addressing off the canonical IV.
+  Changed |= rewritePtrIncrementWithOffsetAddressing(L, DeadInsts);
+
   // If we have a trip count expression, rewrite the loop's exit condition
   // using it.
   if (!DisableLFTR) {
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-ptr-addr-with-offset-addr.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-ptr-addr-with-offset-addr.ll
new file mode 100644
index 0000000000000..c08696f51b076
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-ptr-addr-with-offset-addr.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=indvars -S -o - | FileCheck %s
+
+
+define void @mat_transpose(float* %pIn, float* %pOut, i32 %nRows, i32 %nCols) {
+; CHECK-LABEL: define void @mat_transpose(
+; CHECK-SAME: ptr [[PIN:%.*]], ptr [[POUT:%.*]], i32 [[NROWS:%.*]], i32 [[NCOLS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_ROW:.*]]
+; CHECK:       [[LOOP_ROW]]:
+; CHECK-NEXT:    [[ROW_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ROW_INC:%.*]], %[[LOOP_ROW_END:.*]] ]
+; CHECK-NEXT:    [[CMP_ROW:%.*]] = icmp ult i32 [[ROW_IV]], [[NROWS]]
+; CHECK-NEXT:    br i1 [[CMP_ROW]], label %[[LOOP_COL_PRE:.*]], label %[[EXIT:.*]]
+; CHECK:       [[LOOP_COL_PRE]]:
+; CHECK-NEXT:    [[PX_BASE:%.*]] = getelementptr inbounds float, ptr [[POUT]], i32 [[ROW_IV]]
+; CHECK-NEXT:    br label %[[LOOP_COL:.*]]
+; CHECK:       [[LOOP_COL]]:
+; CHECK-NEXT:    [[COL_IV:%.*]] = phi i32 [ 0, %[[LOOP_COL_PRE]] ], [ [[COL_INC:%.*]], %[[LOOP_COL]] ]
+; CHECK-NEXT:    [[PIN_PTR:%.*]] = phi ptr [ [[PIN]], %[[LOOP_COL_PRE]] ], [ [[PIN_NEXT:%.*]], %[[LOOP_COL]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[COL_IV]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[NROWS]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[PX_PTR:%.*]] = getelementptr float, ptr [[PX_BASE]], i64 [[TMP2]]
+; CHECK-NEXT:    [[PIN_LOAD:%.*]] = load float, ptr [[PIN_PTR]], align 4
+; CHECK-NEXT:    [[PIN_NEXT]] = getelementptr float, ptr [[PIN_PTR]], i32 1
+; CHECK-NEXT:    store float [[PIN_LOAD]], ptr [[PX_PTR]], align 4
+; CHECK-NEXT:    [[COL_INC]] = add nuw i32 [[COL_IV]], 1
+; CHECK-NEXT:    [[CMP_COL:%.*]] = icmp ult i32 [[COL_INC]], [[NCOLS]]
+; CHECK-NEXT:    br i1 [[CMP_COL]], label %[[LOOP_COL]], label %[[LOOP_ROW_END]]
+; CHECK:       [[LOOP_ROW_END]]:
+; CHECK-NEXT:    [[ROW_INC]] = add nuw i32 [[ROW_IV]], 1
+; CHECK-NEXT:    br label %[[LOOP_ROW]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop_row
+
+loop_row:
+  %row.iv = phi i32 [ 0, %entry ], [ %row.inc, %loop_row_end ]
+  %cmp.row = icmp ult i32 %row.iv, %nRows
+  br i1 %cmp.row, label %loop_col_pre, label %exit
+
+loop_col_pre:
+  %px.base = getelementptr inbounds float, float* %pOut, i32 %row.iv
+  br label %loop_col
+
+loop_col:
+  %col.iv = phi i32 [ 0, %loop_col_pre ], [ %col.inc, %loop_col ]
+  %pIn.ptr = phi float* [ %pIn, %loop_col_pre ], [ %pIn.next, %loop_col ]
+  %px.ptr = phi float* [ %px.base, %loop_col_pre ], [ %px.next, %loop_col ]
+  %pIn.load = load float, float* %pIn.ptr
+  %pIn.next = getelementptr float, float* %pIn.ptr, i32 1
+  store float %pIn.load, float* %px.ptr
+  %px.next = getelementptr float, float* %px.ptr, i32 %nRows
+  %col.inc = add nuw i32 %col.iv, 1
+  %cmp.col = icmp ult i32 %col.inc, %nCols
+  br i1 %cmp.col, label %loop_col, label %loop_row_end
+
+loop_row_end:
+  %row.inc = add nuw i32 %row.iv, 1
+  br label %loop_row
+
+exit:
+  ret void
+}