diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d6b4dce3834fb..acc85ceb693eb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5130,8 +5130,14 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
       Instruction *Update = cast<Instruction>(
           cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
-      ScalarPtrs.insert(Update);
-      return;
+
+      // If there is more than one user of Update (Ptr), we shouldn't assume it
+      // will be scalar after vectorisation as other users of the instruction
+      // may require widening. Otherwise, add it to ScalarPtrs.
+      if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
+        ScalarPtrs.insert(Update);
+        return;
+      }
     }
     // We only care about bitcast and getelementptr instructions contained in
     // the loop.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index 62271b89f6fb1..c2a7f3edea0c9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -116,6 +116,51 @@ exit:                                             ; preds = %loop.body
   ret void
 }
 
+define void @pointer_induction(i8* noalias %start, i64 %N) {
+; CHECK-LABEL: @pointer_induction(
+; CHECK:       vector.ph:
+; CHECK:         [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8*> poison, i8* [[START:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX1]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], <vscale x 2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX1]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP12]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[NEXT_GEP]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq <vscale x 2 x i8*> [[TMP13]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %ptr.phi = phi i8* [ %ptr.phi.next, %for.body ], [ %start, %entry ]
+  %index = phi i64 [ %index_nxt, %for.body ], [ 0, %entry ]
+  %index_nxt = add i64 %index, 1
+  %0 = load i8, i8* %ptr.phi, align 1
+  %ptr.phi.next = getelementptr inbounds i8, i8* %ptr.phi, i64 1
+  %cmp.i.not = icmp eq i8* %ptr.phi.next, %start
+  %cmp = icmp ult i64 %index, %N
+  br i1 %cmp, label %for.body, label %end, !llvm.loop !0
+
+end:
+  ret void
+}
+
 attributes #0 = {"target-features"="+sve"}