diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 2047de281c6c7..954f83efb174d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -664,6 +664,9 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, properlyDominates(Previous, SinkCandidate, VPDT)) return true; + if (SinkCandidate->mayHaveSideEffects()) + return false; + WorkList.push_back(SinkCandidate); return true; }; @@ -674,6 +677,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Current = WorkList[I]; assert(Current->getNumDefinedValues() == 1 && "only recipes with a single defined value expected"); + for (VPUser *User : Current->getVPSingleValue()->users()) { if (auto *R = dyn_cast(User)) if (!TryToPushSinkCandidate(R)) diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 99331dd2266aa..59a997a5f3f6b 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -108,3 +108,33 @@ loop: exit: ret void } + +; This test has two FORs (for.x and for.y) where incoming value from the previous +; iteration (for.x.prev) of one FOR (for.y) depends on another FOR (for.x). Due to +; this dependency all uses of the former FOR (for.y) should be sunk after +; incoming value from the previous iteration (for.x.prev) of te latter FOR (for.y). +; That means side-effecting user (store i64 %for.y.i64, ptr %gep) of the latter +; FOR (for.y) should be moved which is not currently supported. +define i32 @test_chained_first_order_recurrences_4(ptr %base) { +; CHECK-LABEL: 'test_chained_first_order_recurrences_4' +; CHECK: No VPlan could be built for + +entry: + br label %loop + +ret: + ret i32 0 + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] + %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] + %iv.next = add i64 %iv, 1 + %gep = getelementptr i64, ptr %base, i64 %iv + %for.x.prev = trunc i64 %for.x to i32 + %for.y.i64 = sext i32 %for.y to i64 + store i64 %for.y.i64, ptr %gep + %for.x.next = mul i64 0, 0 + %icmp = icmp ugt i64 %iv, 4096 + br i1 %icmp, label %ret, label %loop +} diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll index aea49ac225de7..3d07e2cb2bc4c 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -221,55 +221,22 @@ exit: define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %b) { ; CHECK-LABEL: @test_pr54233_for_depend_on_each_other( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4]] = xor <4 x i32> , [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 -; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, 1000 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3 -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[SCALAR_RECUR4]], 10 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SCALAR_RECUR4]], [[SCALAR_RECUR]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[FOR_2]], 10 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[FOR_2]], [[FOR_1]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], 255 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR]], [[OR]] -; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[SCALAR_RECUR4]] -; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B]], align 4 -; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[FOR_2]] +; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B:%.*]], align 4 +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] ; CHECK-NEXT: store i32 [[AND]], ptr [[A_GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ;