diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6b122e19c0169..099f871f10cad 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1311,14 +1311,15 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { // Skip recipes that aren't single scalars or don't have only their // scalar results used. In the latter case, we would introduce extra // broadcasts. - if (!vputils::isSingleScalar(RepOrWidenR) || + if ((!vputils::isSingleScalar(RepOrWidenR) && + !vputils::onlyFirstLaneUsed(RepOrWidenR)) || + RepOrWidenR->getNumUsers() == 0 || !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) { - return U->usesScalars(RepOrWidenR) || + return U->usesScalars(RepOrWidenR) || isa(U) || match(cast(U), m_ExtractLastElement(m_VPValue())); })) continue; - auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), true /*IsSingleScalar*/); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 884eeac09e1e3..d26a33add96ed 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -428,7 +428,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ] +; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ] ; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -436,40 +436,37 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP19]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]] -; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]] -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]] +; DEFAULT-NEXT: [[TMP10:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]] +; DEFAULT-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 +; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP10]], [[TMP7]] +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP8]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT37:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT36]], <4 x i1> poison, <4 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0 -; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; DEFAULT: [[PRED_STORE_IF]]: ; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 ; DEFAULT-NEXT: store i32 [[TMP11]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]] ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]] ; DEFAULT: [[PRED_STORE_CONTINUE]]: -; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]] -; DEFAULT: [[PRED_STORE_IF32]]: +; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]] +; DEFAULT: [[PRED_STORE_IF30]]: ; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 ; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE31]] +; DEFAULT: [[PRED_STORE_CONTINUE31]]: +; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]] +; DEFAULT: [[PRED_STORE_IF32]]: +; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 +; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]] ; DEFAULT: [[PRED_STORE_CONTINUE33]]: -; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]] +; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]] ; DEFAULT: [[PRED_STORE_IF34]]: -; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] -; DEFAULT: [[PRED_STORE_CONTINUE35]]: -; DEFAULT-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]] -; DEFAULT: [[PRED_STORE_IF36]]: ; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 ; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]] -; DEFAULT: [[PRED_STORE_CONTINUE37]]: -; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <4 x i1> [[TMP8]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] +; DEFAULT: [[PRED_STORE_CONTINUE35]]: +; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <4 x i1> [[BROADCAST_SPLAT37]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] @@ -658,15 +655,15 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1 ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; COMMON: [[PRED_STORE_CONTINUE12]]: -; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]] +; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT1:.*]] ; COMMON: [[PRED_STORE_IF13]]: ; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 ; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1 -; COMMON-NEXT: br label %[[EXIT]] -; COMMON: [[EXIT]]: +; COMMON-NEXT: br label %[[EXIT1]] +; COMMON: [[EXIT1]]: ; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; COMMON: [[MIDDLE_BLOCK]]: -; COMMON-NEXT: br [[EXIT1:label %.*]] +; COMMON-NEXT: br [[EXIT:label %.*]] ; COMMON: [[SCALAR_PH]]: ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index efcd810203a44..379825bbbab88 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -16,7 +16,7 @@ ; CM: vector.ph: ; CM: CLONE ir<%a> = extractvalue ir<%sv> ; CM: CLONE ir<%b> = extractvalue ir<%sv> -; CM: WIDEN ir<%add> = add ir<%a>, ir<%b> +; CM: CLONE ir<%add> = add ir<%a>, ir<%b> ; CM: Successor(s): vector loop ; CM: LV: Scalar loop costs: 5. @@ -30,17 +30,15 @@ define void @test1(ptr %dst, {i64, i64} %sv) { ; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; FORCED: [[VECTOR_PH]]: ; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0 -; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { i64, i64 } [[SV]], 1 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0 +; FORCED-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], [[TMP4]] +; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0 ; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] ; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]] -; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4 +; FORCED-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP2]], align 4 ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index 0c6a490ddf4ba..eceda0897b174 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -17,17 +17,15 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]] ; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i64 [[EXTRACT0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i64, i64 } [[SV]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[EXTRACT0]], [[TMP10]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = add [[DOTSPLAT2]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP8]], align 8 +; CHECK-NEXT: store [[BROADCAST_SPLAT2]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index e014b9ddbcd0e..95e68d6cc300a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -304,9 +304,9 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = xor [[BROADCAST_SPLAT]], splat (i32 -1) ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 9) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] @@ -320,7 +320,7 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] -; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP19]], align 4 [[TMP16]], splat (i1 true), i32 [[TMP8]]) +; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], align 4 [[TMP16]], splat (i1 true), i32 [[TMP8]]) ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP8]] to i64 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] diff --git a/llvm/test/Transforms/LoopVectorize/pr50686.ll b/llvm/test/Transforms/LoopVectorize/pr50686.ll index 14bcfde5b4423..dbe7f5874f53e 100644 --- a/llvm/test/Transforms/LoopVectorize/pr50686.ll +++ b/llvm/test/Transforms/LoopVectorize/pr50686.ll @@ -26,12 +26,13 @@ define void @m(ptr nocapture %p, ptr nocapture %p2, i32 %q) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT3]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i32 [[TMP9]], [[TMP6]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 60 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]