diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 0222b0aa81063..3f2111b349b25 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -74,7 +74,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
                                      all_of(Rep->operands(), isSingleScalar));
   }
   if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe,
-          VPWidenSelectRecipe>(VPV))
+          VPWidenSelectRecipe, VPWidenCastRecipe>(VPV))
     return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
   if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
     return PreservesUniformity(WidenR->getOpcode()) &&
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
index 1dcd665817196..8d610eb9a337c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
@@ -76,15 +76,15 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP28]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i64>
+; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0
+; CHECK-NEXT:    [[TMP30:%.*]] = or i64 [[TMP29]], 1
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext <4 x i8> [[VEC_IND]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP27]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
 ; CHECK-NEXT:    [[TMP102:%.*]] = extractelement <4 x i64> [[TMP26]], i32 0
 ; CHECK-NEXT:    [[TMP103:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP102]], i64 [[OFF]]
-; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0
-; CHECK-NEXT:    [[TMP105:%.*]] = or i64 [[TMP104]], 1
-; CHECK-NEXT:    store i64 [[TMP105]], ptr [[TMP103]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    store i64 [[TMP30]], ptr [[TMP103]], align 8, !alias.scope [[META3]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
@@ -92,9 +92,7 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
 ; CHECK:       [[PRED_STORE_IF17]]:
 ; CHECK-NEXT:    [[TMP108:%.*]] = extractelement <4 x i64> [[TMP26]], i32 1
 ; CHECK-NEXT:    [[TMP109:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP108]], i64 [[OFF]]
-; CHECK-NEXT:    [[TMP110:%.*]] = extractelement <4 x i64> [[TMP25]], i32 1
-; CHECK-NEXT:    [[TMP111:%.*]] = or i64 [[TMP110]], 1
-; CHECK-NEXT:    store i64 [[TMP111]], ptr [[TMP109]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    store i64 [[TMP30]], ptr [[TMP109]], align 8, !alias.scope [[META3]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
 ; CHECK:       [[PRED_STORE_CONTINUE18]]:
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
@@ -102,9 +100,7 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
 ; CHECK:       [[PRED_STORE_IF19]]:
 ; CHECK-NEXT:    [[TMP114:%.*]] = extractelement <4 x i64> [[TMP26]], i32 2
 ; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP114]], i64 [[OFF]]
-; CHECK-NEXT:    [[TMP116:%.*]] = extractelement <4 x i64> [[TMP25]], i32 2
-; CHECK-NEXT:    [[TMP117:%.*]] = or i64 [[TMP116]], 1
-; CHECK-NEXT:    store i64 [[TMP117]], ptr [[TMP115]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    store i64 [[TMP30]], ptr [[TMP115]], align 8, !alias.scope [[META3]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
 ; CHECK:       [[PRED_STORE_CONTINUE20]]:
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
@@ -112,9 +108,7 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
 ; CHECK:       [[PRED_STORE_IF21]]:
 ; CHECK-NEXT:    [[TMP120:%.*]] = extractelement <4 x i64> [[TMP26]], i32 3
 ; CHECK-NEXT:    [[TMP121:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP120]], i64 [[OFF]]
-; CHECK-NEXT:    [[TMP122:%.*]] = extractelement <4 x i64> [[TMP25]], i32 3
-; CHECK-NEXT:    [[TMP123:%.*]] = or i64 [[TMP122]], 1
-; CHECK-NEXT:    store i64 [[TMP123]], ptr [[TMP121]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    store i64 [[TMP30]], ptr [[TMP121]], align 8, !alias.scope [[META3]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
 ; CHECK:       [[PRED_STORE_CONTINUE22]]:
 ; CHECK-NEXT:    store i8 0, ptr [[DST_2]], align 1, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
index 9a699826696ec..6c09b02153b34 100644
--- a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
@@ -18,8 +18,9 @@ define void @minbw_cast(ptr %dst, i64 %n, i1 %bool1, i1 %bool2) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT2]] to <4 x i8>
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i1> [[BROADCAST_SPLAT]] to <4 x i8>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i8> [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i8> [[TMP2]], i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i8> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]