diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 852196e589c59..61339f6f32af3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1449,6 +1449,14 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { return; } + // Replace splice(A, A, -1) with A if A is uniform. + if (match(Def, m_VPInstruction( + m_VPValue(A), m_Deferred(A))) && + vputils::isSingleScalar(A)) { + Def->replaceAllUsesWith(A); + return; + } + // Look through ExtractLastLane. if (match(Def, m_ExtractLastLane(m_VPValue(A)))) { if (match(A, m_BuildVector())) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index f2c0ca30a6c18..5c7dcd716e766 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -85,16 +85,15 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2: [[VECTOR_BODY]]: ; VSCALEFORTUNING2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7:%.*]], %[[VECTOR_BODY]] ] -; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ] +; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7]], %[[VECTOR_BODY]] ] ; VSCALEFORTUNING2-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP47:%.*]], %[[VECTOR_BODY]] ] ; VSCALEFORTUNING2-NEXT: [[VEC_PHI5:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[VECTOR_BODY]] ] ; VSCALEFORTUNING2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP8]], align 4 ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, i32 [[TMP24]], i64 0 ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLAT7]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer ; VSCALEFORTUNING2-NEXT: [[TMP25:%.*]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR]], [[BROADCAST_SPLAT7]], i32 -1) -; VSCALEFORTUNING2-NEXT: [[TMP26]] = call @llvm.vector.splice.nxv4i32( [[BROADCAST_SPLAT7]], [[BROADCAST_SPLAT7]], i32 -1) ; VSCALEFORTUNING2-NEXT: [[TMP27:%.*]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR4]], [[TMP25]], i32 -1) -; VSCALEFORTUNING2-NEXT: [[TMP28:%.*]] = call @llvm.vector.splice.nxv4i32( [[TMP25]], [[TMP26]], i32 -1) +; VSCALEFORTUNING2-NEXT: [[TMP28:%.*]] = call @llvm.vector.splice.nxv4i32( [[TMP25]], [[BROADCAST_SPLAT7]], i32 -1) ; VSCALEFORTUNING2-NEXT: [[TMP29:%.*]] = or [[TMP27]], [[BROADCAST_SPLAT]] ; VSCALEFORTUNING2-NEXT: [[TMP30:%.*]] = or [[TMP28]], [[BROADCAST_SPLAT]] ; VSCALEFORTUNING2-NEXT: [[TMP31:%.*]] = shl [[TMP29]], splat (i32 1) @@ -125,21 +124,17 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2: [[MIDDLE_BLOCK]]: ; VSCALEFORTUNING2-NEXT: [[BIN_RDX:%.*]] = or [[TMP48]], [[TMP47]] ; VSCALEFORTUNING2-NEXT: [[TMP50:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[BIN_RDX]]) -; VSCALEFORTUNING2-NEXT: [[TMP51:%.*]] = call i32 @llvm.vscale.i32() -; VSCALEFORTUNING2-NEXT: [[TMP52:%.*]] = mul nuw i32 [[TMP51]], 4 -; VSCALEFORTUNING2-NEXT: [[TMP53:%.*]] = sub i32 [[TMP52]], 1 -; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[TMP26]], i32 [[TMP53]] ; VSCALEFORTUNING2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; VSCALEFORTUNING2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VSCALEFORTUNING2: [[SCALAR_PH]]: ; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP24]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT11:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT11:%.*]] = phi i32 [ [[TMP24]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; VSCALEFORTUNING2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; VSCALEFORTUNING2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP50]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; VSCALEFORTUNING2-NEXT: br label %[[LOOP:.*]] ; VSCALEFORTUNING2: [[LOOP]]: -; VSCALEFORTUNING2-NEXT: [[TMP54:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP57:%.*]], %[[LOOP]] ] -; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT11]], %[[SCALAR_PH]] ], [ [[TMP54]], %[[LOOP]] ] +; VSCALEFORTUNING2-NEXT: [[TMP51:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP57:%.*]], %[[LOOP]] ] +; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT11]], %[[SCALAR_PH]] ], [ [[TMP51]], %[[LOOP]] ] ; VSCALEFORTUNING2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; VSCALEFORTUNING2-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ] ; VSCALEFORTUNING2-NEXT: [[TMP56:%.*]] = add i64 [[Y]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index cf2e7ccd1b2f0..8739928d1c747 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -8,9 +8,6 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[FOR_START]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLAT]], <4 x i8> [[BROADCAST_SPLAT]], <4 x i32> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -31,18 +28,14 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP7]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[TMP0]], i32 0 -; CHECK-NEXT: store i8 [[TMP17]], ptr [[TMP9]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i8> [[TMP0]], i32 1 -; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP10]], align 1 -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i8> [[TMP0]], i32 2 -; CHECK-NEXT: store i8 [[TMP19]], ptr [[TMP11]], align 1 -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i8> [[TMP0]], i32 3 -; CHECK-NEXT: store i8 [[TMP20]], ptr [[TMP12]], align 1 -; CHECK-NEXT: store i8 [[TMP17]], ptr [[TMP13]], align 1 -; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP14]], align 1 -; CHECK-NEXT: store i8 [[TMP19]], ptr [[TMP15]], align 1 -; CHECK-NEXT: store i8 [[TMP20]], ptr [[TMP16]], align 1 +; CHECK-NEXT: store i8 [[FOR_START]], ptr [[TMP9]], align 1 +; CHECK-NEXT: store i8 [[FOR_START]], ptr [[TMP10]], align 1 +; CHECK-NEXT: store i8 [[FOR_START]], ptr [[TMP11]], align 1 +; CHECK-NEXT: store i8 [[FOR_START]], ptr [[TMP12]], align 1 +; CHECK-NEXT: store i8 [[FOR_START]], ptr [[TMP13]], align 1 +; CHECK-NEXT: store i8 [[FOR_START]], ptr [[TMP14]], align 1 +; CHECK-NEXT: store i8 [[FOR_START]], ptr [[TMP15]], align 1 +; CHECK-NEXT: store i8 [[FOR_START]], ptr [[TMP16]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], -8 ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]