diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2368d18b0373c..95d220d53a2a9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -330,6 +330,9 @@ VPPartialReductionRecipe::computeCost(ElementCount VF, auto HandleWiden = [&](VPWidenRecipe *Widen) { if (match(Widen, m_Sub(m_ZeroInt(), m_VPValue(Op)))) { - Widen = dyn_cast<VPWidenRecipe>(Op->getDefiningRecipe()); + // Op may be a live-in with no defining recipe; dyn_cast asserts on null. + Widen = dyn_cast_if_present<VPWidenRecipe>(Op->getDefiningRecipe()); + if (!Widen) + return; } Opcode = Widen->getOpcode(); VPRecipeBase *ExtAR = Widen->getOperand(0)->getDefiningRecipe(); @@ -355,10 +358,10 @@ VPPartialReductionRecipe::computeCost(ElementCount VF, ExtAType = GetExtendKind(OpR); } else if (isa(OpR)) { auto RedPhiOp1R = getOperand(1)->getDefiningRecipe(); - if (isa(RedPhiOp1R)) { + if (isa_and_nonnull(RedPhiOp1R)) { InputTypeA = Ctx.Types.inferScalarType(RedPhiOp1R->getOperand(0)); ExtAType = GetExtendKind(RedPhiOp1R); - } else if (auto Widen = dyn_cast(RedPhiOp1R)) + } else if (auto Widen = dyn_cast_if_present(RedPhiOp1R)) HandleWiden(Widen); } else if (auto Widen = dyn_cast(OpR)) { HandleWiden(Widen); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll index b033f6051f812..bf6967fecbc92 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll @@ -467,3 +467,61 @@ loop: exit: ret i32 %red.next } + +; Test case for https://github.com/llvm/llvm-project/issues/162902. 
+define void @partial_reduction_zext_const(i64 %arg, ptr %ptr) { +; CHECK-LABEL: define void @partial_reduction_zext_const( +; CHECK-SAME: i64 [[ARG:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 100, [[ARG]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[ARG]], [[N_VEC]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2]] = add <4 x i8> [[VEC_PHI]], splat (i8 2) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]]) +; CHECK-NEXT: store i8 [[TMP4]], ptr [[PTR]], align 1 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[ARG]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PARTIAL:%.*]] = phi i8 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ 
[[PARTIAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ZERO_EXT:%.*]] = zext i2 -2 to i8 +; CHECK-NEXT: [[PARTIAL_NEXT]] = add i8 [[PARTIAL]], [[ZERO_EXT]] +; CHECK-NEXT: store i8 [[PARTIAL_NEXT]], ptr [[PTR]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ %arg, %entry ], [ %iv.next, %loop ] + %partial = phi i8 [ 0, %entry ], [ %partial.next, %loop ] + %zero.ext = zext i2 2 to i8 + %partial.next = add i8 %partial, %zero.ext + store i8 %partial.next, ptr %ptr, align 1 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +}