diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0f7afa2fc25c2..f98d15c285a69 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15959,7 +15959,7 @@ class HorizontalReduction {
         LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost
                           << " for reduction\n");
         if (!Cost.isValid())
-          return nullptr;
+          break;
         if (Cost >= -SLPCostThreshold) {
           V.getORE()->emit([&]() {
             return OptimizationRemarkMissed(
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
new file mode 100644
index 0000000000000..31f16801b7a64
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s
+
+target triple = "riscv64-unknown-linux-gnu"
+; The or-reduction below mixes operands with an invalid vector cost and operands that can be vectorized; the expected output keeps a <4 x i32> llvm.vector.reduce.or plus scalar ors for the rest.
+define void @partial_vec_invalid_cost() #0 {
+; CHECK-LABEL: define void @partial_vec_invalid_cost(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LSHR_1:%.*]] = lshr i96 0, 0
+; CHECK-NEXT:    [[LSHR_2:%.*]] = lshr i96 0, 0
+; CHECK-NEXT:    [[TRUNC_I96_1:%.*]] = trunc i96 [[LSHR_1]] to i32
+; CHECK-NEXT:    [[TRUNC_I96_2:%.*]] = trunc i96 [[LSHR_2]] to i32
+; CHECK-NEXT:    [[TRUNC_I96_3:%.*]] = trunc i96 0 to i32
+; CHECK-NEXT:    [[TRUNC_I96_4:%.*]] = trunc i96 0 to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[OP_RDX:%.*]] = or i32 [[TMP0]], [[TRUNC_I96_2]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = or i32 [[TRUNC_I96_1]], [[TRUNC_I96_3]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = or i32 [[OP_RDX]], [[OP_RDX1]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i32 [[OP_RDX2]], [[TRUNC_I96_4]]
+; CHECK-NEXT:    [[STORE_THIS:%.*]] = zext i32 [[OP_RDX3]] to i96
+; CHECK-NEXT:    store i96 [[STORE_THIS]], ptr null, align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %lshr.1 = lshr i96 0, 0 ; These ops
+  %lshr.2 = lshr i96 0, 0 ; return an
+  %add.0 = add i96 0, 0   ; invalid
+  %add.1 = add i96 0, 0   ; vector cost.
+
+  %trunc.i96.1 = trunc i96 %lshr.1 to i32 ; These ops
+  %trunc.i96.2 = trunc i96 %lshr.2 to i32 ; return an
+  %trunc.i96.3 = trunc i96 %add.0 to i32  ; invalid
+  %trunc.i96.4 = trunc i96 %add.1 to i32  ; vector cost.
+
+  %or.0 = or i32 %trunc.i96.1, %trunc.i96.2 ; start of the or chain:
+  %or.1 = or i32 %or.0, %trunc.i96.3        ; combines the four
+  %or.2 = or i32 %or.1, %trunc.i96.4        ; truncated values
+
+  %zext.0 = zext i1 0 to i32 ; These
+  %zext.1 = zext i1 0 to i32 ; ops
+  %zext.2 = zext i1 0 to i32 ; are
+  %zext.3 = zext i1 0 to i32 ; vectorized
+
+  %or.3 = or i32 %or.2, %zext.0 ; users
+  %or.4 = or i32 %or.3, %zext.1 ; of
+  %or.5 = or i32 %or.4, %zext.2 ; vectorized
+  %or.6 = or i32 %or.5, %zext.3 ; ops
+
+  %store.this = zext i32 %or.6 to i96 ; widen the final or result back to i96
+
+  store i96 %store.this, ptr null, align 16
+  ret void
+}
+
+attributes #0 = { "target-features"="+v" } ; "+v" is the RISC-V vector extension feature