diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index d4124ae9aeff0..ee25f6918de8b 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -3139,8 +3139,8 @@ bool RISCVTTIImpl::isProfitableToSinkOperands( bool IsVPSplat = match(Op, m_Intrinsic( m_Value(), m_Value(), m_Value())); if (!IsVPSplat && - !match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), - m_Undef(), m_ZeroMask()))) + !match(Op, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()), + m_Value(), m_ZeroMask()))) continue; // Don't sink i1 splats. diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 02825b2bda484..19a184148c0b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -6018,3 +6018,39 @@ vector.latch: ; preds = %for.body419 for.cond.cleanup: ; preds = %vector.latch ret void } + +;; This is exactly like sink_add_splat except that the splat has operands +;; which haven't been converted to undef. +define void @sink_non_canonical_splat(ptr nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_non_canonical_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: .LBB131_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vadd.vx v8, v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bne a0, a2, .LBB131_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, ptr %a, i64 %index + %wide.load = load <4 x i32>, ptr %0, align 4 + %1 = add <4 x i32> %wide.load, %broadcast.splat + store <4 x i32> %1, ptr %0, align 4 + %index.next = add nuw i64 %index, 4 + %2 = icmp eq i64 %index.next, 1024 + br i1 %2, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +}