diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 41d36e7d16d2e..2a3425a42607e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -26153,10 +26153,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { N1, N2); // Eliminate an intermediate insert into an undef vector: - // insert_subvector undef, (insert_subvector undef, X, 0), N2 --> - // insert_subvector undef, X, N2 + // insert_subvector undef, (insert_subvector undef, X, 0), 0 --> + // insert_subvector undef, X, 0 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR && - N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2))) + N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) && + isNullConstant(N2)) return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0, N1.getOperand(1), N2); diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll index 57de8341cb89c..8a368e7161c3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -505,6 +505,33 @@ define @insert_nxv2i64_nxv3i64(<3 x i64> %sv) #0 { ret %vec } +; This shows a case where we were miscompiling because the index of the +; outer expects a scalable inner and the inner most subvector is fixed length. +; The code generated happens to be correct if VLEN=128, but is wrong if +; VLEN=256. +define @insert_insert_combine(<2 x i32> %subvec) { +; CHECK-LABEL: insert_insert_combine: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: ret + %inner = call @llvm.vector.insert.nxv4i32.v2i32( undef, <2 x i32> %subvec, i64 0) + %outer = call @llvm.vector.insert.nxv4i32.nxv8i32( undef, %inner, i64 4) + ret %outer +} + +; We can combine these two (even with non-zero index on the outer) because +; the vector must be an even multiple. +define @insert_insert_combine2( %subvec) { +; CHECK-LABEL: insert_insert_combine2: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: ret + %inner = call @llvm.vector.insert.nxv2i32.nxv4i32( undef, %subvec, i64 0) + %outer = call @llvm.vector.insert.nxv4i32.nxv8i32( undef, %inner, i64 4) + ret %outer +} + + attributes #0 = { vscale_range(2,1024) } declare @llvm.vector.insert.nxv1i1.nxv4i1(, , i64) @@ -517,6 +544,9 @@ declare @llvm.vector.insert.nxv2f16.nxv32f16( @llvm.vector.insert.nxv1i8.nxv4i8(, , i64 %idx) +declare @llvm.vector.insert.nxv2i32.nxv4i32(, , i64) +declare @llvm.vector.insert.nxv4i32.v2i32(, <2 x i32>, i64) + declare @llvm.vector.insert.nxv2i32.nxv8i32(, , i64 %idx) declare @llvm.vector.insert.nxv4i32.nxv8i32(, , i64 %idx)