diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c7d619d35ff559..0f53d8dddc02e9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23040,6 +23040,9 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
+    if (Chain->getMemoryVT().isScalableVector())
+      return false;
+
     // If the chain has more than one use, then we can't reorder the mem ops.
     if (!SDValue(Chain, 0)->hasOneUse())
       break;
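The guard added above returns early because parallelizeChainedStores reasons about the stores in a chain as byte intervals, and a scalable-vector store covers a vscale-dependent number of bytes that cannot be enumerated at compile time. The following is a minimal standalone sketch of that idea using simplified stand-in types rather than the SelectionDAG API (Store, canParallelizeChain, and the sizes in main are all illustrative):

    #include <cstdint>
    #include <optional>

    // Simplified stand-in for a store node: each store knows the previous
    // store in its chain, and its size in bytes is either a compile-time
    // constant or, for scalable vectors, unknown (a runtime multiple of
    // vscale).
    struct Store {
      std::optional<uint64_t> SizeInBytes; // nullopt => scalable, unknown size
      const Store *Chain = nullptr;        // previous store in the chain
    };

    // Mirrors the shape of the new guard: walking the chain is only safe
    // while every store has a fixed size, since the overlapping-interval
    // bookkeeping needs concrete byte offsets.
    bool canParallelizeChain(const Store &St) {
      for (const Store *Chain = St.Chain; Chain; Chain = Chain->Chain) {
        if (!Chain->SizeInBytes) // scalable store: intervals unknown, bail out
          return false;
      }
      return true;
    }

    int main() {
      Store FixedStore{16, nullptr};                  // fixed 16-byte store
      Store ScalableStore{std::nullopt, &FixedStore}; // scalable store
      Store Head{8, &ScalableStore};
      // The chain contains a scalable store, so the transform must bail out.
      return canParallelizeChain(Head) ? 1 : 0;
    }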
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 870c4bf5e5a086..e28da9c37aa568 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7715,6 +7715,12 @@ static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
   EVT IdxVT = Idx.getValueType();
   unsigned NElts = VecVT.getVectorMinNumElements();
   if (VecVT.isScalableVector()) {
+    // If this is a constant index and we know the value is less than the
+    // minimum number of elements then it's safe to return Idx.
+    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
+      if (IdxCst->getZExtValue() < NElts)
+        return Idx;
+
     SDValue VS = DAG.getVScale(dl, IdxVT,
                                APInt(IdxVT.getFixedSizeInBits(), NElts));
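The clampDynamicVectorIndex change rests on a simple invariant: a scalable vector <vscale x N x T> always has at least N elements because vscale >= 1, so a constant index below N can never be out of bounds and the compare-and-select clamp is dead code. A small self-contained model of that reasoning, with illustrative names rather than the real SelectionDAG types:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Model of clamping an index into a scalable vector <vscale x MinNElts x T>.
    // The real element count is VScale * MinNElts, where VScale >= 1 is only
    // known at run time.
    uint64_t clampIndex(uint64_t Idx, uint64_t MinNElts, uint64_t VScale,
                        bool IdxIsConstant) {
      assert(VScale >= 1 && "vscale is always at least 1");
      // Fast path mirroring the patch: a constant index below the *minimum*
      // element count is in bounds for every possible vscale, so no clamp
      // is needed.
      if (IdxIsConstant && Idx < MinNElts)
        return Idx;
      // Otherwise clamp to the last valid element, computed from vscale.
      uint64_t NumElts = VScale * MinNElts;
      return std::min(Idx, NumElts - 1);
    }

    int main() {
      // <vscale x 2 x i64> with vscale == 2 has 4 elements: index 1 is
      // always valid, while index 5 must be clamped to 3.
      bool Ok = clampIndex(1, 2, 2, true) == 1 &&
                clampIndex(5, 2, 2, false) == 3;
      return Ok ? 0 : 1;
    }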
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
index a9941221c4b895..5e1b4b50e99dcd 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -12,15 +12,11 @@ define <vscale x 16 x i8> @splice_nxv16i8_first_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ ... @@ define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ ... @@ define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ ... @@ define <vscale x 8 x i16> @splice_nxv8i16_last_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ ... @@ define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ ... @@ define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ ... @@ define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ ... @@ define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ ... @@ define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ ... @@ define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ ... @@ define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntw x9
-; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    cmp x9, #0 // =0
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
 ; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #1, mul vl]
-; CHECK-NEXT:    csel x9, x9, xzr, lo
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -362,16 +319,12 @@ define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ ... @@ define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntd x9
-; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    cmp x9, #0 // =0
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
 ; CHECK-NEXT:    st1d { z1.d }, p0, [x8, #1, mul vl]
-; CHECK-NEXT:    csel x9, x9, xzr, lo
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -428,15 +377,12 @@ define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntd x9
-; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    cmp x9, #1 // =1
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
 ; CHECK-NEXT:    st1d { z1.d }, p0, [x8, #1, mul vl]
-; CHECK-NEXT:    csinc x9, x9, xzr, lo
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
+; CHECK-NEXT:    orr x8, x8, #0x8
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -473,17 +419,14 @@ define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
@@ ... @@ define <vscale x 4 x i1> @splice_nxv4i1_idx(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
@@ ... @@ define <vscale x 8 x i1> @splice_nxv8i1_idx(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
@@ ... @@ define <vscale x 16 x i1> @splice_nxv16i1_idx(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ ... @@ define <vscale x 2 x i8> @splice_nxv2i8_idx(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b)
@@ ... @@ define <vscale x 8 x i32> @splice_nxv8i32_idx(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
--- a/llvm/test/CodeGen/AArch64/split-vector-insert.ll
+++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
@@ ... @@ define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %b)
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #0 // =0
-; CHECK-NEXT:    csel x10, x8, xzr, lo
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x10, x10, #3
+; CHECK-NEXT:    cntd x9
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x10]
+; CHECK-NEXT:    str q1, [sp]
+; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
-; CHECK-NEXT:    mov w9, #2
-; CHECK-NEXT:    cmp x8, #2 // =2
-; CHECK-NEXT:    csel x9, x8, x9, lo
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    addvl x10, sp, #1
-; CHECK-NEXT:    lsl x9, x9, #3
+; CHECK-NEXT:    lsl x8, x8, #3
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    str q2, [x10, x9]
+; CHECK-NEXT:    str q2, [x10, x8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp, #1, mul vl]
-; CHECK-NEXT:    mov w9, #4
-; CHECK-NEXT:    cmp x8, #4 // =4
-; CHECK-NEXT:    csel x9, x8, x9, lo
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    addvl x10, sp, #2
-; CHECK-NEXT:    lsl x9, x9, #3
+; CHECK-NEXT:    lsl x8, x8, #3
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT:    str q3, [x10, x9]
+; CHECK-NEXT:    str q3, [x10, x8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp, #2, mul vl]
-; CHECK-NEXT:    mov w9, #6
-; CHECK-NEXT:    cmp x8, #6 // =6
-; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    cmp x9, #6 // =6
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    addvl x10, sp, #3
 ; CHECK-NEXT:    lsl x8, x8, #3
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #3, mul vl]
@@ -57,6 +53,7 @@ define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %b)
 ; CHECK-NEXT:    addvl sp, sp, #4
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+
   %r = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> %a, <8 x i64> %b, i64 0)
   ret <vscale x 2 x i64> %r
 }
@@ -73,35 +70,31 @@ define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x double> %b)
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #0 // =0
-; CHECK-NEXT:    csel x10, x8, xzr, lo
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x10, x10, #3
+; CHECK-NEXT:    cntd x9
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x10]
+; CHECK-NEXT:    str q1, [sp]
+; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
-; CHECK-NEXT:    mov w9, #2
-; CHECK-NEXT:    cmp x8, #2 // =2
-; CHECK-NEXT:    csel x9, x8, x9, lo
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    addvl x10, sp, #1
-; CHECK-NEXT:    lsl x9, x9, #3
+; CHECK-NEXT:    lsl x8, x8, #3
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    str q2, [x10, x9]
+; CHECK-NEXT:    str q2, [x10, x8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp, #1, mul vl]
-; CHECK-NEXT:    mov w9, #4
-; CHECK-NEXT:    cmp x8, #4 // =4
-; CHECK-NEXT:    csel x9, x8, x9, lo
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    addvl x10, sp, #2
-; CHECK-NEXT:    lsl x9, x9, #3
+; CHECK-NEXT:    lsl x8, x8, #3
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT:    str q3, [x10, x9]
+; CHECK-NEXT:    str q3, [x10, x8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp, #2, mul vl]
-; CHECK-NEXT:    mov w9, #6
-; CHECK-NEXT:    cmp x8, #6 // =6
-; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    cmp x9, #6 // =6
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    addvl x10, sp, #3
 ; CHECK-NEXT:    lsl x8, x8, #3
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #3, mul vl]
@@ -110,6 +103,7 @@ define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x double> %b)
 ; CHECK-NEXT:    addvl sp, sp, #4
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+
   %r = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> %a, <8 x double> %b, i64 0)
   ret <vscale x 2 x double> %r
 }
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
index 62832a8cb80b07..5aa7eab619c023 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -17,15 +17,9 @@ define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #1 // =1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    csinc x8, x8, xzr, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    lsl x8, x8, #3
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ldr q0, [x9, x8]
+; CHECK-NEXT:    ldur q0, [sp, #8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -49,15 +43,9 @@ define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #1 // =1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    csinc x8, x8, xzr, lo
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    lsl x8, x8, #2
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ldr q0, [x9, x8]
+; CHECK-NEXT:    ldur q0, [sp, #4]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -81,15 +69,9 @@ define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #1 // =1
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    csinc x8, x8, xzr, lo
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ldr q0, [x9, x8]
+; CHECK-NEXT:    ldur q0, [sp, #2]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -113,14 +95,9 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    sub x8, x8, #1 // =1
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    cmp x8, #1 // =1
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    csinc x8, x8, xzr, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ldr q0, [x9, x8]
+; CHECK-NEXT:    ldur q0, [sp, #1]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
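With the clamp gone, each of the extracts above reduces to a load at a fixed byte offset from the spill slot: offset = index * element size, which fits in the immediate of an unscaled load such as ldur q0, [sp, #8]. A short sketch of that arithmetic (spillOffsetBytes is an illustrative helper, not part of the patch):

    #include <cstdint>

    // Byte offset of a constant element index into a vector spilled to the
    // stack. With the redundant clamp removed, this folds into the load's
    // immediate (e.g. "ldur q0, [sp, #8]" for element 1 of a nxv2i64 spill).
    constexpr uint64_t spillOffsetBytes(uint64_t EltIdx, uint64_t EltSizeBytes) {
      return EltIdx * EltSizeBytes;
    }

    static_assert(spillOffsetBytes(1, 8) == 8, "nxv2i64, idx 1 -> #8");
    static_assert(spillOffsetBytes(1, 4) == 4, "nxv4i32, idx 1 -> #4");
    static_assert(spillOffsetBytes(1, 2) == 2, "nxv8i16, idx 1 -> #2");
    static_assert(spillOffsetBytes(1, 1) == 1, "nxv16i8, idx 1 -> #1");

    int main() { return spillOffsetBytes(1, 8) == 8 ? 0 : 1; }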
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index 3950a660306d7c..dd4d646fdec2c5 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -6,15 +6,9 @@ define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #0 // =0
-; CHECK-NEXT:    csel x8, x8, xzr, lo
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    lsl x8, x8, #3
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x8]
+; CHECK-NEXT:    str q1, [sp]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -28,15 +22,9 @@ define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #1 // =1
-; CHECK-NEXT:    csinc x8, x8, xzr, lo
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    lsl x8, x8, #3
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x8]
+; CHECK-NEXT:    stur q1, [sp, #8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -50,15 +38,9 @@ define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #0 // =0
-; CHECK-NEXT:    csel x8, x8, xzr, lo
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    lsl x8, x8, #2
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x8]
+; CHECK-NEXT:    str q1, [sp]
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -72,15 +54,9 @@ define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #1 // =1
-; CHECK-NEXT:    csinc x8, x8, xzr, lo
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    lsl x8, x8, #2
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x8]
+; CHECK-NEXT:    stur q1, [sp, #4]
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -94,15 +70,9 @@ define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #0 // =0
-; CHECK-NEXT:    csel x8, x8, xzr, lo
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x8]
+; CHECK-NEXT:    str q1, [sp]
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -116,15 +86,9 @@ define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #1 // =1
-; CHECK-NEXT:    csinc x8, x8, xzr, lo
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x8]
+; CHECK-NEXT:    stur q1, [sp, #2]
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -138,14 +102,9 @@ define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #0 // =0
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    csel x8, x8, xzr, lo
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x8]
+; CHECK-NEXT:    str q1, [sp]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -159,14 +118,9 @@ define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    sub x8, x8, #1 // =1
-; CHECK-NEXT:    cmp x8, #1 // =1
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    csinc x8, x8, xzr, lo
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    str q1, [x9, x8]
+; CHECK-NEXT:    stur q1, [sp, #1]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index 7978fb773c52af..f5606c4311db69 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -615,22 +615,12 @@ define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out
 ; CHECK-NEXT:    sub sp, sp, a2
 ; CHECK-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
 ; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    addi a0, sp, 80
+; CHECK-NEXT:    vse64.v v25, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    srli a0, a0, 3
-; CHECK-NEXT:    slli a2, a0, 4
-; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    addi a3, zero, 8
-; CHECK-NEXT:    bltu a2, a3, .LBB29_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    addi a2, zero, 8
-; CHECK-NEXT:  .LBB29_2:
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    add a2, a3, a2
-; CHECK-NEXT:    vsetivli a4, 2, e64,m1,ta,mu
-; CHECK-NEXT:    vse64.v v25, (a2)
-; CHECK-NEXT:    slli a0, a0, 6
-; CHECK-NEXT:    add a2, a3, a0
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    add a2, a2, a0
 ; CHECK-NEXT:    vl8re64.v v8, (a2)
 ; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vl8re64.v v16, (a2)
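The RISC-V hunk shows the same effect on the other backend: inserting <2 x i64> at element 8 of <vscale x 16 x i64> previously computed and clamped the index at run time with a branch, while the patched output stores directly at sp + 80, that is, the 16-byte frame offset plus 8 elements of 8 bytes each. A sketch of that address arithmetic, with illustrative names rather than anything from the patch itself:

    #include <cassert>
    #include <cstdint>

    // Model of the store-address computation in insert_v2i64_nxv16i64_hi
    // after the patch: a constant element index below the minimum element
    // count needs no clamp, so the address folds to a compile-time constant
    // offset from the frame base.
    uint64_t insertAddrOffset(uint64_t FrameBase, uint64_t EltIdx,
                              uint64_t EltSizeBytes) {
      return FrameBase + EltIdx * EltSizeBytes;
    }

    int main() {
      // <2 x i64> inserted at element 8 of <vscale x 16 x i64>; the spill
      // slot starts at sp+16 and i64 is 8 bytes, so the store lands at
      // 16 + 8*8 = 80, matching "addi a0, sp, 80" in the new output.
      assert(insertAddrOffset(16, 8, 8) == 80);
      return 0;
    }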