diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 29b66a72d21df..c16d104c65c76 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13856,45 +13856,52 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
 SDValue
 AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
-  assert(Op.getValueType().isFixedLengthVector() &&
+  EVT VT = Op.getValueType();
+  assert(VT.isFixedLengthVector() &&
          "Only cases that extract a fixed length vector are supported!");
-
   EVT InVT = Op.getOperand(0).getValueType();
-  unsigned Idx = Op.getConstantOperandVal(1);
-  unsigned Size = Op.getValueSizeInBits();
 
   // If we don't have legal types yet, do nothing
-  if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
+  if (!isTypeLegal(InVT))
     return SDValue();
 
-  if (InVT.isScalableVector()) {
-    // This will be matched by custom code during ISelDAGToDAG.
-    if (Idx == 0 && isPackedVectorType(InVT, DAG))
+  if (InVT.is128BitVector()) {
+    assert(VT.is64BitVector() && "Extracting unexpected vector type!");
+    unsigned Idx = Op.getConstantOperandVal(1);
+
+    // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
+    if (Idx == 0)
       return Op;
-    return SDValue();
+
+    // If this is extracting the upper 64-bits of a 128-bit vector, we match
+    // that directly.
+    if (Idx * InVT.getScalarSizeInBits() == 64 && Subtarget->isNeonAvailable())
+      return Op;
   }
 
-  // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
-  if (Idx == 0 && InVT.getSizeInBits() <= 128)
-    return Op;
-
-  // If this is extracting the upper 64-bits of a 128-bit vector, we match
-  // that directly.
-  if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
-      InVT.getSizeInBits() == 128 && Subtarget->isNeonAvailable())
-    return Op;
-
-  if (useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
+  if (InVT.isScalableVector() ||
+      useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
     SDLoc DL(Op);
+    SDValue Vec = Op.getOperand(0);
+    SDValue Idx = Op.getOperand(1);
 
-    EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
-    SDValue NewInVec =
-        convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+    EVT PackedVT = getPackedSVEVectorVT(InVT.getVectorElementType());
+    if (PackedVT != InVT) {
+      // Pack input into the bottom part of an SVE register and try again.
+      SDValue Container = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PackedVT,
+                                      DAG.getUNDEF(PackedVT), Vec,
+                                      DAG.getVectorIdxConstant(0, DL));
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Container, Idx);
+    }
+
+    // This will get matched by custom code during ISelDAGToDAG.
+    if (isNullConstant(Idx))
+      return Op;
 
-    SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, NewInVec,
-                                 NewInVec, DAG.getConstant(Idx, DL, MVT::i64));
-    return convertFromScalableVector(DAG, Op.getValueType(), Splice);
+    assert(InVT.isScalableVector() && "Unexpected vector type!");
+    // Move requested subvector to the start of the vector and try again.
+    SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, InVT, Vec, Vec, Idx);
+    return convertFromScalableVector(DAG, VT, Splice);
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
index e91aac430110c..641050ae69d9b 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
@@ -143,15 +143,8 @@ define <4 x float> @extract_v4f32_nxv16f32_12(<vscale x 16 x float> %arg) {
 define <2 x float> @extract_v2f32_nxv16f32_2(<vscale x 16 x float> %arg) {
 ; CHECK-LABEL: extract_v2f32_nxv16f32_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ldr d0, [sp, #8]
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x float> @llvm.vector.extract.v2f32.nxv16f32(<vscale x 16 x float> %arg, i64 2)
   ret <2 x float> %ext
@@ -274,15 +267,8 @@ define <4 x i3> @extract_v4i3_nxv32i3_16(<vscale x 32 x i3> %arg) {
 define <2 x i32> @extract_v2i32_nxv16i32_2(<vscale x 16 x i32> %arg) {
 ; CHECK-LABEL: extract_v2i32_nxv16i32_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ldr d0, [sp, #8]
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %arg, i64 2)
   ret <2 x i32> %ext
@@ -314,16 +300,9 @@ define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    cntd x8
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    addpl x9, sp, #6
-; CHECK-NEXT:    subs x8, x8, #4
-; CHECK-NEXT:    csel x8, xzr, x8, lo
-; CHECK-NEXT:    st1h { z0.d }, p0, [sp, #3, mul vl]
-; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    csel x8, x8, xzr, lo
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    ldr d0, [x9, x8]
+; CHECK-NEXT:    st1h { z0.d }, p0, [sp]
+; CHECK-NEXT:    ldr d0, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -338,17 +317,12 @@ define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #4 // =0x4
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    subs x8, x8, #4
-; CHECK-NEXT:    csel x8, xzr, x8, lo
-; CHECK-NEXT:    st1h { z0.d }, p0, [sp, #3, mul vl]
-; CHECK-NEXT:    cmp x8, #4
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    addpl x9, sp, #6
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    ldr d0, [x9, x8]
+; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    st1h { z0.d }, p0, [sp]
+; CHECK-NEXT:    ld1h { z0.h }, p1/z, [sp]
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 88268104889fd..b05b46a75b698 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -15,20 +15,8 @@ define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
 define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
 ; CHECK-LABEL: extract_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #2 // =0x2
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    sub x8, x8, #2
-; CHECK-NEXT:    cmp x8, #2
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x8, x8, #3
-; CHECK-NEXT:    ldr q0, [x9, x8]
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
   ret <2 x i64> %retval
@@ -48,20 +36,8 @@ define <4 x i32> @extract_v4i32_nxv4i32(<vscale x 4 x i32> %vec) nounwind {
 define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind {
 ; CHECK-LABEL: extract_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    mov w9, #4 // =0x4
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    sub x8, x8, #4
-; CHECK-NEXT:    cmp x8, #4
-; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x8, x8, #2
-; CHECK-NEXT:    ldr q0, [x9, x8]
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 4)
   ret <4 x i32> %retval
@@ -82,18 +58,9 @@ define <4 x i32> @extract_v4i32_nxv2i32(<vscale x 2 x i32> %vec) nounwind #1 {
 define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v4i32_nxv2i32_idx4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x8, #4 // =0x4
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ptrue p1.d, vl4
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x9, x8, lsl #3]
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %vec, i64 4)
   ret <4 x i32> %retval
@@ -113,20 +80,8 @@ define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
 define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind {
 ; CHECK-LABEL: extract_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    mov w9, #8 // =0x8
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    sub x8, x8, #8
-; CHECK-NEXT:    cmp x8, #8
-; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    ldr q0, [x9, x8]
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 8)
   ret <8 x i16> %retval
@@ -147,18 +102,9 @@ define <8 x i16> @extract_v8i16_nxv4i16(<vscale x 4 x i16> %vec) nounwind #1 {
 define <8 x i16> @extract_v8i16_nxv4i16_idx8(<vscale x 4 x i16> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v8i16_nxv4i16_idx8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov x8, #8 // =0x8
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ptrue p1.s, vl8
-; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x9, x8, lsl #2]
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16> %vec, i64 8)
   ret <8 x i16> %retval
@@ -180,19 +126,10 @@ define <8 x i16> @extract_v8i16_nxv2i16(<vscale x 2 x i16> %vec) nounwind #1 {
 define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v8i16_nxv2i16_idx8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x8, #8 // =0x8
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ptrue p1.d, vl8
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x9, x8, lsl #3]
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #64
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16> %vec, i64 8)
   ret <8 x i16> %retval
@@ -212,19 +149,8 @@ define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
 define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind {
 ; CHECK-LABEL: extract_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov w9, #16 // =0x10
-; CHECK-NEXT:    sub x8, x8, #16
-; CHECK-NEXT:    cmp x8, #16
-; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ldr q0, [x9, x8]
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -245,18 +171,9 @@ define <16 x i8> @extract_v16i8_nxv8i8(<vscale x 8 x i8> %vec) nounwind #1 {
 define <16 x i8> @extract_v16i8_nxv8i8_idx16(<vscale x 8 x i8> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v16i8_nxv8i8_idx16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov x8, #16 // =0x10
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ptrue p1.h, vl16
-; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x9, x8, lsl #1]
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -278,19 +195,10 @@ define <16 x i8> @extract_v16i8_nxv4i8(<vscale x 4 x i8> %vec) nounwind #1 {
 define <16 x i8> @extract_v16i8_nxv4i8_idx16(<vscale x 4 x i8> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v16i8_nxv4i8_idx16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov x8, #16 // =0x10
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ptrue p1.s, vl16
-; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x9, x8, lsl #2]
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #64
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -313,17 +221,11 @@ define <16 x i8> @extract_v16i8_nxv2i8(<vscale x 2 x i8> %vec) nounwind #1 {
 define <16 x i8> @extract_v16i8_nxv2i8_idx16(<vscale x 2 x i8> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v16i8_nxv2i8_idx16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #128
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -434,13 +336,8 @@ define <16 x i1> @extract_v16i1_nxv16i1(<vscale x 16 x i1> %inmask) {
 define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind #0 {
 ; CHECK-LABEL: extract_fixed_v2i64_nxv2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ldr q0, [sp, #16]
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
   ret <2 x i64> %retval
@@ -449,14 +346,9 @@ define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
 define void @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec, ptr %p) nounwind #0 {
 ; CHECK-LABEL: extract_fixed_v4i64_nxv2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %retval = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> %vec, i64 4)
   store <4 x i64> %retval, ptr %p
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index b285659258f31..a9b52c93006df 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -177,18 +177,19 @@ define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z2.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    uunpklo z3.d, z0.s
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    uunpklo z2.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    ucvtf z2.d, p0/m, z2.d
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    ucvtf z3.d, p0/m, z3.d
+; CHECK-NEXT:    uunpklo z3.d, z3.s
+; CHECK-NEXT:    ucvtf z2.d, p0/m, z2.d
 ; CHECK-NEXT:    ucvtf z1.d, p0/m, z1.d
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    stp q2, q1, [x1]
-; CHECK-NEXT:    stp q3, q0, [x1, #32]
+; CHECK-NEXT:    ucvtf z3.d, p0/m, z3.d
+; CHECK-NEXT:    stp q1, q3, [x1]
+; CHECK-NEXT:    stp q2, q0, [x1, #32]
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i16>, ptr %a
   %res = uitofp <8 x i16> %op1 to <8 x double>
@@ -750,18 +751,19 @@ define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z2.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    sunpklo z3.d, z0.s
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    sunpklo z2.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    scvtf z2.d, p0/m, z2.d
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    scvtf z3.d, p0/m, z3.d
+; CHECK-NEXT:    sunpklo z3.d, z3.s
+; CHECK-NEXT:    scvtf z2.d, p0/m, z2.d
 ; CHECK-NEXT:    scvtf z1.d, p0/m, z1.d
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    stp q2, q1, [x1]
-; CHECK-NEXT:    stp q3, q0, [x1, #32]
+; CHECK-NEXT:    scvtf z3.d, p0/m, z3.d
+; CHECK-NEXT:    stp q1, q3, [x1]
+; CHECK-NEXT:    stp q2, q0, [x1, #32]
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i16>, ptr %a
   %res = sitofp <8 x i16> %op1 to <8 x double>
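
Editor's note, for illustration only (not part of the patch): the input shape that now avoids the stack round-trip is a fixed-length extract at a non-zero constant index from a packed scalable vector. The reduced IR below mirrors the extract_v2i64_nxv2i64_idx2 test above; the function name is illustrative.

; Sketch mirroring extract_v2i64_nxv2i64_idx2 above; with this patch the
; expected AArch64 codegen is:
;   ext z0.b, z0.b, z0.b, #16
;   // kill: def $q0 killed $q0 killed $z0
;   ret
define <2 x i64> @sketch_extract_upper(<vscale x 2 x i64> %vec) {
  ; Extract elements [2,4) of the scalable input as a fixed 128-bit vector.
  %hi = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
  ret <2 x i64> %hi
}

declare <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)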