diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8d8280f5b86fe..069f30e912107 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10719,6 +10719,10 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
   unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   unsigned Size = Op.getValueSizeInBits();
 
+  // If we don't have legal types yet, do nothing.
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
+    return SDValue();
+
   if (InVT.isScalableVector()) {
     // This will be matched by custom code during ISelDAGToDAG.
     if (Idx == 0 && isPackedVectorType(InVT, DAG))
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index e65d29ed6c732..ff8be096cc410 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -67,6 +67,43 @@ define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind {
   ret <4 x i32> %retval
 }
 
+; Should codegen to uzps, since idx is zero and type is illegal.
+define <4 x i32> @extract_v4i32_nxv2i32(<vscale x 2 x i32> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v4i32_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %vec, i64 0)
+  ret <4 x i32> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v4i32_nxv2i32_idx4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov w9, #4
+; CHECK-NEXT:    subs x8, x8, #4
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    csel x8, xzr, x8, lo
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
+; CHECK-NEXT:    cmp x8, #4
+; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %vec, i64 4)
+  ret <4 x i32> %retval
+}
+
 ; Should codegen to a nop, since idx is zero.
 define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
 ; CHECK-LABEL: extract_v8i16_nxv8i16:
@@ -100,6 +137,82 @@ define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind {
   ret <8 x i16> %retval
 }
 
+; Should codegen to uzps, since idx is zero and type is illegal.
+define <8 x i16> @extract_v8i16_nxv4i16(<vscale x 4 x i16> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v8i16_nxv4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16> %vec, i64 0)
+  ret <8 x i16> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <8 x i16> @extract_v8i16_nxv4i16_idx8(<vscale x 4 x i16> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v8i16_nxv4i16_idx8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x8
+; CHECK-NEXT:    mov w9, #8
+; CHECK-NEXT:    subs x8, x8, #8
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    csel x8, xzr, x8, lo
+; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
+; CHECK-NEXT:    cmp x8, #8
+; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16> %vec, i64 8)
+  ret <8 x i16> %retval
+}
+
+; Should codegen to uzps, since idx is zero and type is illegal.
+define <8 x i16> @extract_v8i16_nxv2i16(<vscale x 2 x i16> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v8i16_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16> %vec, i64 0)
+  ret <8 x i16> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v8i16_nxv2i16_idx8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov w9, #8
+; CHECK-NEXT:    subs x8, x8, #8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    csel x8, xzr, x8, lo
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
+; CHECK-NEXT:    cmp x8, #8
+; CHECK-NEXT:    ptrue p0.d, vl8
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16> %vec, i64 8)
+  ret <8 x i16> %retval
+}
+
 ; Should codegen to a nop, since idx is zero.
 define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
 ; CHECK-LABEL: extract_v16i8_nxv16i8:
@@ -132,6 +245,121 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind
   ret <16 x i8> %retval
 }
 
+; Should codegen to uzps, since idx is zero and type is illegal.
+define <16 x i8> @extract_v16i8_nxv8i8(<vscale x 8 x i8> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v16i8_nxv8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %vec, i64 0)
+  ret <16 x i8> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <16 x i8> @extract_v16i8_nxv8i8_idx16(<vscale x 8 x i8> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v16i8_nxv8i8_idx16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x8
+; CHECK-NEXT:    mov w9, #16
+; CHECK-NEXT:    subs x8, x8, #16
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    csel x8, xzr, x8, lo
+; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
+; CHECK-NEXT:    cmp x8, #16
+; CHECK-NEXT:    ptrue p0.h, vl16
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %vec, i64 16)
+  ret <16 x i8> %retval
+}
+
+; Should codegen to uzps, since idx is zero and type is illegal.
+define <16 x i8> @extract_v16i8_nxv4i8(<vscale x 4 x i8> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v16i8_nxv4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8> %vec, i64 0)
+  ret <16 x i8> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <16 x i8> @extract_v16i8_nxv4i8_idx16(<vscale x 4 x i8> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v16i8_nxv4i8_idx16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x8
+; CHECK-NEXT:    mov w9, #16
+; CHECK-NEXT:    subs x8, x8, #16
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    csel x8, xzr, x8, lo
+; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
+; CHECK-NEXT:    cmp x8, #16
+; CHECK-NEXT:    ptrue p0.s, vl16
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8> %vec, i64 16)
+  ret <16 x i8> %retval
+}
+
+; Should codegen to uzps, since idx is zero and type is illegal.
+define <16 x i8> @extract_v16i8_nxv2i8(<vscale x 2 x i8> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v16i8_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8> %vec, i64 0)
+  ret <16 x i8> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <16 x i8> @extract_v16i8_nxv2i8_idx16(<vscale x 2 x i8> %vec) nounwind #1 {
+; CHECK-LABEL: extract_v16i8_nxv2i8_idx16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov w9, #16
+; CHECK-NEXT:    subs x8, x8, #16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    csel x8, xzr, x8, lo
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
+; CHECK-NEXT:    cmp x8, #16
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8> %vec, i64 16)
+  ret <16 x i8> %retval
+}
 
 ; Fixed length clamping
 
@@ -181,10 +409,20 @@ define <4 x i64> @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
 }
 
 attributes #0 = { vscale_range(2,2) }
+attributes #1 = { vscale_range(8,8) }
 
 declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
+
 declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32>, i64)
+declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32>, i64)
+
 declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16>, i64)
+declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16>, i64)
+declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16>, i64)
+
 declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8>, i64)
+declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8>, i64)
+declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8>, i64)
+declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8>, i64)
 
 declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64>, i64)