diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b06f6308281c3..41b30320a90cf 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -23062,16 +23062,28 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE( Op1 = convertToScalableVector(DAG, ContainerVT, Op1); Op2 = convertToScalableVector(DAG, ContainerVT, Op2); + auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT { + if (ScalarTy == MVT::i8 || ScalarTy == MVT::i16) + return MVT::i32; + return ScalarTy; + }; + + if (SVN->isSplat()) { + unsigned Lane = std::max(0, SVN->getSplatIndex()); + EVT ScalarTy = MinLegalExtractEltScalarTy(VT.getVectorElementType()); + SDValue SplatEl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1, + DAG.getConstant(Lane, DL, MVT::i64)); + Op = DAG.getNode(ISD::SPLAT_VECTOR, DL, ContainerVT, SplatEl); + return convertFromScalableVector(DAG, VT, Op); + } + bool ReverseEXT = false; unsigned Imm; if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) && Imm == VT.getVectorNumElements() - 1) { if (ReverseEXT) std::swap(Op1, Op2); - - EVT ScalarTy = VT.getVectorElementType(); - if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) - ScalarTy = MVT::i32; + EVT ScalarTy = MinLegalExtractEltScalarTy(VT.getVectorElementType()); SDValue Scalar = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1, DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64)); diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll index a4d37caaf1991..b9080ed84bbed 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll @@ -10,26 +10,11 @@ target triple = "aarch64-unknown-linux-gnu" define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) vscale_range(2,2) #0 { ; CHECK-LABEL: hang_when_merging_stores_after_legalisation: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: sub x9, sp, #48 -; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: stp s0, s0, [sp, #24] -; CHECK-NEXT: stp s0, s0, [sp, #16] -; CHECK-NEXT: stp s0, s0, [sp, #8] -; CHECK-NEXT: stp s0, s0, [sp] -; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #16 ; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x0] -; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer %interleaved.vec = shufflevector <8 x i32> %splat, <8 x i32> undef, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll index 8542736694b27..0204613b9fc8d 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll @@ -712,4 +712,82 @@ define void @splat_imm_v8f64(ptr %a) vscale_range(4,0) #0 { ret void } +define <8 x float> @load_splat_v8f32(ptr %p) vscale_range(2,2) #0 { +; CHECK-LABEL: load_splat_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: mov z0.s, s0 +; CHECK-NEXT: st1w { z0.s }, p0, [x8] +; CHECK-NEXT: ret + %v = load <8 x float>, ptr %p + %splat = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> zeroinitializer + ret <8 x float> %splat +} + +define <4 x double> @load_splat_v4f64(ptr %p) vscale_range(2,2) #0 { +; CHECK-LABEL: load_splat_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: mov z0.d, d0 +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ret + %v = load <4 x double>, ptr %p + %splat = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer + ret <4 x double> %splat +} + +define <32 x i8> @load_splat_v32i8(ptr %p) vscale_range(2,2) #0 { +; CHECK-LABEL: load_splat_v32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: mov z0.b, b0 +; CHECK-NEXT: st1b { z0.b }, p0, [x8] +; CHECK-NEXT: ret + %v = load <32 x i8>, ptr %p + %splat = shufflevector <32 x i8> %v, <32 x i8> undef, <32 x i32> zeroinitializer + ret <32 x i8> %splat +} + +define <16 x i16> @load_splat_v16i16(ptr %p) vscale_range(2,2) #0 { +; CHECK-LABEL: load_splat_v16i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: st1h { z0.h }, p0, [x8] +; CHECK-NEXT: ret + %v = load <16 x i16>, ptr %p + %splat = shufflevector <16 x i16> %v, <16 x i16> undef, <16 x i32> zeroinitializer + ret <16 x i16> %splat +} + +define <8 x i32> @load_splat_v8i32(ptr %p) vscale_range(2,2) #0 { +; CHECK-LABEL: load_splat_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: mov z0.s, s0 +; CHECK-NEXT: st1w { z0.s }, p0, [x8] +; CHECK-NEXT: ret + %v = load <8 x i32>, ptr %p + %splat = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> zeroinitializer + ret <8 x i32> %splat +} + +define <4 x i64> @load_splat_v4i64(ptr %p) vscale_range(2,2) #0 { +; CHECK-LABEL: load_splat_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: mov z0.d, d0 +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ret + %v = load <4 x i64>, ptr %p + %splat = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> zeroinitializer + ret <4 x i64> %splat +} + attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll index 8a6d1903c8f6e..2999b84360a71 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll @@ -124,8 +124,6 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: insr z0.s, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %ret = call <1 x i32> @llvm.vector.extract.v1i32.v2i32(<2 x i32> %op, i64 1) @@ -229,7 +227,6 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: insr z0.s, s0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %ret = call <1 x float> @llvm.vector.extract.v1f32.v2f32(<2 x float> %op, i64 1) diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll index fdcc96974f7ba..ffea4b4c50072 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll @@ -10,15 +10,9 @@ target triple = "aarch64-unknown-linux-gnu" define void @hang_when_merging_stores_after_legalisation(<8 x i32>* %a, <2 x i32> %b) #0 { ; CHECK-LABEL: hang_when_merging_stores_after_legalisation: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: stp w8, w8, [sp, #8] -; CHECK-NEXT: stp w8, w8, [sp] -; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: stp q0, q0, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer %interleaved.vec = shufflevector <8 x i32> %splat, <8 x i32> undef, <8 x i32>