diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9665ae5ceb903..5b7a36d2eba76 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -21423,8 +21423,12 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG, } } - // These optimizations only work on little endian. - if (!DAG.getDataLayout().isLittleEndian()) + // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y)) + // Only implemented on little-endian subtargets. + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); + + // This optimization only works on little endian. + if (!IsLittleEndian) return SDValue(); // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y) @@ -21443,28 +21447,21 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG, if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8) return SDValue(); - SDValue SourceOp0 = peekThroughBitcasts(Op0); - SDValue SourceOp1 = peekThroughBitcasts(Op1); + auto getSourceOp = [](SDValue Operand) -> SDValue { + const unsigned Opcode = Operand.getOpcode(); + if (Opcode == ISD::TRUNCATE) + return Operand->getOperand(0); + if (Opcode == ISD::BITCAST && + Operand->getOperand(0).getOpcode() == ISD::TRUNCATE) + return Operand->getOperand(0)->getOperand(0); + return SDValue(); + }; - // truncating uzp1(x, y) -> xtn(concat (x, y)) - if (SourceOp0.getValueType() == SourceOp1.getValueType()) { - EVT Op0Ty = SourceOp0.getValueType(); - if ((ResVT == MVT::v4i16 && Op0Ty == MVT::v2i32) || - (ResVT == MVT::v8i8 && Op0Ty == MVT::v4i16)) { - SDValue Concat = - DAG.getNode(ISD::CONCAT_VECTORS, DL, - Op0Ty.getDoubleNumVectorElementsVT(*DAG.getContext()), - SourceOp0, SourceOp1); - return DAG.getNode(ISD::TRUNCATE, DL, ResVT, Concat); - } - } + SDValue SourceOp0 = getSourceOp(Op0); + SDValue SourceOp1 = getSourceOp(Op1); - // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y)) - if (SourceOp0.getOpcode() != ISD::TRUNCATE || - SourceOp1.getOpcode() != ISD::TRUNCATE) + if (!SourceOp0 || !SourceOp1) return SDValue(); - SourceOp0 = SourceOp0.getOperand(0); - SourceOp1 = SourceOp1.getOperand(0); if (SourceOp0.getValueType() != SourceOp1.getValueType() || !SourceOp0.getValueType().isSimple()) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index b4b975cce007a..6254e68326f79 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6153,39 +6153,26 @@ defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>; defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>; -def trunc_optional_assert_ext : PatFrags<(ops node:$op0), - [(trunc node:$op0), - (assertzext (trunc node:$op0)), - (assertsext (trunc node:$op0))]>; - -// concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y) -// concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y) -// concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y) -class concat_trunc_to_uzp1_pat - : Pat<(ConcatTy (concat_vectors (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))), - (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))), - (!cast("UZP1"#ConcatTy) V128:$Vn, V128:$Vm)>; -def : concat_trunc_to_uzp1_pat; -def : concat_trunc_to_uzp1_pat; -def : concat_trunc_to_uzp1_pat; - -// trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y)) -// trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y)) -// trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y)) -class trunc_concat_trunc_to_xtn_uzp1_pat - : Pat<(Ty (trunc_optional_assert_ext - (ConcatTy (concat_vectors - (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))), - (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))))), - (!cast("XTN"#Ty) (!cast("UZP1"#ConcatTy) V128:$Vn, V128:$Vm))>; -def : trunc_concat_trunc_to_xtn_uzp1_pat; -def : trunc_concat_trunc_to_xtn_uzp1_pat; - -def : Pat<(v8i8 (trunc (concat_vectors (v4i16 V64:$Vn), (v4i16 V64:$Vm)))), - (UZP1v8i8 V64:$Vn, V64:$Vm)>; -def : Pat<(v4i16 (trunc (concat_vectors (v2i32 V64:$Vn), (v2i32 V64:$Vm)))), - (UZP1v4i16 V64:$Vn, V64:$Vm)>; +def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))), + (v8i8 (trunc (v8i16 V128:$Vm))))), + (UZP1v16i8 V128:$Vn, V128:$Vm)>; +def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))), + (v4i16 (trunc (v4i32 V128:$Vm))))), + (UZP1v8i16 V128:$Vn, V128:$Vm)>; +def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))), + (v2i32 (trunc (v2i64 V128:$Vm))))), + (UZP1v4i32 V128:$Vn, V128:$Vm)>; +// These are the same as above, with an optional assertzext node that can be +// generated from fptoi lowering. +def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))), + (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))), + (UZP1v16i8 V128:$Vn, V128:$Vm)>; +def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))), + (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))), + (UZP1v8i16 V128:$Vn, V128:$Vm)>; +def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))), + (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))), + (UZP1v4i32 V128:$Vn, V128:$Vm)>; def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), diff --git a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll index 3007e7ce771e6..49325299f74a1 100644 --- a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll +++ b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -8,8 +8,9 @@ define <4 x i16> @fptosi_v4f64_to_v4i16(ptr %ptr) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x double>, ptr %ptr %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16> @@ -25,10 +26,13 @@ define <8 x i8> @fptosi_v4f64_to_v4i8(ptr %ptr) { ; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s -; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h -; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: uzp1 v0.4h, v1.4h, v0.4h +; CHECK-NEXT: uzp1 v1.4h, v2.4h, v3.4h +; CHECK-NEXT: uzp1 v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret %tmp1 = load <8 x double>, ptr %ptr %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8> @@ -92,8 +96,9 @@ define <4 x i16> @fptoui_v4f64_to_v4i16(ptr %ptr) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x double>, ptr %ptr %tmp2 = fptoui <4 x double> %tmp1 to <4 x i16> diff --git a/llvm/test/CodeGen/AArch64/extbinopload.ll b/llvm/test/CodeGen/AArch64/extbinopload.ll index dff4831330deb..1f68c77611e10 100644 --- a/llvm/test/CodeGen/AArch64/extbinopload.ll +++ b/llvm/test/CodeGen/AArch64/extbinopload.ll @@ -650,7 +650,7 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK-NEXT: add x11, x3, #12 ; CHECK-NEXT: str s1, [x4] ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ldp s0, s4, [x2] +; CHECK-NEXT: ldp s0, s5, [x2] ; CHECK-NEXT: ushll v2.8h, v0.8b, #0 ; CHECK-NEXT: umov w9, v2.h[0] ; CHECK-NEXT: umov w10, v2.h[1] @@ -662,25 +662,24 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK-NEXT: ushll v2.8h, v2.8b, #0 ; CHECK-NEXT: mov v0.b[10], w9 ; CHECK-NEXT: add x9, x1, #4 -; CHECK-NEXT: mov v1.d[1], v2.d[0] +; CHECK-NEXT: uzp1 v1.8b, v1.8b, v2.8b ; CHECK-NEXT: mov v0.b[11], w10 ; CHECK-NEXT: add x10, x1, #12 -; CHECK-NEXT: bic v1.8h, #255, lsl #8 ; CHECK-NEXT: ld1 { v0.s }[3], [x3], #4 -; CHECK-NEXT: ldr s3, [x0, #12] -; CHECK-NEXT: ldp s2, s7, [x0, #4] -; CHECK-NEXT: ld1 { v4.s }[1], [x3] -; CHECK-NEXT: ldp s5, s6, [x2, #8] -; CHECK-NEXT: ld1 { v3.s }[1], [x10] -; CHECK-NEXT: ld1 { v2.s }[1], [x9] -; CHECK-NEXT: ld1 { v5.s }[1], [x8] -; CHECK-NEXT: ld1 { v6.s }[1], [x11] +; CHECK-NEXT: ldr s4, [x0, #12] +; CHECK-NEXT: ldp s3, s16, [x0, #4] +; CHECK-NEXT: ld1 { v5.s }[1], [x3] +; CHECK-NEXT: ldp s6, s7, [x2, #8] +; CHECK-NEXT: ld1 { v4.s }[1], [x10] +; CHECK-NEXT: ld1 { v3.s }[1], [x9] +; CHECK-NEXT: ld1 { v6.s }[1], [x8] +; CHECK-NEXT: ld1 { v7.s }[1], [x11] ; CHECK-NEXT: add x8, x1, #8 -; CHECK-NEXT: ld1 { v7.s }[1], [x8] -; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b -; CHECK-NEXT: ushll v3.8h, v5.8b, #0 -; CHECK-NEXT: uaddl v4.8h, v4.8b, v6.8b -; CHECK-NEXT: uaddw v1.8h, v1.8h, v7.8b +; CHECK-NEXT: ld1 { v16.s }[1], [x8] +; CHECK-NEXT: uaddl v2.8h, v3.8b, v4.8b +; CHECK-NEXT: ushll v3.8h, v6.8b, #0 +; CHECK-NEXT: uaddl v4.8h, v5.8b, v7.8b +; CHECK-NEXT: uaddl v1.8h, v1.8b, v16.8b ; CHECK-NEXT: uaddw2 v5.8h, v3.8h, v0.16b ; CHECK-NEXT: ushll v0.4s, v2.4h, #3 ; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3 diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll index 0a3b9a070c2b3..1ea87bb6b04b5 100644 --- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll @@ -73,8 +73,9 @@ define void @fptoui_v8f32_to_v8i8_no_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzs.4s v1, v1 ; CHECK-NEXT: fcvtzs.4s v0, v0 -; CHECK-NEXT: uzp1.8h v0, v0, v1 -; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: xtn.4h v1, v1 +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: uzp1.8b v0, v0, v1 ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index 7af01b53dae7e..67190e8596c46 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -1096,17 +1096,30 @@ entry: } define <3 x i16> @fptos_v3f64_v3i16(<3 x double> %a) { -; CHECK-LABEL: fptos_v3f64_v3i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v3f64_v3i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v3f64_v3i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: ret entry: %c = fptosi <3 x double> %a to <3 x i16> ret <3 x i16> %c @@ -1121,8 +1134,9 @@ define <3 x i16> @fptou_v3f64_v3i16(<3 x double> %a) { ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v3f64_v3i16: @@ -1146,8 +1160,9 @@ define <4 x i16> @fptos_v4f64_v4i16(<4 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v4f64_v4i16: @@ -1167,8 +1182,9 @@ define <4 x i16> @fptou_v4f64_v4i16(<4 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v4f64_v4i16: @@ -1584,8 +1600,9 @@ define <3 x i8> @fptos_v3f64_v3i8(<3 x double> %a) { ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: umov w0, v0.h[0] ; CHECK-SD-NEXT: umov w1, v0.h[1] ; CHECK-SD-NEXT: umov w2, v0.h[2] @@ -1621,8 +1638,9 @@ define <3 x i8> @fptou_v3f64_v3i8(<3 x double> %a) { ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: umov w0, v0.h[0] ; CHECK-SD-NEXT: umov w1, v0.h[1] ; CHECK-SD-NEXT: umov w2, v0.h[2] @@ -1654,8 +1672,9 @@ define <4 x i8> @fptos_v4f64_v4i8(<4 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v4f64_v4i8: @@ -1675,8 +1694,9 @@ define <4 x i8> @fptou_v4f64_v4i8(<4 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v4f64_v4i8: @@ -1698,10 +1718,13 @@ define <8 x i8> @fptos_v8f64_v8i8(<8 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v2.8b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v8f64_v8i8: @@ -1727,10 +1750,13 @@ define <8 x i8> @fptou_v8f64_v8i8(<8 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v2.8b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v8f64_v8i8: @@ -1760,13 +1786,21 @@ define <16 x i8> @fptos_v16f64_v16i8(<16 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s -; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s -; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: uzp1 v1.8h, v4.8h, v6.8h -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v6.4h, v6.4h, v7.4h +; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h +; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: mov v4.d[1], v6.d[0] +; CHECK-SD-NEXT: mov v0.d[1], v2.d[0] +; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v16f64_v16i8: @@ -1803,13 +1837,21 @@ define <16 x i8> @fptou_v16f64_v16i8(<16 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s -; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s -; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: uzp1 v1.8h, v4.8h, v6.8h -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: uzp1 v6.4h, v6.4h, v7.4h +; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h +; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: mov v4.d[1], v6.d[0] +; CHECK-SD-NEXT: mov v0.d[1], v2.d[0] +; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v16f64_v16i8: @@ -1858,20 +1900,36 @@ define <32 x i8> @fptos_v32f64_v32i8(<32 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d ; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s -; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s -; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: uzp1 v3.4s, v20.4s, v21.4s -; CHECK-SD-NEXT: uzp1 v1.4s, v22.4s, v23.4s -; CHECK-SD-NEXT: uzp1 v5.4s, v18.4s, v19.4s -; CHECK-SD-NEXT: uzp1 v7.4s, v16.4s, v17.4s -; CHECK-SD-NEXT: uzp1 v4.8h, v4.8h, v6.8h -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: uzp1 v1.8h, v3.8h, v1.8h -; CHECK-SD-NEXT: uzp1 v2.8h, v7.8h, v5.8h +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn v23.2s, v23.2d +; CHECK-SD-NEXT: xtn v22.2s, v22.2d +; CHECK-SD-NEXT: xtn v21.2s, v21.2d +; CHECK-SD-NEXT: xtn v20.2s, v20.2d +; CHECK-SD-NEXT: xtn v19.2s, v19.2d +; CHECK-SD-NEXT: xtn v18.2s, v18.2d +; CHECK-SD-NEXT: xtn v17.2s, v17.2d +; CHECK-SD-NEXT: xtn v16.2s, v16.2d +; CHECK-SD-NEXT: uzp1 v6.4h, v6.4h, v7.4h +; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h +; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v1.4h, v22.4h, v23.4h +; CHECK-SD-NEXT: uzp1 v3.4h, v20.4h, v21.4h +; CHECK-SD-NEXT: uzp1 v5.4h, v18.4h, v19.4h +; CHECK-SD-NEXT: uzp1 v7.4h, v16.4h, v17.4h +; CHECK-SD-NEXT: mov v4.d[1], v6.d[0] +; CHECK-SD-NEXT: mov v0.d[1], v2.d[0] +; CHECK-SD-NEXT: mov v3.d[1], v1.d[0] +; CHECK-SD-NEXT: mov v7.d[1], v5.d[0] ; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b -; CHECK-SD-NEXT: uzp1 v1.16b, v2.16b, v1.16b +; CHECK-SD-NEXT: uzp1 v1.16b, v7.16b, v3.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v32f64_v32i8: @@ -1939,20 +1997,36 @@ define <32 x i8> @fptou_v32f64_v32i8(<32 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d ; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s -; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s -; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s -; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: uzp1 v3.4s, v20.4s, v21.4s -; CHECK-SD-NEXT: uzp1 v1.4s, v22.4s, v23.4s -; CHECK-SD-NEXT: uzp1 v5.4s, v18.4s, v19.4s -; CHECK-SD-NEXT: uzp1 v7.4s, v16.4s, v17.4s -; CHECK-SD-NEXT: uzp1 v4.8h, v4.8h, v6.8h -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: uzp1 v1.8h, v3.8h, v1.8h -; CHECK-SD-NEXT: uzp1 v2.8h, v7.8h, v5.8h +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn v23.2s, v23.2d +; CHECK-SD-NEXT: xtn v22.2s, v22.2d +; CHECK-SD-NEXT: xtn v21.2s, v21.2d +; CHECK-SD-NEXT: xtn v20.2s, v20.2d +; CHECK-SD-NEXT: xtn v19.2s, v19.2d +; CHECK-SD-NEXT: xtn v18.2s, v18.2d +; CHECK-SD-NEXT: xtn v17.2s, v17.2d +; CHECK-SD-NEXT: xtn v16.2s, v16.2d +; CHECK-SD-NEXT: uzp1 v6.4h, v6.4h, v7.4h +; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h +; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v1.4h, v22.4h, v23.4h +; CHECK-SD-NEXT: uzp1 v3.4h, v20.4h, v21.4h +; CHECK-SD-NEXT: uzp1 v5.4h, v18.4h, v19.4h +; CHECK-SD-NEXT: uzp1 v7.4h, v16.4h, v17.4h +; CHECK-SD-NEXT: mov v4.d[1], v6.d[0] +; CHECK-SD-NEXT: mov v0.d[1], v2.d[0] +; CHECK-SD-NEXT: mov v3.d[1], v1.d[0] +; CHECK-SD-NEXT: mov v7.d[1], v5.d[0] ; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b -; CHECK-SD-NEXT: uzp1 v1.16b, v2.16b, v1.16b +; CHECK-SD-NEXT: uzp1 v1.16b, v7.16b, v3.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v32f64_v32i8: @@ -2952,8 +3026,9 @@ define <8 x i8> @fptos_v8f32_v8i8(<8 x float> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-NEXT: xtn v1.4h, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v8f32_v8i8: @@ -2973,8 +3048,9 @@ define <8 x i8> @fptou_v8f32_v8i8(<8 x float> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-NEXT: xtn v1.4h, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v8f32_v8i8: @@ -2996,8 +3072,12 @@ define <16 x i8> @fptos_v16f32_v16i8(<16 x float> %a) { ; CHECK-SD-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-SD-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-SD-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: xtn v3.4h, v3.4s +; CHECK-SD-NEXT: xtn v2.4h, v2.4s +; CHECK-SD-NEXT: xtn v1.4h, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: mov v2.d[1], v3.d[0] +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: ret ; @@ -3054,12 +3134,20 @@ define <32 x i8> @fptos_v32f32_v32i8(<32 x float> %a) { ; CHECK-SD-NEXT: fcvtzs v6.4s, v6.4s ; CHECK-SD-NEXT: fcvtzs v5.4s, v5.4s ; CHECK-SD-NEXT: fcvtzs v4.4s, v4.4s -; CHECK-SD-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-SD-NEXT: uzp1 v1.8h, v6.8h, v7.8h -; CHECK-SD-NEXT: uzp1 v3.8h, v4.8h, v5.8h +; CHECK-SD-NEXT: xtn v3.4h, v3.4s +; CHECK-SD-NEXT: xtn v2.4h, v2.4s +; CHECK-SD-NEXT: xtn v1.4h, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: xtn v7.4h, v7.4s +; CHECK-SD-NEXT: xtn v6.4h, v6.4s +; CHECK-SD-NEXT: xtn v5.4h, v5.4s +; CHECK-SD-NEXT: xtn v4.4h, v4.4s +; CHECK-SD-NEXT: mov v2.d[1], v3.d[0] +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: mov v6.d[1], v7.d[0] +; CHECK-SD-NEXT: mov v4.d[1], v5.d[0] ; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: uzp1 v1.16b, v3.16b, v1.16b +; CHECK-SD-NEXT: uzp1 v1.16b, v4.16b, v6.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v32f32_v32i8: diff --git a/llvm/test/CodeGen/AArch64/neon-truncstore.ll b/llvm/test/CodeGen/AArch64/neon-truncstore.ll index 5d78ad24eb333..b677d077b98c1 100644 --- a/llvm/test/CodeGen/AArch64/neon-truncstore.ll +++ b/llvm/test/CodeGen/AArch64/neon-truncstore.ll @@ -104,7 +104,7 @@ define void @v4i32_v4i8(<4 x i32> %a, ptr %result) { ; CHECK-LABEL: v4i32_v4i8: ; CHECK: // %bb.0: ; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: ret %b = trunc <4 x i32> %a to <4 x i8> @@ -170,7 +170,8 @@ define void @v2i16_v2i8(<2 x i16> %a, ptr %result) { define void @v4i16_v4i8(<4 x i16> %a, ptr %result) { ; CHECK-LABEL: v4i16_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: ret %b = trunc <4 x i16> %a to <4 x i8> diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 6f1ae023bf25a..5f905d94e3573 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -145,7 +145,7 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret %x = load <4 x i8>, ptr %px diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll index fb571eff39fe5..0ef64789ad972 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll @@ -353,17 +353,13 @@ define <8 x i8> @shuffle4_v8i8_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: shuffle4_v4i8_zext: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d5, d2 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b +; CHECK-NEXT: uzp1 v1.8b, v2.8b, v3.8b ; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: fmov d4, d0 +; CHECK-NEXT: ushll v2.8h, v0.8b, #0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: mov v4.d[1], v1.d[0] -; CHECK-NEXT: mov v5.d[1], v3.d[0] -; CHECK-NEXT: bic v4.8h, #255, lsl #8 -; CHECK-NEXT: bic v5.8h, #255, lsl #8 -; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b }, v0.16b +; CHECK-NEXT: ushll v3.8h, v1.8b, #0 +; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> %y = shufflevector <4 x i8> %c, <4 x i8> %d, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index d1f843a09f749..acec3e74d3e93 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -146,7 +146,7 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret %x = load <4 x i8>, ptr %px diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index 0ad9900865518..4f8a4f7aede3e 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -41,8 +41,8 @@ define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: uzp1 v1.8b, v1.8b, v0.8b -; CHECK-NEXT: uzp1 v2.8b, v2.8b, v0.8b +; CHECK-NEXT: xtn v1.8b, v1.8h +; CHECK-NEXT: xtn v2.8b, v2.8h ; CHECK-NEXT: mov v1.s[1], v2.s[0] ; CHECK-NEXT: stur d1, [x12, #-4] ; CHECK-NEXT: add x12, x12, #8 diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll index 18cd4cc2111a4..ba367b0dbfde3 100644 --- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll @@ -710,23 +710,23 @@ define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: LBB6_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp q4, q1, [x0, #48] -; CHECK-NEXT: add x9, x1, #10 -; CHECK-NEXT: ldr d0, [x0, #80] +; CHECK-NEXT: add x9, x1, #8 ; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldr q5, [x0, #32] ; CHECK-NEXT: subs x8, x8, #1 +; CHECK-NEXT: ldr d0, [x0, #80] +; CHECK-NEXT: ldr q5, [x0, #32] ; CHECK-NEXT: add x0, x0, #128 -; CHECK-NEXT: uzp1.4s v0, v1, v0 -; CHECK-NEXT: uzp1.4s v1, v5, v4 +; CHECK-NEXT: uzp1.4s v4, v5, v4 ; CHECK-NEXT: uzp1.4s v2, v3, v2 +; CHECK-NEXT: uzp1.4s v0, v1, v0 +; CHECK-NEXT: uzp1.8h v1, v2, v4 ; CHECK-NEXT: xtn.4h v0, v0 -; CHECK-NEXT: uzp1.8h v1, v2, v1 -; CHECK-NEXT: uzp1.8b v2, v0, v0 -; CHECK-NEXT: uzp1.16b v0, v1, v0 -; CHECK-NEXT: st1.b { v2 }[2], [x9] -; CHECK-NEXT: add x9, x1, #8 -; CHECK-NEXT: st1.h { v0 }[4], [x9] -; CHECK-NEXT: str d0, [x1], #16 +; CHECK-NEXT: uzp1.16b v1, v1, v0 +; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: st1.h { v1 }[4], [x9] +; CHECK-NEXT: add x9, x1, #10 +; CHECK-NEXT: st1.b { v0 }[2], [x9] +; CHECK-NEXT: str d1, [x1], #16 ; CHECK-NEXT: b.eq LBB6_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -755,7 +755,7 @@ define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: xtn v0.4h, v0.4s ; CHECK-BE-NEXT: uzp1 v1.8h, v1.8h, v2.8h ; CHECK-BE-NEXT: uzp1 v1.16b, v1.16b, v0.16b -; CHECK-BE-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-BE-NEXT: xtn v0.8b, v0.8h ; CHECK-BE-NEXT: rev16 v2.16b, v1.16b ; CHECK-BE-NEXT: rev64 v1.16b, v1.16b ; CHECK-BE-NEXT: st1 { v0.b }[2], [x9] @@ -790,7 +790,7 @@ define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) { ; CHECK-DISABLE-NEXT: xtn v0.4h, v0.4s ; CHECK-DISABLE-NEXT: uzp1 v1.8h, v1.8h, v2.8h ; CHECK-DISABLE-NEXT: uzp1 v1.16b, v1.16b, v0.16b -; CHECK-DISABLE-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h ; CHECK-DISABLE-NEXT: rev16 v2.16b, v1.16b ; CHECK-DISABLE-NEXT: rev64 v1.16b, v1.16b ; CHECK-DISABLE-NEXT: st1 { v0.b }[2], [x9] diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index f0bbed59405e3..e05c65daf50aa 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -142,7 +142,7 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-NEXT: movi d0, #0xff00ff00ff00ff ; CHECK-NEXT: uaddl v1.8h, v1.8b, v2.8b ; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret %x = load <4 x i8>, ptr %px diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 82c0327219f50..05f43e7d8427b 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -143,7 +143,7 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret %x = load <4 x i8>, ptr %px diff --git a/llvm/test/CodeGen/AArch64/vcvt-oversize.ll b/llvm/test/CodeGen/AArch64/vcvt-oversize.ll index 611940546bc1a..380bdbcc7f740 100644 --- a/llvm/test/CodeGen/AArch64/vcvt-oversize.ll +++ b/llvm/test/CodeGen/AArch64/vcvt-oversize.ll @@ -9,8 +9,9 @@ define <8 x i8> @float_to_i8(ptr %in) { ; CHECK-NEXT: fadd v0.4s, v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h -; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: uzp1 v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret %l = load <8 x float>, ptr %in %scale = fmul <8 x float> %l, diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll index dd7a9c6d7768b..9c6ab8da0fa75 100644 --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll @@ -210,7 +210,7 @@ define void @no_combine_for_non_bool_truncate(<4 x i32> %vec, ptr %out) { ; CHECK-LABEL: no_combine_for_non_bool_truncate: ; CHECK: ; %bb.0: ; CHECK-NEXT: xtn.4h v0, v0 -; CHECK-NEXT: uzp1.8b v0, v0, v0 +; CHECK-NEXT: xtn.8b v0, v0 ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll b/llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll index 71d55df665176..90328f73f86b5 100644 --- a/llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll +++ b/llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll @@ -410,7 +410,7 @@ define void @store_trunc_from_64bits(ptr %src, ptr %dst) { ; BE-NEXT: ldrh w8, [x0, #4] ; BE-NEXT: rev32 v0.4h, v0.4h ; BE-NEXT: mov v0.h[2], w8 -; BE-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; BE-NEXT: xtn v0.8b, v0.8h ; BE-NEXT: rev32 v0.16b, v0.16b ; BE-NEXT: str s0, [sp, #12] ; BE-NEXT: ldrh w9, [sp, #12] @@ -456,7 +456,7 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) { ; BE-NEXT: add x8, x8, :lo12:.LCPI11_0 ; BE-NEXT: ld1 { v1.4h }, [x8] ; BE-NEXT: add v0.4h, v0.4h, v1.4h -; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b +; BE-NEXT: xtn v1.8b, v0.8h ; BE-NEXT: umov w8, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b ; BE-NEXT: str s1, [sp, #12] @@ -638,7 +638,7 @@ define void @shift_trunc_store(ptr %src, ptr %dst) { ; BE-NEXT: .cfi_def_cfa_offset 16 ; BE-NEXT: ld1 { v0.4s }, [x0] ; BE-NEXT: shrn v0.4h, v0.4s, #16 -; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b +; BE-NEXT: xtn v1.8b, v0.8h ; BE-NEXT: umov w8, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b ; BE-NEXT: str s1, [sp, #12] @@ -672,7 +672,7 @@ define void @shift_trunc_store_default_align(ptr %src, ptr %dst) { ; BE-NEXT: .cfi_def_cfa_offset 16 ; BE-NEXT: ld1 { v0.4s }, [x0] ; BE-NEXT: shrn v0.4h, v0.4s, #16 -; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b +; BE-NEXT: xtn v1.8b, v0.8h ; BE-NEXT: umov w8, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b ; BE-NEXT: str s1, [sp, #12] @@ -706,7 +706,7 @@ define void @shift_trunc_store_align_4(ptr %src, ptr %dst) { ; BE-NEXT: .cfi_def_cfa_offset 16 ; BE-NEXT: ld1 { v0.4s }, [x0] ; BE-NEXT: shrn v0.4h, v0.4s, #16 -; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b +; BE-NEXT: xtn v1.8b, v0.8h ; BE-NEXT: umov w8, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b ; BE-NEXT: str s1, [sp, #12] @@ -741,7 +741,7 @@ define void @shift_trunc_store_const_offset_1(ptr %src, ptr %dst) { ; BE-NEXT: .cfi_def_cfa_offset 16 ; BE-NEXT: ld1 { v0.4s }, [x0] ; BE-NEXT: shrn v0.4h, v0.4s, #16 -; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b +; BE-NEXT: xtn v1.8b, v0.8h ; BE-NEXT: umov w8, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b ; BE-NEXT: str s1, [sp, #12] @@ -777,7 +777,7 @@ define void @shift_trunc_store_const_offset_3(ptr %src, ptr %dst) { ; BE-NEXT: .cfi_def_cfa_offset 16 ; BE-NEXT: ld1 { v0.4s }, [x0] ; BE-NEXT: shrn v0.4h, v0.4s, #16 -; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b +; BE-NEXT: xtn v1.8b, v0.8h ; BE-NEXT: umov w8, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b ; BE-NEXT: str s1, [sp, #12] @@ -801,7 +801,7 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: shrn.4h v0, v0, #16 -; CHECK-NEXT: uzp1.8b v1, v0, v0 +; CHECK-NEXT: xtn.8b v1, v0 ; CHECK-NEXT: umov.h w8, v0[2] ; CHECK-NEXT: str s1, [sp, #12] ; CHECK-NEXT: ldrh w9, [sp, #12] @@ -816,7 +816,7 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) { ; BE-NEXT: .cfi_def_cfa_offset 16 ; BE-NEXT: ld1 { v0.4s }, [x0] ; BE-NEXT: shrn v0.4h, v0.4s, #16 -; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b +; BE-NEXT: xtn v1.8b, v0.8h ; BE-NEXT: umov w8, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b ; BE-NEXT: str s1, [sp, #12] @@ -868,7 +868,7 @@ define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) { ; BE-NEXT: ushll v0.8h, v0.8b, #0 ; BE-NEXT: ld1 { v0.b }[4], [x9] ; BE-NEXT: add v0.4h, v0.4h, v1.4h -; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b +; BE-NEXT: xtn v1.8b, v0.8h ; BE-NEXT: umov w8, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b ; BE-NEXT: str s1, [sp, #8] @@ -921,7 +921,7 @@ define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) { ; BE-NEXT: ushll v0.8h, v0.8b, #0 ; BE-NEXT: ld1 { v0.b }[4], [x9] ; BE-NEXT: add v0.4h, v0.4h, v1.4h -; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b +; BE-NEXT: xtn v1.8b, v0.8h ; BE-NEXT: umov w8, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b ; BE-NEXT: str s1, [sp, #8]