diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a55b1facb103f..d8398719acebc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24419,33 +24419,6 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
   if (N->getOpcode() == AArch64ISD::UZP2)
     return SDValue();
 
-  // uzp1(x, undef) -> concat(truncate(x), undef)
-  if (Op1.isUndef()) {
-    EVT BCVT = MVT::Other, HalfVT = MVT::Other;
-    switch (ResVT.getSimpleVT().SimpleTy) {
-    default:
-      break;
-    case MVT::v16i8:
-      BCVT = MVT::v8i16;
-      HalfVT = MVT::v8i8;
-      break;
-    case MVT::v8i16:
-      BCVT = MVT::v4i32;
-      HalfVT = MVT::v4i16;
-      break;
-    case MVT::v4i32:
-      BCVT = MVT::v2i64;
-      HalfVT = MVT::v2i32;
-      break;
-    }
-    if (BCVT != MVT::Other) {
-      SDValue BC = DAG.getBitcast(BCVT, Op0);
-      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, BC);
-      return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Trunc,
-                         DAG.getUNDEF(HalfVT));
-    }
-  }
-
   if (SDValue Urshr = tryCombineExtendRShTrunc(N, DAG))
     return Urshr;
 
@@ -24487,6 +24460,34 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
   if (!DAG.getDataLayout().isLittleEndian())
     return SDValue();
 
+  // uzp1(x, undef) -> concat(truncate(x), undef)
+  // Little-endian only: this must stay below the endianness check above.
+  if (Op1.isUndef()) {
+    EVT BCVT = MVT::Other, HalfVT = MVT::Other;
+    switch (ResVT.getSimpleVT().SimpleTy) {
+    default:
+      break;
+    case MVT::v16i8:
+      BCVT = MVT::v8i16;
+      HalfVT = MVT::v8i8;
+      break;
+    case MVT::v8i16:
+      BCVT = MVT::v4i32;
+      HalfVT = MVT::v4i16;
+      break;
+    case MVT::v4i32:
+      BCVT = MVT::v2i64;
+      HalfVT = MVT::v2i32;
+      break;
+    }
+    if (BCVT != MVT::Other) {
+      SDValue BC = DAG.getBitcast(BCVT, Op0);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, BC);
+      return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Trunc,
+                         DAG.getUNDEF(HalfVT));
+    }
+  }
+
   // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y)
   // Example:
   // nxv4i32 = uzp1 bitcast(nxv4i32 x to nxv2i64), bitcast(nxv4i32 y to nxv2i64)
diff --git a/llvm/test/CodeGen/AArch64/aarch64_be-shuffle-vector.ll b/llvm/test/CodeGen/AArch64/aarch64_be-shuffle-vector.ll
new file mode 100644
index 0000000000000..4e60d99dbed36
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64_be-shuffle-vector.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECKLE
+; RUN: llc < %s -mtriple=aarch64_be | FileCheck %s --check-prefix=CHECKBE
+
+define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
+; CHECKLE-LABEL: test_reconstructshuffle:
+; CHECKLE:       // %bb.0:
+; CHECKLE-NEXT:    mov b2, v0.b[3]
+; CHECKLE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKLE-NEXT:    mov v2.b[2], v0.b[2]
+; CHECKLE-NEXT:    mov v2.b[4], v0.b[1]
+; CHECKLE-NEXT:    mov v2.b[6], v0.b[0]
+; CHECKLE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
+; CHECKLE-NEXT:    add v0.4h, v2.4h, v0.4h
+; CHECKLE-NEXT:    bic v0.4h, #255, lsl #8
+; CHECKLE-NEXT:    ret
+;
+; CHECKBE-LABEL: test_reconstructshuffle:
+; CHECKBE:       // %bb.0:
+; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
+; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    mov b2, v0.b[3]
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    mov v2.b[2], v0.b[2]
+; CHECKBE-NEXT:    mov v2.b[4], v0.b[1]
+; CHECKBE-NEXT:    mov v2.b[6], v0.b[0]
+; CHECKBE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
+; CHECKBE-NEXT:    add v0.4h, v2.4h, v0.4h
+; CHECKBE-NEXT:    bic v0.4h, #255, lsl #8
+; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
+; CHECKBE-NEXT:    ret
+  %tmp1 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %tmp2 = shufflevector <16 x i8> %b, <16 x i8> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  %tmp3 = add <4 x i8> %tmp1, %tmp2
+  %tmp4 = zext <4 x i8> %tmp3 to <4 x i16>
+  ret <4 x i16> %tmp4
+}
+
+; a shufflevector of even elements can become just xtn on little-endian, but not on big-endian.
+define <8 x i8> @xtn_shuffle_even_v16i8(<16 x i8> %a) {
+; CHECKLE-LABEL: xtn_shuffle_even_v16i8:
+; CHECKLE:       // %bb.0:
+; CHECKLE-NEXT:    xtn v0.8b, v0.8h
+; CHECKLE-NEXT:    ret
+;
+; CHECKBE-LABEL: xtn_shuffle_even_v16i8:
+; CHECKBE:       // %bb.0:
+; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    uzp1 v0.16b, v0.16b, v0.16b
+; CHECKBE-NEXT:    rev64 v0.8b, v0.8b
+; CHECKBE-NEXT:    ret
+  %r = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %r
+}
+
+; a shufflevector of even elements can become just xtn on little-endian, but not on big-endian.
+define <4 x i16> @xtn_shuffle_even_v8i16(<8 x i16> %a) {
+; CHECKLE-LABEL: xtn_shuffle_even_v8i16:
+; CHECKLE:       // %bb.0:
+; CHECKLE-NEXT:    xtn v0.4h, v0.4s
+; CHECKLE-NEXT:    ret
+;
+; CHECKBE-LABEL: xtn_shuffle_even_v8i16:
+; CHECKBE:       // %bb.0:
+; CHECKBE-NEXT:    rev64 v0.8h, v0.8h
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
+; CHECKBE-NEXT:    ret
+  %r = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %r
+}
+
+; a shufflevector of even elements can become just xtn on little-endian, but not on big-endian.
+define <4 x i32> @xtn_shuffle_uzp1_poison_v4i32(<4 x i32> %a) {
+; CHECKLE-LABEL: xtn_shuffle_uzp1_poison_v4i32:
+; CHECKLE:       // %bb.0:
+; CHECKLE-NEXT:    xtn v0.2s, v0.2d
+; CHECKLE-NEXT:    ret
+;
+; CHECKBE-LABEL: xtn_shuffle_uzp1_poison_v4i32:
+; CHECKBE:       // %bb.0:
+; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
+; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ret
+  %r = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison>
+  ret <4 x i32> %r
+
+}
diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
deleted file mode 100644
index 65da95e0163f4..0000000000000
--- a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECKLE
-; RUN: llc < %s -mtriple=aarch64_be | FileCheck %s --check-prefix=CHECKBE
-
-define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
-; CHECKLE-LABEL: test_reconstructshuffle:
-; CHECKLE:       // %bb.0:
-; CHECKLE-NEXT:    mov b2, v0.b[3]
-; CHECKLE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKLE-NEXT:    mov v2.b[2], v0.b[2]
-; CHECKLE-NEXT:    mov v2.b[4], v0.b[1]
-; CHECKLE-NEXT:    mov v2.b[6], v0.b[0]
-; CHECKLE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
-; CHECKLE-NEXT:    add v0.4h, v2.4h, v0.4h
-; CHECKLE-NEXT:    bic v0.4h, #255, lsl #8
-; CHECKLE-NEXT:    ret
-;
-; CHECKBE-LABEL: test_reconstructshuffle:
-; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
-; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
-; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    mov b2, v0.b[3]
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    mov v2.b[2], v0.b[2]
-; CHECKBE-NEXT:    mov v2.b[4], v0.b[1]
-; CHECKBE-NEXT:    mov v2.b[6], v0.b[0]
-; CHECKBE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
-; CHECKBE-NEXT:    add v0.4h, v2.4h, v0.4h
-; CHECKBE-NEXT:    bic v0.4h, #255, lsl #8
-; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
-; CHECKBE-NEXT:    ret
-  %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  %tmp2 = shufflevector <16 x i8> %b, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
-  %tmp3 = add <4 x i8> %tmp1, %tmp2
-  %tmp4 = zext <4 x i8> %tmp3 to <4 x i16>
-  ret <4 x i16> %tmp4
-}