diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 47c57edbd16cc..d42ae4ff93a44 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11050,7 +11050,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, if (SrcEltTy == SmallestEltTy) continue; assert(ShuffleVT.getVectorElementType() == SmallestEltTy); - Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); + if (DAG.getDataLayout().isBigEndian()) { + Src.ShuffleVec = + DAG.getNode(AArch64ISD::NVCAST, dl, ShuffleVT, Src.ShuffleVec); + } else { + Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); + } Src.WindowScale = SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits(); Src.WindowBase *= Src.WindowScale; @@ -11102,7 +11107,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], ShuffleOps[1], Mask); - SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); + SDValue V; + if (DAG.getDataLayout().isBigEndian()) { + V = DAG.getNode(AArch64ISD::NVCAST, dl, VT, Shuffle); + } else { + V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); + } LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump(); dbgs() << "Reshuffle, creating node: "; V.dump();); diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll index cc7dffc497495..945a73b05f1ba 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -140,7 +140,6 @@ define <3 x i32> @fsext_v3i32(ptr %a) { ; CHECK-BE-NEXT: ldr s0, [x0] ; CHECK-BE-NEXT: rev32 v0.8b, v0.8b ; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b -; CHECK-BE-NEXT: rev16 v0.8b, v0.8b ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24 ; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24 @@ -284,7 +283,6 @@ 
define <3 x i16> @fsext_v3i16(ptr %a) { ; CHECK-BE-NEXT: ldr s0, [x0] ; CHECK-BE-NEXT: rev32 v0.8b, v0.8b ; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b -; CHECK-BE-NEXT: rev16 v0.8b, v0.8b ; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h @@ -447,7 +445,7 @@ define <4 x i8> @bitcast(i32 %0) { ; CHECK-BE-NEXT: fmov s0, w0 ; CHECK-BE-NEXT: rev32 v0.8b, v0.8b ; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b -; CHECK-BE-NEXT: rev64 v0.8b, v0.8b +; CHECK-BE-NEXT: rev64 v0.4h, v0.4h ; CHECK-BE-NEXT: ret %2 = bitcast i32 %0 to <4 x i8> ret <4 x i8> %2 diff --git a/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll b/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll index b2cb38c72bae8..d774d71d88f30 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll @@ -270,8 +270,6 @@ define i8 @trunc_v4i64_v4i8(<4 x i64> %input) { ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 ; CHECK-BE-NEXT: xtn v1.2s, v1.2d ; CHECK-BE-NEXT: xtn v0.2s, v0.2d -; CHECK-BE-NEXT: rev32 v1.4h, v1.4h -; CHECK-BE-NEXT: rev32 v0.4h, v0.4h ; CHECK-BE-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-BE-NEXT: addv h0, v0.4h ; CHECK-BE-NEXT: fmov w0, s0 diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll new file mode 100644 index 0000000000000..8b74de1c127dd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECKLE +; RUN: llc < %s -mtriple=aarch64_be | FileCheck %s --check-prefix=CHECKBE + +define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind { +; CHECKLE-LABEL: test_reconstructshuffle: +; CHECKLE: // %bb.0: +; CHECKLE-NEXT: umov w8, v0.b[3] +; CHECKLE-NEXT: umov w9, v0.b[2] +; CHECKLE-NEXT: fmov s2, w8 
+; CHECKLE-NEXT: umov w8, v0.b[1] +; CHECKLE-NEXT: mov v2.h[1], w9 +; CHECKLE-NEXT: mov v2.h[2], w8 +; CHECKLE-NEXT: umov w8, v0.b[0] +; CHECKLE-NEXT: ext v0.16b, v1.16b, v1.16b, #8 +; CHECKLE-NEXT: mov v2.h[3], w8 +; CHECKLE-NEXT: zip2 v0.8b, v0.8b, v0.8b +; CHECKLE-NEXT: add v0.4h, v2.4h, v0.4h +; CHECKLE-NEXT: bic v0.4h, #255, lsl #8 +; CHECKLE-NEXT: ret +; +; CHECKBE-LABEL: test_reconstructshuffle: +; CHECKBE: // %bb.0: +; CHECKBE-NEXT: rev64 v0.16b, v0.16b +; CHECKBE-NEXT: rev64 v1.16b, v1.16b +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: umov w8, v0.b[3] +; CHECKBE-NEXT: umov w9, v0.b[2] +; CHECKBE-NEXT: fmov s2, w8 +; CHECKBE-NEXT: umov w8, v0.b[1] +; CHECKBE-NEXT: mov v2.h[1], w9 +; CHECKBE-NEXT: mov v2.h[2], w8 +; CHECKBE-NEXT: umov w8, v0.b[0] +; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: mov v2.h[3], w8 +; CHECKBE-NEXT: zip2 v0.8b, v0.8b, v0.8b +; CHECKBE-NEXT: add v0.4h, v2.4h, v0.4h +; CHECKBE-NEXT: bic v0.4h, #255, lsl #8 +; CHECKBE-NEXT: rev64 v0.4h, v0.4h +; CHECKBE-NEXT: ret + %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %tmp2 = shufflevector <16 x i8> %b, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15> + %tmp3 = add <4 x i8> %tmp1, %tmp2 + %tmp4 = zext <4 x i8> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 +} diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll index bfd59f3d813c8..d06612e2332e6 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll @@ -545,7 +545,7 @@ define <4 x i8> @bitcast_i32_to_v4i8(i32 %word) { ; CHECK-BE-NEXT: fmov s0, w0 ; CHECK-BE-NEXT: rev32 v0.8b, v0.8b ; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b -; CHECK-BE-NEXT: rev64 v0.8b, v0.8b +; CHECK-BE-NEXT: rev64 v0.4h, v0.4h ; CHECK-BE-NEXT: ret %ret = bitcast i32 %word to <4 x i8> ret <4 x i8> %ret diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll index 
cf3955be99b4f..0a3476e5f4cef 100644 --- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll @@ -1345,10 +1345,6 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) { ; CHECK-BE-NEXT: zip1 v1.8b, v1.8b, v0.8b ; CHECK-BE-NEXT: zip2 v4.8b, v2.8b, v0.8b ; CHECK-BE-NEXT: zip1 v2.8b, v2.8b, v0.8b -; CHECK-BE-NEXT: rev16 v3.8b, v3.8b -; CHECK-BE-NEXT: rev16 v1.8b, v1.8b -; CHECK-BE-NEXT: rev16 v4.8b, v4.8b -; CHECK-BE-NEXT: rev16 v2.8b, v2.8b ; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0 ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b