diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 4a59610f3d4e27..0b147e42768b58 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -1734,7 +1734,13 @@ OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb, // The following shuffles only work for bytes and halfwords. This requires // the strip length to be 1 or 2. - if (Strip.second != 1 && Strip.second != 2) + // FIXME: Collecting even/odd elements of any power-of-2 length could be + // done by taking half of a deal operation. This should be handled in + // perfect shuffle generation, but currently that code requires an exact + // mask to work. To work with contracting perfect shuffles, it would need + // to be able to complete an incomplete mask. + // Once that's done, remove the handling of L=4. + if (Strip.second != 1 && Strip.second != 2 && /*FIXME*/Strip.second != 4) return OpRef::fail(); // The patterns for the shuffles, in terms of the starting offsets of the @@ -1800,6 +1806,17 @@ OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb, assert(Strip.first == 0 || Strip.first == L); using namespace Hexagon; NodeTemplate Res; + // FIXME: remove L=4 case after adding perfect mask completion. + if (L == 4) { + const SDLoc &dl(Results.InpNode); + Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(-L, dl)}); + OpRef C = OpRef::res(Results.top()); + MVT JoinTy = MVT::getVectorVT(ResTy.getVectorElementType(), + 2 * ResTy.getVectorNumElements()); + Results.push(Hexagon::V6_vshuffvdd, JoinTy, {Vb, Va, C}); + return Strip.first == 0 ? OpRef::lo(OpRef::res(Results.top())) + : OpRef::hi(OpRef::res(Results.top())); + } Res.Opc = Strip.second == 1 // Number of bytes. ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob) : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh); diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll new file mode 100644 index 00000000000000..22fdf01c218db6 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define void @f0(ptr %a0, ptr %a1, ptr %a2) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r7 = #-4 +; CHECK-NEXT: v0 = vmem(r0+#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1 = vmem(r1+#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vmpy(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vadd(v1:0.w,v1:0.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1:0 = vshuff(v1,v0,r7) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.h = vpacko(v1.w,v0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r2+#0) = v0.new +; CHECK-NEXT: } +b0: + %v0 = load <64 x i16>, ptr %a0, align 128 + %v1 = load <64 x i16>, ptr %a1, align 128 + %v2 = sext <64 x i16> %v0 to <64 x i32> + %v3 = sext <64 x i16> %v1 to <64 x i32> + %0 = trunc <64 x i32> %v2 to <64 x i16> + %1 = trunc <64 x i32> %v3 to <64 x i16> + %2 = bitcast <64 x i16> %0 to <32 x i32> + %3 = bitcast <64 x i16> %1 to <32 x i32> + %4 = call <64 x i32> @llvm.hexagon.V6.vmpyhv.128B(<32 x i32> %2, <32 x i32> %3) + %5 = add <64 x i32> %4, %4 + %6 = shufflevector <64 x i32> %5, <64 x i32> %5, <32 x i32> + %7 = shufflevector <64 x i32> %5, <64 x i32> %5, <32 x i32> + %8 = shufflevector <32 x i32> %6, <32 x i32> %7, <64 x i32> + %9 = bitcast <64 x i32> %8 to <128 x i16> + %10 = shufflevector <128 x i16> %9, <128 x i16> poison, <64 x i32> + %11 = sext <64 x i16> %10 to <64 x i32> + %v6 = trunc <64 x i32> %11 to <64 x i16> + store <64 x i16> %v6, ptr %a2, align 128 + ret void +} + +declare <64 x i32> @llvm.hexagon.V6.vmpyhv.128B(<32 x i32>, <32 x i32>) #0 + +attributes #0 = { nounwind "target-features"="+v66,+hvxv66,+hvx-length128b" }