Skip to content

Commit

Permalink
[Hexagon] Handle shifts of short vectors of i8
Browse files Browse the repository at this point in the history
  • Loading branch information
Krzysztof Parzyszek committed Sep 8, 2022
1 parent d8a2d3f commit 3c81757
Show file tree
Hide file tree
Showing 2 changed files with 282 additions and 1 deletion.
44 changes: 43 additions & 1 deletion llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
Expand Up @@ -2345,7 +2345,49 @@ HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)

/// Lower a vector shift node.
///
/// The shift is first handed to getVectorShiftByInt(); if that yields a
/// non-null value, it replaces \p Op as the node being processed. Results
/// whose element type is not i8 are returned as they are. Results with i8
/// elements are handled by widening: the value is extended to i16 elements,
/// shifted with the same opcode, and narrowed back to i8 elements.
///
/// \param Op   The shift node to lower.
/// \param DAG  The selection DAG used to build the replacement nodes.
/// \returns The lowered value, or an empty SDValue when the node has i8
///          elements but its opcode is not one of HexagonISD::VASR,
///          HexagonISD::VLSR or HexagonISD::VASL.
SDValue
HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);

  // First try to convert the shift (by vector) to a shift by a scalar.
  // If we first split the shift, the shift amount will become 'extract
  // subvector', and will no longer be recognized as scalar.
  SDValue Res = Op;
  if (SDValue S = getVectorShiftByInt(Op, DAG))
    Res = S;

  MVT ResTy = ty(Res);
  if (ResTy.getVectorElementType() != MVT::i8)
    return Res;

  // For shifts of i8, extend the inputs to i16, then truncate back to i8.
  assert(ResTy.getVectorElementType() == MVT::i8);
  unsigned Opc = Res.getOpcode();
  switch (Opc) {
  case HexagonISD::VASR:
  case HexagonISD::VLSR:
  case HexagonISD::VASL:
    break;
  default:
    // No instructions for shifts by non-scalars.
    return SDValue();
  }

  SDValue Val = Res.getOperand(0), Amt = Res.getOperand(1);

  // Shift one piece of an i8 vector: widen the elements to i16, apply the
  // shift with the original opcode and amount, then narrow back to the
  // original type. VASR sign-extends the input so that the bits shifted in
  // are copies of the sign bit; the other opcodes zero-extend it.
  auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
    MVT Ty = ty(V);
    MVT ExtTy = MVT::getVectorVT(MVT::i16, Ty.getVectorNumElements());
    SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(V, dl, ExtTy)
                                           : DAG.getZExtOrTrunc(V, dl, ExtTy);
    SDValue ExtS = DAG.getNode(Opc, dl, ExtTy, {ExtV, A});
    return DAG.getZExtOrTrunc(ExtS, dl, Ty);
  };

  // A 32-bit vector is widened and shifted in a single piece.
  if (ResTy.getSizeInBits() == 32)
    return ShiftPartI8(Opc, Val, Amt);

  // Otherwise split the value in two, shift each half by the same amount,
  // and concatenate the halves back into the original type.
  auto [LoV, HiV] = opSplit(Val, dl, DAG);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy,
                     {ShiftPartI8(Opc, LoV, Amt), ShiftPartI8(Opc, HiV, Amt)});
}

SDValue
Expand Down
239 changes: 239 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll
@@ -0,0 +1,239 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon < %s | FileCheck %s

; f0: arithmetic shift right (ashr) of a <4 x i8> vector by a constant splat
; of 1. The expected code is the sequence vsxtbh, vasrh, vtrunehb.
define <4 x i8> @f0(<4 x i8> %a0) unnamed_addr #0 {
; CHECK-LABEL: f0:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vsxtbh(r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vasrh(r1:0,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = vtrunehb(r1:0)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = ashr <4 x i8> %a0, <i8 1, i8 1, i8 1, i8 1>
ret <4 x i8> %v0
}

; f1: logical shift right (lshr) of a <4 x i8> vector by a constant splat
; of 1. The expected code is the sequence vzxtbh, vlsrh, vtrunehb.
define <4 x i8> @f1(<4 x i8> %a0) unnamed_addr #0 {
; CHECK-LABEL: f1:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vzxtbh(r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vlsrh(r1:0,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = vtrunehb(r1:0)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = lshr <4 x i8> %a0, <i8 1, i8 1, i8 1, i8 1>
ret <4 x i8> %v0
}

; f2: shift left (shl) of a <4 x i8> vector by a constant splat of 1.
; The expected code is the sequence vzxtbh, vaslh, vtrunehb.
define <4 x i8> @f2(<4 x i8> %a0) unnamed_addr #0 {
; CHECK-LABEL: f2:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vzxtbh(r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vaslh(r1:0,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = vtrunehb(r1:0)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = shl <4 x i8> %a0, <i8 1, i8 1, i8 1, i8 1>
ret <4 x i8> %v0
}


; f3: arithmetic shift right (ashr) of an <8 x i8> vector by a constant splat
; of 1. The expected code handles the input registers r0 and r1 separately:
; each goes through vsxtbh, vasrh and vtrunehb.
define <8 x i8> @f3(<8 x i8> %a0) unnamed_addr #0 {
; CHECK-LABEL: f3:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = vsxtbh(r1)
; CHECK-NEXT: r5:4 = vsxtbh(r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vasrh(r5:4,#1)
; CHECK-NEXT: r3:2 = vasrh(r3:2,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = vtrunehb(r1:0)
; CHECK-NEXT: r1 = vtrunehb(r3:2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = ashr <8 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <8 x i8> %v0
}

; f4: logical shift right (lshr) of an <8 x i8> vector by a constant splat
; of 1. The expected code handles the input registers r0 and r1 separately:
; each goes through vzxtbh, vlsrh and vtrunehb.
define <8 x i8> @f4(<8 x i8> %a0) unnamed_addr #0 {
; CHECK-LABEL: f4:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = vzxtbh(r1)
; CHECK-NEXT: r5:4 = vzxtbh(r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vlsrh(r5:4,#1)
; CHECK-NEXT: r3:2 = vlsrh(r3:2,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = vtrunehb(r1:0)
; CHECK-NEXT: r1 = vtrunehb(r3:2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = lshr <8 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <8 x i8> %v0
}

; f5: shift left (shl) of an <8 x i8> vector by a constant splat of 1.
; The expected code handles the input registers r0 and r1 separately:
; each goes through vzxtbh, vaslh and vtrunehb.
define <8 x i8> @f5(<8 x i8> %a0) unnamed_addr #0 {
; CHECK-LABEL: f5:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = vzxtbh(r1)
; CHECK-NEXT: r5:4 = vzxtbh(r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vaslh(r5:4,#1)
; CHECK-NEXT: r3:2 = vaslh(r3:2,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = vtrunehb(r1:0)
; CHECK-NEXT: r1 = vtrunehb(r3:2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = shl <8 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <8 x i8> %v0
}


; f6: arithmetic shift right (ashr) of a <2 x i16> vector by a constant splat
; of 1. The expected code is a single vasrh instruction.
define <2 x i16> @f6(<2 x i16> %a0) unnamed_addr #0 {
; CHECK-LABEL: f6:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vasrh(r1:0,#1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = ashr <2 x i16> %a0, <i16 1, i16 1>
ret <2 x i16> %v0
}

; f7: logical shift right (lshr) of a <2 x i16> vector by a constant splat
; of 1. The expected code is a single vlsrh instruction.
define <2 x i16> @f7(<2 x i16> %a0) unnamed_addr #0 {
; CHECK-LABEL: f7:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vlsrh(r1:0,#1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = lshr <2 x i16> %a0, <i16 1, i16 1>
ret <2 x i16> %v0
}

; f8: shift left (shl) of a <2 x i16> vector by a constant splat of 1.
; The expected code is a single vaslh instruction.
define <2 x i16> @f8(<2 x i16> %a0) unnamed_addr #0 {
; CHECK-LABEL: f8:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vaslh(r1:0,#1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = shl <2 x i16> %a0, <i16 1, i16 1>
ret <2 x i16> %v0
}


; f9: arithmetic shift right (ashr) of a <4 x i16> vector by a constant splat
; of 1. The expected code is a single vasrh instruction.
define <4 x i16> @f9(<4 x i16> %a0) unnamed_addr #0 {
; CHECK-LABEL: f9:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vasrh(r1:0,#1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = ashr <4 x i16> %a0, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %v0
}

; f10: logical shift right (lshr) of a <4 x i16> vector by a constant splat
; of 1. The expected code is a single vlsrh instruction.
define <4 x i16> @f10(<4 x i16> %a0) unnamed_addr #0 {
; CHECK-LABEL: f10:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vlsrh(r1:0,#1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = lshr <4 x i16> %a0, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %v0
}

; f11: shift left (shl) of a <4 x i16> vector by a constant splat of 1.
; The expected code is a single vaslh instruction.
define <4 x i16> @f11(<4 x i16> %a0) unnamed_addr #0 {
; CHECK-LABEL: f11:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vaslh(r1:0,#1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = shl <4 x i16> %a0, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %v0
}


; f12: arithmetic shift right (ashr) of a <2 x i32> vector by a constant splat
; of 1. The expected code is a single vasrw instruction.
define <2 x i32> @f12(<2 x i32> %a0) unnamed_addr #0 {
; CHECK-LABEL: f12:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vasrw(r1:0,#1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = ashr <2 x i32> %a0, <i32 1, i32 1>
ret <2 x i32> %v0
}

; f13: logical shift right (lshr) of a <2 x i32> vector by a constant splat
; of 1. The expected code is a single vlsrw instruction.
define <2 x i32> @f13(<2 x i32> %a0) unnamed_addr #0 {
; CHECK-LABEL: f13:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vlsrw(r1:0,#1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = lshr <2 x i32> %a0, <i32 1, i32 1>
ret <2 x i32> %v0
}

; f14: shift left (shl) of a <2 x i32> vector by a constant splat of 1.
; The expected code is a single vaslw instruction.
define <2 x i32> @f14(<2 x i32> %a0) unnamed_addr #0 {
; CHECK-LABEL: f14:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = vaslw(r1:0,#1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = shl <2 x i32> %a0, <i32 1, i32 1>
ret <2 x i32> %v0
}

; Attribute group #0 (nounwind) is referenced by the test functions f0-f14.
attributes #0 = { nounwind }

0 comments on commit 3c81757

Please sign in to comment.