Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 119 additions & 14 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1948,6 +1948,85 @@ static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

// Check the Mask and then build SrcVec and MaskImm infos which will
// be used to build LoongArchISD nodes for VPERMI_W or XVPERMI_W.
// On success, return true. Otherwise, return false.
//
// SrcVec receives the source vector (V1 or V2) selected for each result
// half; MaskImm accumulates the 8-bit immediate (four 2-bit selectors).
// For 256-bit types (MaskSize == 8) the high 128-bit lane must repeat the
// low lane's selection pattern, because [X]VPERMI_W applies one immediate
// to every 128-bit lane. Undef mask elements (-1) match anything.
static bool buildVPERMIInfo(ArrayRef<int> Mask, SDValue V1, SDValue V2,
                            SmallVectorImpl<SDValue> &SrcVec,
                            unsigned &MaskImm) {
  unsigned MaskSize = Mask.size();

  // An undef element fits anywhere; otherwise the element must select from
  // the 4-element group beginning at Off.
  auto isValid = [&](int M, int Off) {
    return (M == -1) || (M >= Off && M < Off + 4);
  };

  // Fold the 2-bit selectors for result elements I and I+1 into MaskImm.
  // Undef elements encode as 0 (any lane value is acceptable).
  auto buildImm = [&](int MLo, int MHi, unsigned Off, unsigned I) {
    auto immPart = [](int M, unsigned O) {
      return (M == -1 ? 0 : (M - (int)O)) & 0x3;
    };
    MaskImm |= immPart(MLo, Off) << (I * 2);
    MaskImm |= immPart(MHi, Off) << ((I + 1) * 2);
  };

  for (unsigned i = 0; i < 4; i += 2) {
    int MLo = Mask[i];
    int MHi = Mask[i + 1];

    if (MaskSize == 8) { // Only v8i32/v8f32 need this check.
      // The high-lane element must equal the low-lane element shifted by 4,
      // with undefs free to stand in on either side. If only the high-lane
      // element is defined, derive the low-lane value from it; an
      // unencodable derivation is rejected by the isValid checks below.
      int M2Lo = Mask[i + 4];
      int M2Hi = Mask[i + 5];
      if (MLo == -1)
        MLo = M2Lo == -1 ? -1 : M2Lo - 4;
      else if (M2Lo != -1 && M2Lo != MLo + 4)
        return false;
      if (MHi == -1)
        MHi = M2Hi == -1 ? -1 : M2Hi - 4;
      else if (M2Hi != -1 && M2Hi != MHi + 4)
        return false;
    }

    if (isValid(MLo, 0) && isValid(MHi, 0)) {
      SrcVec.push_back(V1);
      buildImm(MLo, MHi, 0, i);
    } else if (isValid(MLo, MaskSize) && isValid(MHi, MaskSize)) {
      SrcVec.push_back(V2);
      buildImm(MLo, MHi, MaskSize, i);
    } else {
      return false;
    }
  }

  return true;
}

/// Lower VECTOR_SHUFFLE into VPERMI (if possible).
///
/// VPERMI selects two elements from each of the two vectors based on the
/// mask and places them in the corresponding positions of the result vector
/// in order. Only v4i32 and v4f32 types are allowed.
///
/// It is possible to lower into VPERMI when the mask consists of two of the
/// following forms concatenated:
///   <i, j, u, v>
///   <u, v, i, j>
/// where i,j are in [0,4) and u,v are in [4, 8).
/// For example:
///   <2, 3, 4, 5>
///   <5, 7, 0, 2>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG,
                                          const LoongArchSubtarget &Subtarget) {
  // VPERMI_W only handles 128-bit vectors of four 32-bit elements.
  bool Is128Bit32Elt = VT == MVT::v4i32 || VT == MVT::v4f32;
  if (!Is128Bit32Elt || Mask.size() != VT.getVectorNumElements())
    return SDValue();

  unsigned Imm = 0;
  SmallVector<SDValue, 2> Srcs;
  if (!buildVPERMIInfo(Mask, V1, V2, Srcs, Imm))
    return SDValue();

  // Note the operand order: the source selected for the second result half
  // becomes the first operand of the VPERMI node.
  SDValue ImmOp = DAG.getConstant(Imm, DL, Subtarget.getGRLenVT());
  return DAG.getNode(LoongArchISD::VPERMI, DL, VT, Srcs[1], Srcs[0], ImmOp);
}

/// Lower VECTOR_SHUFFLE into VSHUF.
///
/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
Expand Down Expand Up @@ -2028,12 +2107,15 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
(Result =
lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
return Result;
if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
Zeroable)))
return Result;
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
Zeroable)))
return Result;
if ((Result =
lowerVECTOR_SHUFFLE_VPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
return Result;
if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
Zeroable)))
return Result;
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
Subtarget)))
return Result;
Expand Down Expand Up @@ -2101,21 +2183,40 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
///
/// Handles two encodings:
///   - XVPERMI_W for v8i32/v8f32: two-source, per-128-bit-lane selection
///     driven by buildVPERMIInfo.
///   - XVPERMI_D for v4i64/v4f64: single-source 64-bit element permute;
///     mask elements referencing V2 cannot be encoded and bail out.
static SDValue
lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                            SDValue V1, SDValue V2, SelectionDAG &DAG,
                            const LoongArchSubtarget &Subtarget) {
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned NumElts = Mask.size();
  if (NumElts != VT.getVectorNumElements())
    return SDValue();

  // Consider XVPERMI_W.
  if (VT == MVT::v8i32 || VT == MVT::v8f32) {
    unsigned Imm = 0;
    SmallVector<SDValue, 2> Srcs;
    if (!buildVPERMIInfo(Mask, V1, V2, Srcs, Imm))
      return SDValue();

    // The source for the second result half is the first node operand.
    return DAG.getNode(LoongArchISD::VPERMI, DL, VT, Srcs[1], Srcs[0],
                       DAG.getConstant(Imm, DL, GRLenVT));
  }

  // Consider XVPERMI_D.
  if (VT != MVT::v4i64 && VT != MVT::v4f64)
    return SDValue();

  unsigned Imm = 0;
  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
    int M = Mask[Idx];
    if (M == -1) // Undef: any selector (0) works.
      continue;
    // XVPERMI_D reads only one source; elements from V2 are unencodable.
    if (M >= (int)NumElts)
      return SDValue();
    Imm |= (unsigned)M << (Idx * 2);
  }

  return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
                     DAG.getConstant(Imm, DL, GRLenVT));
}

/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
Expand Down Expand Up @@ -2618,7 +2719,7 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
return NewShuffle;
if ((Result =
lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
return Result;
Expand All @@ -2645,6 +2746,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
Zeroable)))
return Result;
if ((Result =
lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
return Result;
if ((Result =
lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
return Result;
Expand Down Expand Up @@ -7501,6 +7605,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VSHUF4I)
NODE_NAME_CASE(VREPLVEI)
NODE_NAME_CASE(VREPLGR2VR)
NODE_NAME_CASE(VPERMI)
NODE_NAME_CASE(XVPERMI)
NODE_NAME_CASE(XVPERM)
NODE_NAME_CASE(XVREPLVE0)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ enum NodeType : unsigned {
VSHUF4I,
VREPLVEI,
VREPLGR2VR,
VPERMI,
XVPERMI,
XVPERM,
XVREPLVE0,
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1887,7 +1887,11 @@ def : Pat<(loongarch_vreplvei v8f32:$xj, immZExt2:$ui2),
def : Pat<(loongarch_vreplvei v4f64:$xj, immZExt1:$ui1),
(XVREPL128VEI_D v4f64:$xj, immZExt1:$ui1)>;

// XVPERMI_D
// XVPERMI_{W/D}
def : Pat<(loongarch_vpermi v8i32:$xj, v8i32:$xk, immZExt8:$ui8),
(XVPERMI_W v8i32:$xj, v8i32:$xk, immZExt8:$ui8)>;
def : Pat<(loongarch_vpermi v8f32:$xj, v8f32:$xk, immZExt8:$ui8),
(XVPERMI_W v8f32:$xj, v8f32:$xk, immZExt8:$ui8)>;
def : Pat<(loongarch_xvpermi v4i64:$xj, immZExt8: $ui8),
(XVPERMI_D v4i64:$xj, immZExt8: $ui8)>;
def : Pat<(loongarch_xvpermi v4f64:$xj, immZExt8: $ui8),
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV2RUimm>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
def loongarch_vpermi : SDNode<"LoongArchISD::VPERMI", SDT_LoongArchV2RUimm>;

def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
Expand Down Expand Up @@ -2041,6 +2042,12 @@ def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8),
def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8),
(VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>;

// VPERMI_W
def : Pat<(loongarch_vpermi v4i32:$vj, v4i32:$vk, immZExt8:$ui8),
(VPERMI_W v4i32:$vj, v4i32:$vk, immZExt8:$ui8)>;
def : Pat<(loongarch_vpermi v4f32:$vj, v4f32:$vk, immZExt8:$ui8),
(VPERMI_W v4f32:$vj, v4f32:$vk, immZExt8:$ui8)>;

// VREPLVEI_{B/H/W/D}
def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),
(VREPLVEI_B v16i8:$vj, immZExt4:$ui4)>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@ define void @shufflevector_xvpermi_v8i32(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8
; CHECK-NEXT: xvbsll.v $xr1, $xr1, 8
; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: xvpermi.w $xr1, $xr0, 78
; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
Expand All @@ -27,10 +25,8 @@ define void @shufflevector_xvpermi_v8f32(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI1_0)
; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
; CHECK-NEXT: xvst $xr2, $a0, 0
; CHECK-NEXT: xvpermi.w $xr0, $xr1, 141
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x float>, ptr %a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,9 @@ define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: shufflevector_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
; CHECK-NEXT: xvpermi.w $xr0, $xr1, 180
; CHECK-NEXT: ret
%c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 9, i32 3, i32 2, i32 8, i32 9, i32 3, i32 2>
ret <8 x i32> %c
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@ define void @shufflevector_vpermi_v4i32(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
; CHECK-NEXT: vbsll.v $vr1, $vr1, 8
; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: vpermi.w $vr1, $vr0, 78
; CHECK-NEXT: vst $vr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x i32>, ptr %a
Expand All @@ -27,10 +25,8 @@ define void @shufflevector_vpermi_v4f32(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI1_0)
; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
; CHECK-NEXT: vst $vr2, $a0, 0
; CHECK-NEXT: vpermi.w $vr0, $vr1, 141
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x float>, ptr %a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,8 @@ define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shufflevector_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI2_0)
; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
; CHECK-NEXT: vori.b $vr0, $vr2, 0
; CHECK-NEXT: vpermi.w $vr1, $vr0, 220
; CHECK-NEXT: vori.b $vr0, $vr1, 0
; CHECK-NEXT: ret
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 5, i32 7>
ret <4 x i32> %c
Expand All @@ -53,10 +51,8 @@ define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: shufflevector_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI4_0)
; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
; CHECK-NEXT: vori.b $vr0, $vr2, 0
; CHECK-NEXT: vpermi.w $vr1, $vr0, 220
; CHECK-NEXT: vori.b $vr0, $vr1, 0
; CHECK-NEXT: ret
%c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 5, i32 7>
ret <4 x float> %c
Expand Down
9 changes: 6 additions & 3 deletions llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v4i32(<16 x i8> %a, <16 x i8> %b)
define <16 x i8> @widen_shuffle_mask_v16i8_to_v2i64(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: widen_shuffle_mask_v16i8_to_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
; CHECK-NEXT: vpermi.w $vr1, $vr0, 228
; CHECK-NEXT: vori.b $vr0, $vr1, 0
; CHECK-NEXT: ret
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %r
Expand All @@ -50,7 +51,8 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v4i32(<8 x i16> %a, <8 x i16> %b)
define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: widen_shuffle_mask_v8i16_to_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
; CHECK-NEXT: vpermi.w $vr1, $vr0, 228
; CHECK-NEXT: vori.b $vr0, $vr1, 0
; CHECK-NEXT: ret
%r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %r
Expand All @@ -59,7 +61,8 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b)
define <4 x i32> @widen_shuffle_mask_v4i32_to_v2i64(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: widen_shuffle_mask_v4i32_to_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
; CHECK-NEXT: vpermi.w $vr1, $vr0, 228
; CHECK-NEXT: vori.b $vr0, $vr1, 0
; CHECK-NEXT: ret
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x i32> %r
Expand Down