-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LoongArch] Enable more vector tests for 32-bit target #160656
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-loongarch — Author: hev (heiher). Changes: Patch is 306.27 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/160656.diff — 74 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32baa2d111270..468dd1c085b98 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -666,6 +666,7 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
+ unsigned ResBits = OpVT.getScalarSizeInBits();
unsigned LegalVecSize = 128;
bool isLASX256Vector =
@@ -691,10 +692,11 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
if (isLASX256Vector) {
SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
- DAG.getConstant(2, DL, MVT::i64));
+ DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
}
+ Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}
@@ -727,15 +729,16 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
MVT VecTy = Val.getSimpleValueType();
+ MVT GRLenVT = Subtarget.getGRLenVT();
for (int i = NumEles; i > 1; i /= 2) {
- SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
+ SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
- DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+ DAG.getConstant(0, DL, GRLenVT));
}
SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
@@ -1128,10 +1131,12 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
SmallVector<SDValue, 8> Ops;
for (unsigned int i = 0; i < NewEltNum; i++) {
SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
- DAG.getConstant(i, DL, MVT::i64));
+ DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
? (unsigned)LoongArchISD::BITREV_8B
: (unsigned)ISD::BITREVERSE;
+ if (!Subtarget.is64Bit() && RevOp == LoongArchISD::BITREV_8B)
+ return SDValue();
Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
}
SDValue Res =
@@ -1611,9 +1616,8 @@ lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
- APInt Imm(64, SplatIndex);
return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
- DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
+ DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
}
return SDValue();
@@ -1671,7 +1675,7 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
}
// Calculate the immediate. Replace any remaining undefs with zero
- APInt Imm(64, 0);
+ int Imm = 0;
for (int i = SubVecSize - 1; i >= 0; --i) {
int M = SubMask[i];
@@ -1946,11 +1950,12 @@ static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
/// adding it as an operand to the resulting VSHUF.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
SmallVector<SDValue, 16> Ops;
for (auto M : Mask)
- Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
+ Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
@@ -2030,7 +2035,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
return NewShuffle;
- if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
+ if ((Result =
+ lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
return Result;
return SDValue();
}
@@ -2088,7 +2094,8 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
// LoongArch LASX only have XVPERM_W.
if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
return SDValue();
@@ -2119,9 +2126,10 @@ static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
return SDValue();
SmallVector<SDValue, 8> Masks;
+ MVT GRLenVT = Subtarget.getGRLenVT();
for (unsigned i = 0; i < NumElts; ++i)
- Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
- : DAG.getConstant(Mask[i], DL, MVT::i64));
+ Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
+ : DAG.getConstant(Mask[i], DL, GRLenVT));
SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
@@ -2533,7 +2541,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
Subtarget)))
return Result;
- if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
+ if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG,
+ Subtarget)))
return Result;
if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
V1, V2, DAG)))
@@ -3102,12 +3111,33 @@ LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
return SDValue();
SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
- SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
-
SmallVector<SDValue, 32> RawIndices;
- for (unsigned i = 0; i < NumElts; ++i)
- RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
- SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
+ SDValue SplatIdx;
+ SDValue Indices;
+
+ if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
+ MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ RawIndices.push_back(Op2);
+ RawIndices.push_back(DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+ }
+ SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
+ SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
+
+ RawIndices.clear();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+ RawIndices.push_back(DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+ }
+ Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
+ Indices = DAG.getBitcast(IdxVTy, Indices);
+ } else {
+ SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
+
+ for (unsigned i = 0; i < NumElts; ++i)
+ RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+ Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
+ }
// insert vec, elt, idx
// =>
@@ -5129,7 +5159,7 @@ performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
if (Opc == ISD::DELETED_NODE)
return SDValue();
- SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
+ SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
V = DAG.getZExtOrTrunc(V, DL, T);
return DAG.getBitcast(VT, V);
@@ -5142,6 +5172,7 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
+ MVT GRLenVT = Subtarget.getGRLenVT();
if (!DCI.isBeforeLegalizeOps())
return SDValue();
@@ -5209,11 +5240,11 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
if (Src.getSimpleValueType() == MVT::v32i8) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
- Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
- Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
- Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+ Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
+ Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
DAG.getConstant(16, DL, MVT::i8));
- V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+ V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
} else if (UseLASX) {
return SDValue();
}
@@ -5221,7 +5252,7 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
if (!V) {
Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
- V = DAG.getNode(Opc, DL, MVT::i64, Src);
+ V = DAG.getNode(Opc, DL, GRLenVT, Src);
}
EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
@@ -5878,6 +5909,22 @@ static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
}
+template <unsigned W>
+static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
+ unsigned ResOp) {
+ unsigned Imm = N->getConstantOperandVal(2);
+ if (!isUInt<W>(Imm)) {
+ const StringRef ErrorMsg = "argument out of range";
+ DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+ SDLoc DL(N);
+ SDValue Vec = N->getOperand(1);
+ SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
+ SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
+ return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
+}
+
static SDValue
performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -6367,6 +6414,68 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
N->getOperand(1),
DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
N->getOperand(2)));
+ case Intrinsic::loongarch_lsx_vpickve2gr_b:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_h:
+ case Intrinsic::loongarch_lasx_xvpickve2gr_w:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_w:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_bu:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_hu:
+ case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_wu:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_bz_b:
+ case Intrinsic::loongarch_lsx_bz_h:
+ case Intrinsic::loongarch_lsx_bz_w:
+ case Intrinsic::loongarch_lsx_bz_d:
+ case Intrinsic::loongarch_lasx_xbz_b:
+ case Intrinsic::loongarch_lasx_xbz_h:
+ case Intrinsic::loongarch_lasx_xbz_w:
+ case Intrinsic::loongarch_lasx_xbz_d:
+ if (!Subtarget.is64Bit())
+ return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
+ N->getOperand(1));
+ break;
+ case Intrinsic::loongarch_lsx_bz_v:
+ case Intrinsic::loongarch_lasx_xbz_v:
+ if (!Subtarget.is64Bit())
+ return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
+ N->getOperand(1));
+ break;
+ case Intrinsic::loongarch_lsx_bnz_b:
+ case Intrinsic::loongarch_lsx_bnz_h:
+ case Intrinsic::loongarch_lsx_bnz_w:
+ case Intrinsic::loongarch_lsx_bnz_d:
+ case Intrinsic::loongarch_lasx_xbnz_b:
+ case Intrinsic::loongarch_lasx_xbnz_h:
+ case Intrinsic::loongarch_lasx_xbnz_w:
+ case Intrinsic::loongarch_lasx_xbnz_d:
+ if (!Subtarget.is64Bit())
+ return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
+ N->getOperand(1));
+ break;
+ case Intrinsic::loongarch_lsx_bnz_v:
+ case Intrinsic::loongarch_lasx_xbnz_v:
+ if (!Subtarget.is64Bit())
+ return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
+ N->getOperand(1));
+ break;
}
return SDValue();
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index d99a57e562528..b0eb51a92c6c6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -26,7 +26,7 @@ def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
def SDT_LoongArchV2RUimm
: SDTypeProfile<1, 3,
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
- SDTCisVT<3, i64>]>;
+ SDTCisVT<3, GRLenVT>]>;
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -1482,7 +1482,7 @@ multiclass VldreplPat<ValueType vt, LAInst Inst, Operand ImmOpnd> {
}
multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
- Operand ImmOpnd, Operand IdxOpnd, ValueType elt = i64> {
+ Operand ImmOpnd, Operand IdxOpnd, ValueType elt = GRLenVT> {
def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)), BaseAddr:$rj),
(Inst vt:$vd, BaseAddr:$rj, 0, IdxOpnd:$idx)>;
@@ -2110,8 +2110,8 @@ def : Pat<(GRLenVT (vector_extract v4i32:$vj, GRLenVT:$rk)),
(COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, GRLenVT:$rk),
sub_32)),
GPR)>;
-def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
+def : Pat<(GRLenVT (vector_extract v2i64:$vj, GRLenVT:$rk)),
+ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, GRLenVT:$rk),
sub_64)),
GPR)>;
def : Pat<(f32 (vector_extract v4f32:$vj, GRLenVT:$rk)),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
index 87ee4ad025395..8b12216d0f856 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
@@ -1,27 +1,46 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 -mattr=+lasx --verify-machineinstrs < %s \
-; RUN: | FileCheck %s
+; RUN: | FileCheck %s --check-prefix=LA64
declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>)
define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v32i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: xvpermi.q $xr1, $xr2, 2
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v32i8:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslli.b $xr1, $xr0, 4
+; LA32-NEXT: xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvandi.b $xr1, $xr0, 51
+; LA32-NEXT: xvslli.b $xr1, $xr1, 2
+; LA32-NEXT: xvsrli.b $xr0, $xr0, 2
+; LA32-NEXT: xvandi.b $xr0, $xr0, 51
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvandi.b $xr1, $xr0, 85
+; LA32-NEXT: xvslli.b $xr1, $xr1, 1
+; LA32-NEXT: xvsrli.b $xr0, $xr0, 1
+; LA32-NEXT: xvandi.b $xr0, $xr0, 85
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_bitreverse_v32i8:
+; LA64: # %bb.0:
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 2
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 1
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
+; LA64-NEXT: xvori.b $xr0, $xr1, 0
+; LA64-NEXT: ret
%b = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
ret <32 x i8> %b
}
@@ -29,23 +48,53 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v16i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvshuf4i.h $xr0, $xr2, 27
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v16i16:
+; LA32: # %bb.0:
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 5
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 4
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 7
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 6
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; LA32-N...
[truncated]
|
zhaoqi5
reviewed
Sep 25, 2025
zhaoqi5
approved these changes
Sep 25, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
mahesh-attarde
pushed a commit
to mahesh-attarde/llvm-project
that referenced
this pull request
Oct 3, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.