diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32ea2198f7898..4d232028133db 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -338,7 +338,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                        VT, Legal);
     setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
-    setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+    setOperationAction({ISD::SHL, ISD::SRA}, VT, Legal);
     setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
     setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
     setCondCodeAction(
@@ -354,6 +354,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::USUBSAT, VT, Legal);
     setOperationAction(ISD::ROTL, VT, Custom);
     setOperationAction(ISD::ROTR, VT, Custom);
+    setOperationAction(ISD::SRL, VT, Custom);
   }
   for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
     setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -427,7 +428,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                        VT, Legal);
     setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
-    setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+    setOperationAction({ISD::SHL, ISD::SRA}, VT, Legal);
     setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
     setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
     setCondCodeAction(
@@ -444,6 +445,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
     setOperationAction(ISD::ROTL, VT, Custom);
     setOperationAction(ISD::ROTR, VT, Custom);
+    setOperationAction(ISD::SRL, VT, Custom);
   }
   for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
     setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -618,10 +620,51 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerVECREDUCE(Op, DAG);
   case ISD::ConstantFP:
     return lowerConstantFP(Op, DAG);
+  case ISD::SRL:
+    return lowerVectorSRL(Op, DAG);
   }
   return SDValue();
 }
 
+/// getVShiftAmt - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftAmt(SDValue Op, unsigned ElementBits, int64_t &Amt) {
+  // Ignore bit_converts.
+  while (Op.getOpcode() == ISD::BITCAST)
+    Op = Op.getOperand(0);
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (!BVN ||
+      !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+                            ElementBits) ||
+      SplatBitSize > ElementBits)
+    return false;
+  Amt = SplatBits.getSExtValue();
+  return true;
+}
+
+SDValue LoongArchTargetLowering::lowerVectorSRL(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  int64_t Amt;
+
+  if (!Op.getOperand(1).getValueType().isVector())
+    return Op;
+  unsigned EltSize = VT.getScalarSizeInBits();
+  MVT GRLenVT = Subtarget.getGRLenVT();
+
+  assert(Op.getOpcode() == ISD::SRL && "unexpected shift opcode");
+  if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && Amt < EltSize)
+    return DAG.getNode(LoongArchISD::VSRLI, DL, VT, Op.getOperand(0),
+                       DAG.getConstant(Amt, DL, GRLenVT));
+  return DAG.getNode(LoongArchISD::VSRL, DL, VT, Op.getOperand(0),
+                     Op.getOperand(1));
+}
+
 // Helper to attempt to return a cheaper, bit-inverted version of \p V.
 static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
   // TODO: don't always ignore oneuse constraints.
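The effect of the new hook, as a minimal IR sketch rather than anything taken from the patch (the function names here are illustrative): a vector logical right shift whose amount is a constant splat in [0, EltSize) becomes LoongArchISD::VSRLI and selects to the immediate-form [x]vsrli instructions, while any other vector amount becomes LoongArchISD::VSRL and selects to the register-form [x]vsrl instructions.

; Sketch only: splat-constant amount, expected to take the
; LoongArchISD::VSRLI path and select to vsrli.w.
define <4 x i32> @srl_by_splat(<4 x i32> %v) {
  %r = lshr <4 x i32> %v, splat (i32 3)
  ret <4 x i32> %r
}

; Non-constant amount, expected to take the LoongArchISD::VSRL path
; and select to vsrl.w.
define <4 x i32> @srl_by_var(<4 x i32> %v, <4 x i32> %amt) {
  %r = lshr <4 x i32> %v, %amt
  ret <4 x i32> %r
}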
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 5277e7e3e74ca..84622c30c0999 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -240,6 +240,7 @@ class LoongArchTargetLowering : public TargetLowering {
   SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorSRL(SDValue Op, SelectionDAG &DAG) const;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index d6af093411c3a..5896ca3f5a980 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1437,9 +1437,8 @@ defm : PatShiftXrSplatUimm<shl, "XVSLLI">;
 defm : PatShiftXrUimm<loongarch_vslli, "XVSLLI">;
 
 // XVSRL[I]_{B/H/W/D}
-defm : PatXrXr<srl, "XVSRL">;
-defm : PatShiftXrXr<srl, "XVSRL">;
-defm : PatShiftXrSplatUimm<srl, "XVSRLI">;
+defm : PatXrXr<loongarch_vsrl, "XVSRL">;
+defm : PatShiftXrXr<loongarch_vsrl, "XVSRL">;
 defm : PatShiftXrUimm<loongarch_vsrli, "XVSRLI">;
 
 // XVSRA[I]_{B/H/W/D}
@@ -2045,18 +2044,18 @@ defm : VAvgPat<sra, "XVAVG_B", v32i8>;
 defm : VAvgPat<sra, "XVAVG_H", v16i16>;
 defm : VAvgPat<sra, "XVAVG_W", v8i32>;
 defm : VAvgPat<sra, "XVAVG_D", v4i64>;
-defm : VAvgPat<srl, "XVAVG_BU", v32i8>;
-defm : VAvgPat<srl, "XVAVG_HU", v16i16>;
-defm : VAvgPat<srl, "XVAVG_WU", v8i32>;
-defm : VAvgPat<srl, "XVAVG_DU", v4i64>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_BU", v32i8>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_HU", v16i16>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_WU", v8i32>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_DU", v4i64>;
 defm : VAvgrPat<sra, "XVAVGR_B", v32i8>;
 defm : VAvgrPat<sra, "XVAVGR_H", v16i16>;
 defm : VAvgrPat<sra, "XVAVGR_W", v8i32>;
 defm : VAvgrPat<sra, "XVAVGR_D", v4i64>;
-defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>;
-defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>;
-defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>;
-defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_BU", v32i8>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_HU", v16i16>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_WU", v8i32>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_DU", v4i64>;
 
 // abs
 def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 43ad3819029cf..96bf8a2db835d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -72,6 +72,9 @@ def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
 def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
 def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
 
+// Vector logical right shift
+def loongarch_vsrl : SDNode<"LoongArchISD::VSRL", SDT_LoongArchV2R>;
+
 // Vector logical left / right shift by immediate
 def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>;
 def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>;
@@ -1535,6 +1538,11 @@ multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
           (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
 }
 
+multiclass VAvgIPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+  def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (GRLenVT 1)),
+            (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
 multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
   def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
                              (vt (vsplat_imm_eq_1)))),
@@ -1542,6 +1550,13 @@ multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
           (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
 }
 
+multiclass VAvgrIPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+  def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
+                             (vt (vsplat_imm_eq_1)))),
+                    (GRLenVT 1)),
+            (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
 let Predicates = [HasExtLSX] in {
 
 // VADD_{B/H/W/D}
@@ -1647,9 +1662,8 @@ defm : PatShiftVrSplatUimm<shl, "VSLLI">;
 defm : PatShiftVrUimm<loongarch_vslli, "VSLLI">;
 
 // VSRL[I]_{B/H/W/D}
-defm : PatVrVr<srl, "VSRL">;
-defm : PatShiftVrVr<srl, "VSRL">;
-defm : PatShiftVrSplatUimm<srl, "VSRLI">;
+defm : PatVrVr<loongarch_vsrl, "VSRL">;
+defm : PatShiftVrVr<loongarch_vsrl, "VSRL">;
 defm : PatShiftVrUimm<loongarch_vsrli, "VSRLI">;
 
 // VSRA[I]_{B/H/W/D}
@@ -2195,18 +2209,18 @@ defm : VAvgPat<sra, "VAVG_B", v16i8>;
 defm : VAvgPat<sra, "VAVG_H", v8i16>;
 defm : VAvgPat<sra, "VAVG_W", v4i32>;
 defm : VAvgPat<sra, "VAVG_D", v2i64>;
-defm : VAvgPat<srl, "VAVG_BU", v16i8>;
-defm : VAvgPat<srl, "VAVG_HU", v8i16>;
-defm : VAvgPat<srl, "VAVG_WU", v4i32>;
-defm : VAvgPat<srl, "VAVG_DU", v2i64>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_BU", v16i8>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_HU", v8i16>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_WU", v4i32>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_DU", v2i64>;
 defm : VAvgrPat<sra, "VAVGR_B", v16i8>;
 defm : VAvgrPat<sra, "VAVGR_H", v8i16>;
 defm : VAvgrPat<sra, "VAVGR_W", v4i32>;
 defm : VAvgrPat<sra, "VAVGR_D", v2i64>;
-defm : VAvgrPat<srl, "VAVGR_BU", v16i8>;
-defm : VAvgrPat<srl, "VAVGR_HU", v8i16>;
-defm : VAvgrPat<srl, "VAVGR_WU", v4i32>;
-defm : VAvgrPat<srl, "VAVGR_DU", v2i64>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_BU", v16i8>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_HU", v8i16>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_WU", v4i32>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_DU", v2i64>;
 
 // abs
 def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;
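The new VAvgIPat/VAvgrIPat multiclasses exist because the canonical form of the shift changes: once ISD::SRL is custom-lowered, a shift of (add x, y) by a splat of one reaches instruction selection as loongarch_vsrli with a scalar (GRLenVT 1) amount rather than as a generic srl by a vsplat_imm_eq_1 vector, so the unsigned [x]vavg/[x]vavgr folds must be written against the target node. The IR shape they keep matching, in the style of ir-instruction/avg.ll (a sketch with an illustrative name, not an autogenerated test):

define void @avg_du(ptr %res, ptr %a, ptr %b) nounwind {
entry:
  %va = load <2 x i64>, ptr %a
  %vb = load <2 x i64>, ptr %b
  %add = add <2 x i64> %va, %vb
  ; Lowered to (loongarch_vsrli (add %va, %vb), 1), which VAvgIPat
  ; folds into a single vavg.du, as the updated avg.ll now checks
  ; on both LA32 and LA64.
  %shr = lshr <2 x i64> %add, splat (i64 1)
  store <2 x i64> %shr, ptr %res
  ret void
}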
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
index 8b12216d0f856..7f663d8de3cb8 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
@@ -11,6 +11,7 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    xvslli.b $xr1, $xr0, 4
 ; LA32-NEXT:    xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT:    xvandi.b $xr0, $xr0, 15
 ; LA32-NEXT:    xvor.v $xr0, $xr0, $xr1
 ; LA32-NEXT:    xvandi.b $xr1, $xr0, 51
 ; LA32-NEXT:    xvslli.b $xr1, $xr1, 2
@@ -163,6 +164,7 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
 ; LA32-NEXT:    xvshuf.b $xr0, $xr0, $xr0, $xr1
 ; LA32-NEXT:    xvslli.b $xr1, $xr0, 4
 ; LA32-NEXT:    xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT:    xvandi.b $xr0, $xr0, 15
 ; LA32-NEXT:    xvor.v $xr0, $xr0, $xr1
 ; LA32-NEXT:    xvandi.b $xr1, $xr0, 51
 ; LA32-NEXT:    xvslli.b $xr1, $xr1, 2
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
index 5c5c19935080b..0577a116bee5a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
@@ -131,22 +131,13 @@
 }
 
 define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvavg_du:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    xvld $xr1, $a2, 0
-; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1
-; LA32-NEXT:    xvsrli.d $xr0, $xr0, 1
-; LA32-NEXT:    xvst $xr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: xvavg_du:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    xvld $xr0, $a1, 0
-; LA64-NEXT:    xvld $xr1, $a2, 0
-; LA64-NEXT:    xvavg.du $xr0, $xr0, $xr1
-; LA64-NEXT:    xvst $xr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: xvavg_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavg.du $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   %va = load <4 x i64>, ptr %a
   %vb = load <4 x i64>, ptr %b
@@ -298,8 +289,8 @@ define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
 ; LA32-NEXT:    xvld $xr0, $a1, 0
 ; LA32-NEXT:    xvld $xr1, $a2, 0
 ; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1
-; LA32-NEXT:    xvaddi.du $xr0, $xr0, 1
-; LA32-NEXT:    xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT:    xvrepli.d $xr1, 1
+; LA32-NEXT:    xvavg.du $xr0, $xr0, $xr1
 ; LA32-NEXT:    xvst $xr0, $a0, 0
 ; LA32-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue170976.ll b/llvm/test/CodeGen/LoongArch/lasx/issue170976.ll
new file mode 100644
index 0000000000000..9b17d7b8c9767
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/issue170976.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define <64 x i8> @test_i8(<64 x i8> %shuffle) {
+; CHECK-LABEL: test_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvrepli.b $xr2, -85
+; CHECK-NEXT:    xvmuh.bu $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT:    xvmuh.bu $xr1, $xr1, $xr2
+; CHECK-NEXT:    xvsrli.b $xr1, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %div = udiv <64 x i8> %shuffle, splat (i8 3)
+  ret <64 x i8> %div
+}
+
+define <32 x i16> @test_i16(<32 x i16> %shuffle) {
+; CHECK-LABEL: test_i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lu12i.w $a0, 10
+; CHECK-NEXT:    ori $a0, $a0, 2731
+; CHECK-NEXT:    xvreplgr2vr.h $xr2, $a0
+; CHECK-NEXT:    xvmuh.hu $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT:    xvmuh.hu $xr1, $xr1, $xr2
+; CHECK-NEXT:    xvsrli.h $xr1, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %div = udiv <32 x i16> %shuffle, splat (i16 3)
+  ret <32 x i16> %div
+}
+
+define <16 x i32> @test_i32(<16 x i32> %shuffle) {
+; CHECK-LABEL: test_i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lu12i.w $a0, -349526
+; CHECK-NEXT:    ori $a0, $a0, 2731
+; CHECK-NEXT:    xvreplgr2vr.w $xr2, $a0
+; CHECK-NEXT:    xvmuh.wu $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT:    xvmuh.wu $xr1, $xr1, $xr2
+; CHECK-NEXT:    xvsrli.w $xr1, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %div = udiv <16 x i32> %shuffle, splat (i32 3)
+  ret <16 x i32> %div
+}
+
+define <8 x i64> @test_i64(<8 x i64> %shuffle) {
+; LA32-LABEL: test_i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvrepli.d $xr2, 3
+; LA32-NEXT:    xvdiv.du $xr0, $xr0, $xr2
+; LA32-NEXT:    xvdiv.du $xr1, $xr1, $xr2
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, -349526
+; LA64-NEXT:    ori $a0, $a0, 2731
+; LA64-NEXT:    lu32i.d $a0, -349526
+; LA64-NEXT:    lu52i.d $a0, $a0, -1366
+; LA64-NEXT:    xvreplgr2vr.d $xr2, $a0
+; LA64-NEXT:    xvmuh.du $xr0, $xr0, $xr2
+; LA64-NEXT:    xvsrli.d $xr0, $xr0, 1
+; LA64-NEXT:    xvmuh.du $xr1, $xr1, $xr2
+; LA64-NEXT:    xvsrli.d $xr1, $xr1, 1
+; LA64-NEXT:    ret
+entry:
+  %div = udiv <8 x i64> %shuffle, splat (i64 3)
+  ret <8 x i64> %div
+}
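The constants in these new tests come from the standard unsigned divide-by-constant expansion: udiv by 3 becomes a multiply-high followed by a logical shift right by one, and that trailing shift is exactly the splat-amount SRL this patch routes through LoongArchISD::VSRLI. For i8, 171 = ceil(2^9 / 3), so x / 3 == (x * 171) >> 9 == mulhu(x, 171) >> 1 for all x in [0, 255], and 171 printed as a signed byte is the -85 materialized by xvrepli.b; the i16 and i32 constants 0xAAAB and 0xAAAAAAAB are the corresponding factors for 2^17 / 3 and 2^33 / 3. A reduced single-register version of the same pattern (a sketch with an illustrative name, not an autogenerated test):

define <16 x i8> @udiv_by_3(<16 x i8> %x) {
  ; Expected: vrepli.b of -85, then vmuh.bu, then vsrli.b by 1.
  %d = udiv <16 x i8> %x, splat (i8 3)
  ret <16 x i8> %d
}

On loongarch32 the i64 cases are not expanded this way and stay as [x]vdiv.du, as the LA32 checks above show.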
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
index b0d36a8143fa1..ba84e5c136de3 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
@@ -11,6 +11,7 @@ define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    vslli.b $vr1, $vr0, 4
 ; LA32-NEXT:    vsrli.b $vr0, $vr0, 4
+; LA32-NEXT:    vandi.b $vr0, $vr0, 15
 ; LA32-NEXT:    vor.v $vr0, $vr0, $vr1
 ; LA32-NEXT:    vandi.b $vr1, $vr0, 51
 ; LA32-NEXT:    vslli.b $vr1, $vr1, 2
@@ -116,6 +117,7 @@ define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind {
 ; LA32-NEXT:    vshuf.b $vr0, $vr0, $vr0, $vr1
 ; LA32-NEXT:    vslli.b $vr1, $vr0, 4
 ; LA32-NEXT:    vsrli.b $vr0, $vr0, 4
+; LA32-NEXT:    vandi.b $vr0, $vr0, 15
 ; LA32-NEXT:    vor.v $vr0, $vr0, $vr1
 ; LA32-NEXT:    vandi.b $vr1, $vr0, 51
 ; LA32-NEXT:    vslli.b $vr1, $vr1, 2
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
index 334af22edee59..8e700689fdc58 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
@@ -131,22 +131,13 @@
 }
 
 define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vavg_du:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    vld $vr0, $a1, 0
-; LA32-NEXT:    vld $vr1, $a2, 0
-; LA32-NEXT:    vadd.d $vr0, $vr0, $vr1
-; LA32-NEXT:    vsrli.d $vr0, $vr0, 1
-; LA32-NEXT:    vst $vr0, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: vavg_du:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    vld $vr0, $a1, 0
-; LA64-NEXT:    vld $vr1, $a2, 0
-; LA64-NEXT:    vavg.du $vr0, $vr0, $vr1
-; LA64-NEXT:    vst $vr0, $a0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: vavg_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vavg.du $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
 entry:
   %va = load <2 x i64>, ptr %a
   %vb = load <2 x i64>, ptr %b
@@ -298,8 +289,8 @@ define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
 ; LA32-NEXT:    vld $vr0, $a1, 0
 ; LA32-NEXT:    vld $vr1, $a2, 0
 ; LA32-NEXT:    vadd.d $vr0, $vr0, $vr1
-; LA32-NEXT:    vaddi.du $vr0, $vr0, 1
-; LA32-NEXT:    vsrli.d $vr0, $vr0, 1
+; LA32-NEXT:    vrepli.d $vr1, 1
+; LA32-NEXT:    vavg.du $vr0, $vr0, $vr1
 ; LA32-NEXT:    vst $vr0, $a0, 0
 ; LA32-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/LoongArch/lsx/issue170976.ll b/llvm/test/CodeGen/LoongArch/lsx/issue170976.ll
new file mode 100644
index 0000000000000..df4da0178f389
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/issue170976.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define <32 x i8> @test_i8(<32 x i8> %shuffle) {
+; CHECK-LABEL: test_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vrepli.b $vr2, -85
+; CHECK-NEXT:    vmuh.bu $vr0, $vr0, $vr2
+; CHECK-NEXT:    vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT:    vmuh.bu $vr1, $vr1, $vr2
+; CHECK-NEXT:    vsrli.b $vr1, $vr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %div = udiv <32 x i8> %shuffle, splat (i8 3)
+  ret <32 x i8> %div
+}
+
+define <16 x i16> @test_i16(<16 x i16> %shuffle) {
+; CHECK-LABEL: test_i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lu12i.w $a0, 10
+; CHECK-NEXT:    ori $a0, $a0, 2731
+; CHECK-NEXT:    vreplgr2vr.h $vr2, $a0
+; CHECK-NEXT:    vmuh.hu $vr0, $vr0, $vr2
+; CHECK-NEXT:    vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT:    vmuh.hu $vr1, $vr1, $vr2
+; CHECK-NEXT:    vsrli.h $vr1, $vr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %div = udiv <16 x i16> %shuffle, splat (i16 3)
+  ret <16 x i16> %div
+}
+
+define <8 x i32> @test_i32(<8 x i32> %shuffle) {
+; CHECK-LABEL: test_i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lu12i.w $a0, -349526
+; CHECK-NEXT:    ori $a0, $a0, 2731
+; CHECK-NEXT:    vreplgr2vr.w $vr2, $a0
+; CHECK-NEXT:    vmuh.wu $vr0, $vr0, $vr2
+; CHECK-NEXT:    vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT:    vmuh.wu $vr1, $vr1, $vr2
+; CHECK-NEXT:    vsrli.w $vr1, $vr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %div = udiv <8 x i32> %shuffle, splat (i32 3)
+  ret <8 x i32> %div
+}
+
+define <4 x i64> @test_i64(<4 x i64> %shuffle) {
+; LA32-LABEL: test_i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    vrepli.d $vr2, 3
+; LA32-NEXT:    vdiv.du $vr0, $vr0, $vr2
+; LA32-NEXT:    vdiv.du $vr1, $vr1, $vr2
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, -349526
+; LA64-NEXT:    ori $a0, $a0, 2731
+; LA64-NEXT:    lu32i.d $a0, -349526
+; LA64-NEXT:    lu52i.d $a0, $a0, -1366
+; LA64-NEXT:    vreplgr2vr.d $vr2, $a0
+; LA64-NEXT:    vmuh.du $vr0, $vr0, $vr2
+; LA64-NEXT:    vsrli.d $vr0, $vr0, 1
+; LA64-NEXT:    vmuh.du $vr1, $vr1, $vr2
+; LA64-NEXT:    vsrli.d $vr1, $vr1, 1
+; LA64-NEXT:    ret
+entry:
+  %div = udiv <4 x i64> %shuffle, splat (i64 3)
+  ret <4 x i64> %div
+}