From e05899e67d55058a68d645b77fb55bc17eec24cc Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Mon, 8 Dec 2025 12:13:38 +0800 Subject: [PATCH 1/4] [LoongArch] Custom lowering for 128-bit vector integer shifts --- .../LoongArch/LoongArchISelLowering.cpp | 63 +++++++++- .../Target/LoongArch/LoongArchISelLowering.h | 1 + .../LoongArch/LoongArchLASXInstrInfo.td | 44 ++++--- .../Target/LoongArch/LoongArchLSXInstrInfo.td | 110 ++++++++++-------- llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll | 8 +- .../LoongArch/lsx/ir-instruction/avg.ll | 54 +++------ .../test/CodeGen/LoongArch/lsx/issue170976.ll | 74 ++++++++++++ llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll | 13 +++ 8 files changed, 261 insertions(+), 106 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/lsx/issue170976.ll diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 32ea2198f7898..2356f551c119e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -338,7 +338,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, VT, Legal); setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Custom); setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); setCondCodeAction( @@ -618,10 +618,71 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerVECREDUCE(Op, DAG); case ISD::ConstantFP: return lowerConstantFP(Op, DAG); + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: + return lowerVectorSRA_SRL_SHL(Op, DAG); } return SDValue(); } +/// getVShiftImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift operation, where all the elements of the +/// build_vector must have the same constant integer value. +static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { + // Ignore bit_converts. + while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (!BVN || + !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, + ElementBits) || + SplatBitSize > ElementBits) + return false; + Cnt = SplatBits.getSExtValue(); + return true; +} + +SDValue +LoongArchTargetLowering::lowerVectorSRA_SRL_SHL(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + int64_t Cnt; + + if (!Op.getOperand(1).getValueType().isVector()) + return Op; + unsigned EltSize = VT.getScalarSizeInBits(); + MVT GRLenVT = Subtarget.getGRLenVT(); + + switch (Op.getOpcode()) { + case ISD::SHL: + if (getVShiftImm(Op.getOperand(1), EltSize, Cnt) && Cnt >= 0 && + Cnt < EltSize) + return DAG.getNode(LoongArchISD::VSLLI, DL, VT, Op.getOperand(0), + DAG.getConstant(Cnt, DL, GRLenVT)); + return DAG.getNode(LoongArchISD::VSLL, DL, VT, Op.getOperand(0), + Op.getOperand(1)); + case ISD::SRA: + case ISD::SRL: + if (getVShiftImm(Op.getOperand(1), EltSize, Cnt) && Cnt >= 0 && + Cnt < EltSize) { + unsigned Opc = (Op.getOpcode() == ISD::SRA) ? 
LoongArchISD::VSRAI + : LoongArchISD::VSRLI; + return DAG.getNode(Opc, DL, VT, Op.getOperand(0), + DAG.getConstant(Cnt, DL, GRLenVT)); + } + unsigned Opc = + (Op.getOpcode() == ISD::SRA) ? LoongArchISD::VSRA : LoongArchISD::VSRL; + return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1)); + } + + llvm_unreachable("unexpected shift opcode"); +} + // Helper to attempt to return a cheaper, bit-inverted version of \p V. static SDValue isNOT(SDValue V, SelectionDAG &DAG) { // TODO: don't always ignore oneuse constraints. diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 5277e7e3e74ca..6ad14ea9d6951 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -240,6 +240,7 @@ class LoongArchTargetLowering : public TargetLowering { SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index d6af093411c3a..6bb74e76fabc6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1332,6 +1332,18 @@ multiclass PairInsertExtractPatV4 { } } +multiclass XVAvgPat { + def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))), + (!cast(Inst) vt:$vj, vt:$vk)>; +} + +multiclass XVAvgrPat { + def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)), + (vt (vsplat_imm_eq_1)))), + (vt (vsplat_imm_eq_1))), + (!cast(Inst) vt:$vj, vt:$vk)>; +} + let Predicates = [HasExtLASX] in { // XVADD_{B/H/W/D} @@ -2041,22 +2053,22 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)), sub_128)>; // XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU} -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; +defm : XVAvgPat; +defm : XVAvgPat; +defm : XVAvgPat; +defm : XVAvgPat; +defm : XVAvgPat; +defm : XVAvgPat; +defm : XVAvgPat; +defm : XVAvgPat; +defm : XVAvgrPat; +defm : XVAvgrPat; +defm : XVAvgrPat; +defm : XVAvgrPat; +defm : XVAvgrPat; +defm : XVAvgrPat; +defm : XVAvgrPat; +defm : XVAvgrPat; // abs def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 43ad3819029cf..933975a05878b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -72,10 +72,20 @@ def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplg def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>; def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>; +// Vector logicial left / right shift +def loongarch_vsll : SDNode<"LoongArchISD::VSLL", SDT_LoongArchV2R>; +def loongarch_vsrl : SDNode<"LoongArchISD::VSRL", SDT_LoongArchV2R>; + +// Vector arithmetic right shift +def loongarch_vsra : SDNode<"LoongArchISD::VSRA", SDT_LoongArchV2R>; + // Vector logicial left / right shift 
by immediate def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>; def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>; +// Vector arithmetic right shift by immediate +def loongarch_vsrai : SDNode<"LoongArchISD::VSRAI", SDT_LoongArchV1RUimm>; + // Vector byte logicial left / right shift def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>; def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>; @@ -1531,14 +1541,14 @@ multiclass InsertExtractPatV2 { } multiclass VAvgPat { - def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))), + def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (GRLenVT 1)), (!cast(Inst) vt:$vj, vt:$vk)>; } multiclass VAvgrPat { def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1)))), - (vt (vsplat_imm_eq_1))), + (GRLenVT 1)), (!cast(Inst) vt:$vj, vt:$vk)>; } @@ -1641,21 +1651,19 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, (VBSRL_V LSX128:$vj, uimm5:$imm)>; // VSLL[I]_{B/H/W/D} -defm : PatVrVr; -defm : PatShiftVrVr; -defm : PatShiftVrSplatUimm; +defm : PatVrVr; +defm : PatShiftVrVr; defm : PatShiftVrUimm; // VSRL[I]_{B/H/W/D} -defm : PatVrVr; -defm : PatShiftVrVr; -defm : PatShiftVrSplatUimm; +defm : PatVrVr; +defm : PatShiftVrVr; defm : PatShiftVrUimm; // VSRA[I]_{B/H/W/D} -defm : PatVrVr; -defm : PatShiftVrVr; -defm : PatShiftVrSplatUimm; +defm : PatVrVr; +defm : PatShiftVrVr; +defm : PatShiftVrUimm; // VROTR[I]_{B/H/W/D} defm : PatVrVr; @@ -1669,24 +1677,24 @@ defm : PatVr; defm : PatVr; // VBITCLR_{B/H/W/D} -def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj), +def : Pat<(loongarch_vandn (v16i8 (loongarch_vsll vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj), (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj), +def : Pat<(loongarch_vandn (v8i16 (loongarch_vsll vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj), (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj), +def : Pat<(loongarch_vandn (v4i32 (loongarch_vsll vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj), (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj), +def : Pat<(loongarch_vandn (v2i64 (loongarch_vsll vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj), (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; -def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, +def : Pat<(loongarch_vandn (v16i8 (loongarch_vsll vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), v16i8:$vj), (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, +def : Pat<(loongarch_vandn (v8i16 (loongarch_vsll vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), v8i16:$vj), (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, +def : Pat<(loongarch_vandn (v4i32 (loongarch_vsll vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), v4i32:$vj), (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, +def : Pat<(loongarch_vandn (v2i64 (loongarch_vsll vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), v2i64:$vj), (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; @@ -1701,21 +1709,21 @@ def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), (VBITCLRI_D LSX128:$vj, uimm6:$imm)>; // VBITSET_{B/H/W/D} -def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), +def : Pat<(or v16i8:$vj, (loongarch_vsll 
vsplat_imm_eq_1, v16i8:$vk)), (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), +def : Pat<(or v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, v8i16:$vk)), (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), +def : Pat<(or v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, v4i32:$vk)), (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), +def : Pat<(or v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, v2i64:$vk)), (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; -def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), +def : Pat<(or v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), +def : Pat<(or v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), +def : Pat<(or v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), +def : Pat<(or v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; // VBITSETI_{B/H/W/D} @@ -1729,21 +1737,21 @@ def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), (VBITSETI_D LSX128:$vj, uimm6:$imm)>; // VBITREV_{B/H/W/D} -def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), +def : Pat<(xor v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, v16i8:$vk)), (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), +def : Pat<(xor v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, v8i16:$vk)), (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), +def : Pat<(xor v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, v4i32:$vk)), (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), +def : Pat<(xor v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, v2i64:$vk)), (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; -def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), +def : Pat<(xor v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), +def : Pat<(xor v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), +def : Pat<(xor v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), +def : Pat<(xor v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; // VBITREVI_{B/H/W/D} @@ -2191,22 +2199,22 @@ def : Pat<(f64 f64imm_vldi:$in), (f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>; // VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU} -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgrPat; -defm : VAvgrPat; 
-defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; // abs def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>; diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll index b0d36a8143fa1..b043e90d302a6 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll @@ -9,8 +9,10 @@ declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind { ; LA32-LABEL: test_bitreverse_v16i8: ; LA32: # %bb.0: -; LA32-NEXT: vslli.b $vr1, $vr0, 4 +; LA32-NEXT: vandi.b $vr1, $vr0, 15 +; LA32-NEXT: vslli.b $vr1, $vr1, 4 ; LA32-NEXT: vsrli.b $vr0, $vr0, 4 +; LA32-NEXT: vandi.b $vr0, $vr0, 15 ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 ; LA32-NEXT: vandi.b $vr1, $vr0, 51 ; LA32-NEXT: vslli.b $vr1, $vr1, 2 @@ -114,8 +116,10 @@ define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind { ; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) ; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI3_0) ; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1 -; LA32-NEXT: vslli.b $vr1, $vr0, 4 +; LA32-NEXT: vandi.b $vr1, $vr0, 15 +; LA32-NEXT: vslli.b $vr1, $vr1, 4 ; LA32-NEXT: vsrli.b $vr0, $vr0, 4 +; LA32-NEXT: vandi.b $vr0, $vr0, 15 ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 ; LA32-NEXT: vandi.b $vr1, $vr0, 51 ; LA32-NEXT: vslli.b $vr1, $vr1, 2 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll index 334af22edee59..1de393965c7a0 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll @@ -54,22 +54,13 @@ entry: } define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind { -; LA32-LABEL: vavg_d: -; LA32: # %bb.0: # %entry -; LA32-NEXT: vld $vr0, $a1, 0 -; LA32-NEXT: vld $vr1, $a2, 0 -; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 -; LA32-NEXT: vsrai.d $vr0, $vr0, 1 -; LA32-NEXT: vst $vr0, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: vavg_d: -; LA64: # %bb.0: # %entry -; LA64-NEXT: vld $vr0, $a1, 0 -; LA64-NEXT: vld $vr1, $a2, 0 -; LA64-NEXT: vavg.d $vr0, $vr0, $vr1 -; LA64-NEXT: vst $vr0, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: vavg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b @@ -131,22 +122,13 @@ entry: } define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind { -; LA32-LABEL: vavg_du: -; LA32: # %bb.0: # %entry -; LA32-NEXT: vld $vr0, $a1, 0 -; LA32-NEXT: vld $vr1, $a2, 0 -; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 -; LA32-NEXT: vsrli.d $vr0, $vr0, 1 -; LA32-NEXT: vst $vr0, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: vavg_du: -; LA64: # %bb.0: # %entry -; LA64-NEXT: vld $vr0, $a1, 0 -; LA64-NEXT: vld $vr1, $a2, 0 -; LA64-NEXT: vavg.du $vr0, $vr0, $vr1 -; LA64-NEXT: vst $vr0, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: vavg_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb 
= load <2 x i64>, ptr %b @@ -216,8 +198,8 @@ define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind { ; LA32-NEXT: vld $vr0, $a1, 0 ; LA32-NEXT: vld $vr1, $a2, 0 ; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 -; LA32-NEXT: vaddi.du $vr0, $vr0, 1 -; LA32-NEXT: vsrai.d $vr0, $vr0, 1 +; LA32-NEXT: vrepli.d $vr1, 1 +; LA32-NEXT: vavg.d $vr0, $vr0, $vr1 ; LA32-NEXT: vst $vr0, $a0, 0 ; LA32-NEXT: ret ; @@ -298,8 +280,8 @@ define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind { ; LA32-NEXT: vld $vr0, $a1, 0 ; LA32-NEXT: vld $vr1, $a2, 0 ; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 -; LA32-NEXT: vaddi.du $vr0, $vr0, 1 -; LA32-NEXT: vsrli.d $vr0, $vr0, 1 +; LA32-NEXT: vrepli.d $vr1, 1 +; LA32-NEXT: vavg.du $vr0, $vr0, $vr1 ; LA32-NEXT: vst $vr0, $a0, 0 ; LA32-NEXT: ret ; diff --git a/llvm/test/CodeGen/LoongArch/lsx/issue170976.ll b/llvm/test/CodeGen/LoongArch/lsx/issue170976.ll new file mode 100644 index 0000000000000..df4da0178f389 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/issue170976.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define <32 x i8> @test_i8(<32 x i8> %shuffle) { +; CHECK-LABEL: test_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.b $vr2, -85 +; CHECK-NEXT: vmuh.bu $vr0, $vr0, $vr2 +; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT: vmuh.bu $vr1, $vr1, $vr2 +; CHECK-NEXT: vsrli.b $vr1, $vr1, 1 +; CHECK-NEXT: ret +entry: + %div = udiv <32 x i8> %shuffle, splat (i8 3) + ret <32 x i8> %div +} + +define <16 x i16> @test_i16(<16 x i16> %shuffle) { +; CHECK-LABEL: test_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $a0, 10 +; CHECK-NEXT: ori $a0, $a0, 2731 +; CHECK-NEXT: vreplgr2vr.h $vr2, $a0 +; CHECK-NEXT: vmuh.hu $vr0, $vr0, $vr2 +; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT: vmuh.hu $vr1, $vr1, $vr2 +; CHECK-NEXT: vsrli.h $vr1, $vr1, 1 +; CHECK-NEXT: ret +entry: + %div = udiv <16 x i16> %shuffle, splat (i16 3) + ret <16 x i16> %div +} + +define <8 x i32> @test_i32(<8 x i32> %shuffle) { +; CHECK-LABEL: test_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $a0, -349526 +; CHECK-NEXT: ori $a0, $a0, 2731 +; CHECK-NEXT: vreplgr2vr.w $vr2, $a0 +; CHECK-NEXT: vmuh.wu $vr0, $vr0, $vr2 +; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT: vmuh.wu $vr1, $vr1, $vr2 +; CHECK-NEXT: vsrli.w $vr1, $vr1, 1 +; CHECK-NEXT: ret +entry: + %div = udiv <8 x i32> %shuffle, splat (i32 3) + ret <8 x i32> %div +} + +define <4 x i64> @test_i64(<4 x i64> %shuffle) { +; LA32-LABEL: test_i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vrepli.d $vr2, 3 +; LA32-NEXT: vdiv.du $vr0, $vr0, $vr2 +; LA32-NEXT: vdiv.du $vr1, $vr1, $vr2 +; LA32-NEXT: ret +; +; LA64-LABEL: test_i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: lu12i.w $a0, -349526 +; LA64-NEXT: ori $a0, $a0, 2731 +; LA64-NEXT: lu32i.d $a0, -349526 +; LA64-NEXT: lu52i.d $a0, $a0, -1366 +; LA64-NEXT: vreplgr2vr.d $vr2, $a0 +; LA64-NEXT: vmuh.du $vr0, $vr0, $vr2 +; LA64-NEXT: vsrli.d $vr0, $vr0, 1 +; LA64-NEXT: vmuh.du $vr1, $vr1, $vr2 +; LA64-NEXT: vsrli.d $vr1, $vr1, 1 +; LA64-NEXT: ret +entry: + %div = udiv <4 x i64> %shuffle, splat (i64 3) + ret <4 x i64> %div +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll index 8bdeebef13dd2..47b633e5f7895 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll +++ 
b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll @@ -182,6 +182,7 @@ define i2 @vmsk_sgt_v2i8(<2 x i8> %a, <2 x i8> %b) { ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.d $vr0, $vr0, 56 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -197,6 +198,7 @@ define i2 @vmsk_sgt_v2i16(<2 x i16> %a, <2 x i16> %b) { ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.d $vr0, $vr0, 48 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -211,6 +213,7 @@ define i2 @vmsk_sgt_v2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 ; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16 ; CHECK-NEXT: vslli.d $vr0, $vr0, 32 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -250,6 +253,7 @@ define i4 @vmsk_sgt_v4i8(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 24 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -264,6 +268,7 @@ define i4 @vmsk_sgt_v4i16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 16 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -302,6 +307,7 @@ define i8 @vmsk_sgt_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.h $vr0, $vr0, 8 +; CHECK-NEXT: vsrai.h $vr0, $vr0, 8 ; CHECK-NEXT: vmskltz.h $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -344,6 +350,7 @@ define i2 @vmsk_sgt_and_sgt_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.d $vr0, $vr0, 56 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -363,6 +370,7 @@ define i2 @vmsk_sgt_and_sgt_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.d $vr0, $vr0, 48 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -381,6 +389,7 @@ define i2 @vmsk_sgt_and_sgt_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x ; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16 ; CHECK-NEXT: vslli.d $vr0, $vr0, 32 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -432,6 +441,7 @@ define i4 @vmsk_sgt_and_sgt_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 24 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -450,6 +460,7 @@ define i4 @vmsk_sgt_and_sgt_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x ; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: 
vslli.w $vr0, $vr0, 16 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -500,6 +511,7 @@ define i8 @vmsk_sgt_and_sgt_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> ; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.h $vr0, $vr0, 8 +; CHECK-NEXT: vsrai.h $vr0, $vr0, 8 ; CHECK-NEXT: vmskltz.h $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -597,6 +609,7 @@ define i4 @vmsk_eq_allzeros_v4i8(<4 x i8> %a) { ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 24 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret From 9dbe7006892c524666fef45d7cd33a6f5d119378 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Mon, 8 Dec 2025 19:26:14 +0800 Subject: [PATCH 2/4] Address weining's comments --- .../LoongArch/LoongArchISelLowering.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 2356f551c119e..9c079fa1e932b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -626,10 +626,10 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return SDValue(); } -/// getVShiftImm - Check if this is a valid build_vector for the immediate +/// getVShiftAmt - Check if this is a valid build_vector for the immediate /// operand of a vector shift operation, where all the elements of the /// build_vector must have the same constant integer value. -static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { +static bool getVShiftAmt(SDValue Op, unsigned ElementBits, int64_t &Amt) { // Ignore bit_converts. while (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); @@ -642,7 +642,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { ElementBits) || SplatBitSize > ElementBits) return false; - Cnt = SplatBits.getSExtValue(); + Amt = SplatBits.getSExtValue(); return true; } @@ -651,7 +651,7 @@ LoongArchTargetLowering::lowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); - int64_t Cnt; + int64_t Amt; if (!Op.getOperand(1).getValueType().isVector()) return Op; @@ -660,20 +660,20 @@ LoongArchTargetLowering::lowerVectorSRA_SRL_SHL(SDValue Op, switch (Op.getOpcode()) { case ISD::SHL: - if (getVShiftImm(Op.getOperand(1), EltSize, Cnt) && Cnt >= 0 && - Cnt < EltSize) + if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && + Amt < EltSize) return DAG.getNode(LoongArchISD::VSLLI, DL, VT, Op.getOperand(0), - DAG.getConstant(Cnt, DL, GRLenVT)); + DAG.getConstant(Amt, DL, GRLenVT)); return DAG.getNode(LoongArchISD::VSLL, DL, VT, Op.getOperand(0), Op.getOperand(1)); case ISD::SRA: case ISD::SRL: - if (getVShiftImm(Op.getOperand(1), EltSize, Cnt) && Cnt >= 0 && - Cnt < EltSize) { + if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && + Amt < EltSize) { unsigned Opc = (Op.getOpcode() == ISD::SRA) ? LoongArchISD::VSRAI : LoongArchISD::VSRLI; return DAG.getNode(Opc, DL, VT, Op.getOperand(0), - DAG.getConstant(Cnt, DL, GRLenVT)); + DAG.getConstant(Amt, DL, GRLenVT)); } unsigned Opc = (Op.getOpcode() == ISD::SRA) ? 
LoongArchISD::VSRA : LoongArchISD::VSRL; From c8b91459f061590c0292a58f52a9e91136b5a985 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 9 Dec 2025 11:51:23 +0800 Subject: [PATCH 3/4] Address zhaoqi's comments --- .../LoongArch/LoongArchISelLowering.cpp | 32 ++--- .../Target/LoongArch/LoongArchISelLowering.h | 2 +- .../LoongArch/LoongArchLASXInstrInfo.td | 49 +++----- .../Target/LoongArch/LoongArchLSXInstrInfo.td | 112 +++++++++--------- .../test/CodeGen/LoongArch/lasx/bitreverse.ll | 2 + .../LoongArch/lasx/ir-instruction/avg.ll | 27 ++--- .../CodeGen/LoongArch/lasx/issue170976.ll | 74 ++++++++++++ llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll | 6 +- .../LoongArch/lsx/ir-instruction/avg.ll | 27 +++-- llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll | 13 -- 10 files changed, 192 insertions(+), 152 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/issue170976.ll diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 9c079fa1e932b..a75c2e67c5111 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -338,7 +338,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, VT, Legal); setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Custom); + setOperationAction({ISD::SHL, ISD::SRA}, VT, Legal); setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); setCondCodeAction( @@ -354,6 +354,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::USUBSAT, VT, Legal); setOperationAction(ISD::ROTL, VT, Custom); setOperationAction(ISD::ROTR, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); } for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) setOperationAction(ISD::BITREVERSE, VT, Custom); @@ -427,7 +428,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, VT, Legal); setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA}, VT, Legal); setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); setCondCodeAction( @@ -444,6 +445,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); setOperationAction(ISD::ROTL, VT, Custom); setOperationAction(ISD::ROTR, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); } for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32}) setOperationAction(ISD::BITREVERSE, VT, Custom); @@ -618,10 +620,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerVECREDUCE(Op, DAG); case ISD::ConstantFP: return lowerConstantFP(Op, DAG); - case ISD::SRA: case ISD::SRL: - case ISD::SHL: - return lowerVectorSRA_SRL_SHL(Op, DAG); + return lowerVectorSRL(Op, DAG); } return SDValue(); } @@ -646,8 +646,7 @@ static bool getVShiftAmt(SDValue Op, unsigned ElementBits, int64_t &Amt) { return true; } -SDValue -LoongArchTargetLowering::lowerVectorSRA_SRL_SHL(SDValue Op, +SDValue LoongArchTargetLowering::lowerVectorSRL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); @@ -658,26 +657,13 @@ 
LoongArchTargetLowering::lowerVectorSRA_SRL_SHL(SDValue Op, unsigned EltSize = VT.getScalarSizeInBits(); MVT GRLenVT = Subtarget.getGRLenVT(); - switch (Op.getOpcode()) { - case ISD::SHL: + if (Op.getOpcode() == ISD::SRL) { if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && Amt < EltSize) - return DAG.getNode(LoongArchISD::VSLLI, DL, VT, Op.getOperand(0), + return DAG.getNode(LoongArchISD::VSRLI, DL, VT, Op.getOperand(0), DAG.getConstant(Amt, DL, GRLenVT)); - return DAG.getNode(LoongArchISD::VSLL, DL, VT, Op.getOperand(0), + return DAG.getNode(LoongArchISD::VSRL, DL, VT, Op.getOperand(0), Op.getOperand(1)); - case ISD::SRA: - case ISD::SRL: - if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && - Amt < EltSize) { - unsigned Opc = (Op.getOpcode() == ISD::SRA) ? LoongArchISD::VSRAI - : LoongArchISD::VSRLI; - return DAG.getNode(Opc, DL, VT, Op.getOperand(0), - DAG.getConstant(Amt, DL, GRLenVT)); - } - unsigned Opc = - (Op.getOpcode() == ISD::SRA) ? LoongArchISD::VSRA : LoongArchISD::VSRL; - return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1)); } llvm_unreachable("unexpected shift opcode"); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 6ad14ea9d6951..84622c30c0999 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -240,7 +240,7 @@ class LoongArchTargetLowering : public TargetLowering { SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVectorSRL(SDValue Op, SelectionDAG &DAG) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 6bb74e76fabc6..5896ca3f5a980 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1332,18 +1332,6 @@ multiclass PairInsertExtractPatV4 { } } -multiclass XVAvgPat { - def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))), - (!cast(Inst) vt:$vj, vt:$vk)>; -} - -multiclass XVAvgrPat { - def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)), - (vt (vsplat_imm_eq_1)))), - (vt (vsplat_imm_eq_1))), - (!cast(Inst) vt:$vj, vt:$vk)>; -} - let Predicates = [HasExtLASX] in { // XVADD_{B/H/W/D} @@ -1449,9 +1437,8 @@ defm : PatShiftXrSplatUimm; defm : PatShiftXrUimm; // XVSRL[I]_{B/H/W/D} -defm : PatXrXr; -defm : PatShiftXrXr; -defm : PatShiftXrSplatUimm; +defm : PatXrXr; +defm : PatShiftXrXr; defm : PatShiftXrUimm; // XVSRA[I]_{B/H/W/D} @@ -2053,22 +2040,22 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)), sub_128)>; // XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU} -defm : XVAvgPat; -defm : XVAvgPat; -defm : XVAvgPat; -defm : XVAvgPat; -defm : XVAvgPat; -defm : XVAvgPat; -defm : XVAvgPat; -defm : XVAvgPat; -defm : XVAvgrPat; -defm : XVAvgrPat; -defm : XVAvgrPat; -defm : XVAvgrPat; -defm : XVAvgrPat; -defm : XVAvgrPat; -defm : XVAvgrPat; -defm : XVAvgrPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgIPat; +defm : VAvgIPat; +defm : VAvgIPat; +defm : VAvgIPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrIPat; +defm : VAvgrIPat; +defm : VAvgrIPat; +defm : VAvgrIPat; // 
abs def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 933975a05878b..96bf8a2db835d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -72,20 +72,13 @@ def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplg def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>; def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>; -// Vector logicial left / right shift -def loongarch_vsll : SDNode<"LoongArchISD::VSLL", SDT_LoongArchV2R>; +// Vector logicial right shift def loongarch_vsrl : SDNode<"LoongArchISD::VSRL", SDT_LoongArchV2R>; -// Vector arithmetic right shift -def loongarch_vsra : SDNode<"LoongArchISD::VSRA", SDT_LoongArchV2R>; - // Vector logicial left / right shift by immediate def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>; def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>; -// Vector arithmetic right shift by immediate -def loongarch_vsrai : SDNode<"LoongArchISD::VSRAI", SDT_LoongArchV1RUimm>; - // Vector byte logicial left / right shift def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>; def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>; @@ -1541,11 +1534,23 @@ multiclass InsertExtractPatV2 { } multiclass VAvgPat { + def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))), + (!cast(Inst) vt:$vj, vt:$vk)>; +} + +multiclass VAvgIPat { def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (GRLenVT 1)), (!cast(Inst) vt:$vj, vt:$vk)>; } multiclass VAvgrPat { + def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)), + (vt (vsplat_imm_eq_1)))), + (vt (vsplat_imm_eq_1))), + (!cast(Inst) vt:$vj, vt:$vk)>; +} + +multiclass VAvgrIPat { def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1)))), (GRLenVT 1)), @@ -1651,8 +1656,9 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, (VBSRL_V LSX128:$vj, uimm5:$imm)>; // VSLL[I]_{B/H/W/D} -defm : PatVrVr; -defm : PatShiftVrVr; +defm : PatVrVr; +defm : PatShiftVrVr; +defm : PatShiftVrSplatUimm; defm : PatShiftVrUimm; // VSRL[I]_{B/H/W/D} @@ -1661,9 +1667,9 @@ defm : PatShiftVrVr; defm : PatShiftVrUimm; // VSRA[I]_{B/H/W/D} -defm : PatVrVr; -defm : PatShiftVrVr; -defm : PatShiftVrUimm; +defm : PatVrVr; +defm : PatShiftVrVr; +defm : PatShiftVrSplatUimm; // VROTR[I]_{B/H/W/D} defm : PatVrVr; @@ -1677,24 +1683,24 @@ defm : PatVr; defm : PatVr; // VBITCLR_{B/H/W/D} -def : Pat<(loongarch_vandn (v16i8 (loongarch_vsll vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj), +def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj), (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(loongarch_vandn (v8i16 (loongarch_vsll vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj), +def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj), (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(loongarch_vandn (v4i32 (loongarch_vsll vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj), +def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj), (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(loongarch_vandn (v2i64 (loongarch_vsll vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj), +def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj), (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; -def : Pat<(loongarch_vandn (v16i8 
(loongarch_vsll vsplat_imm_eq_1, +def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), v16i8:$vj), (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(loongarch_vandn (v8i16 (loongarch_vsll vsplat_imm_eq_1, +def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), v8i16:$vj), (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(loongarch_vandn (v4i32 (loongarch_vsll vsplat_imm_eq_1, +def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), v4i32:$vj), (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(loongarch_vandn (v2i64 (loongarch_vsll vsplat_imm_eq_1, +def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), v2i64:$vj), (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; @@ -1709,21 +1715,21 @@ def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), (VBITCLRI_D LSX128:$vj, uimm6:$imm)>; // VBITSET_{B/H/W/D} -def : Pat<(or v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, v16i8:$vk)), +def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(or v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, v8i16:$vk)), +def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(or v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, v4i32:$vk)), +def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(or v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, v2i64:$vk)), +def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; -def : Pat<(or v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), +def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(or v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), +def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(or v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), +def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(or v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), +def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; // VBITSETI_{B/H/W/D} @@ -1737,21 +1743,21 @@ def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), (VBITSETI_D LSX128:$vj, uimm6:$imm)>; // VBITREV_{B/H/W/D} -def : Pat<(xor v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, v16i8:$vk)), +def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(xor v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, v8i16:$vk)), +def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(xor v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, v4i32:$vk)), +def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(xor v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, v2i64:$vk)), +def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; -def : Pat<(xor v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), +def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 
v16i8:$vk))), (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(xor v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), +def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(xor v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), +def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(xor v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), +def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; // VBITREVI_{B/H/W/D} @@ -2199,22 +2205,22 @@ def : Pat<(f64 f64imm_vldi:$in), (f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>; // VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU} -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; -defm : VAvgrPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgIPat; +defm : VAvgIPat; +defm : VAvgIPat; +defm : VAvgIPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrIPat; +defm : VAvgrIPat; +defm : VAvgrIPat; +defm : VAvgrIPat; // abs def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>; diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll index 8b12216d0f856..7f663d8de3cb8 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll @@ -11,6 +11,7 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: xvslli.b $xr1, $xr0, 4 ; LA32-NEXT: xvsrli.b $xr0, $xr0, 4 +; LA32-NEXT: xvandi.b $xr0, $xr0, 15 ; LA32-NEXT: xvor.v $xr0, $xr0, $xr1 ; LA32-NEXT: xvandi.b $xr1, $xr0, 51 ; LA32-NEXT: xvslli.b $xr1, $xr1, 2 @@ -163,6 +164,7 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind { ; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1 ; LA32-NEXT: xvslli.b $xr1, $xr0, 4 ; LA32-NEXT: xvsrli.b $xr0, $xr0, 4 +; LA32-NEXT: xvandi.b $xr0, $xr0, 15 ; LA32-NEXT: xvor.v $xr0, $xr0, $xr1 ; LA32-NEXT: xvandi.b $xr1, $xr0, 51 ; LA32-NEXT: xvslli.b $xr1, $xr1, 2 diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll index 5c5c19935080b..0577a116bee5a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll @@ -131,22 +131,13 @@ entry: } define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind { -; LA32-LABEL: xvavg_du: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvld $xr1, $a2, 0 -; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 -; LA32-NEXT: xvsrli.d $xr0, $xr0, 1 -; LA32-NEXT: xvst $xr0, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: xvavg_du: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvld $xr1, $a2, 0 -; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1 -; LA64-NEXT: xvst $xr0, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: xvavg_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = 
load <4 x i64>, ptr %b @@ -298,8 +289,8 @@ define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind { ; LA32-NEXT: xvld $xr0, $a1, 0 ; LA32-NEXT: xvld $xr1, $a2, 0 ; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 -; LA32-NEXT: xvaddi.du $xr0, $xr0, 1 -; LA32-NEXT: xvsrli.d $xr0, $xr0, 1 +; LA32-NEXT: xvrepli.d $xr1, 1 +; LA32-NEXT: xvavg.du $xr0, $xr0, $xr1 ; LA32-NEXT: xvst $xr0, $a0, 0 ; LA32-NEXT: ret ; diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue170976.ll b/llvm/test/CodeGen/LoongArch/lasx/issue170976.ll new file mode 100644 index 0000000000000..9b17d7b8c9767 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/issue170976.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define <64 x i8> @test_i8(<64 x i8> %shuffle) { +; CHECK-LABEL: test_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.b $xr2, -85 +; CHECK-NEXT: xvmuh.bu $xr0, $xr0, $xr2 +; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT: xvmuh.bu $xr1, $xr1, $xr2 +; CHECK-NEXT: xvsrli.b $xr1, $xr1, 1 +; CHECK-NEXT: ret +entry: + %div = udiv <64 x i8> %shuffle, splat (i8 3) + ret <64 x i8> %div +} + +define <32 x i16> @test_i16(<32 x i16> %shuffle) { +; CHECK-LABEL: test_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $a0, 10 +; CHECK-NEXT: ori $a0, $a0, 2731 +; CHECK-NEXT: xvreplgr2vr.h $xr2, $a0 +; CHECK-NEXT: xvmuh.hu $xr0, $xr0, $xr2 +; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT: xvmuh.hu $xr1, $xr1, $xr2 +; CHECK-NEXT: xvsrli.h $xr1, $xr1, 1 +; CHECK-NEXT: ret +entry: + %div = udiv <32 x i16> %shuffle, splat (i16 3) + ret <32 x i16> %div +} + +define <16 x i32> @test_i32(<16 x i32> %shuffle) { +; CHECK-LABEL: test_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $a0, -349526 +; CHECK-NEXT: ori $a0, $a0, 2731 +; CHECK-NEXT: xvreplgr2vr.w $xr2, $a0 +; CHECK-NEXT: xvmuh.wu $xr0, $xr0, $xr2 +; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT: xvmuh.wu $xr1, $xr1, $xr2 +; CHECK-NEXT: xvsrli.w $xr1, $xr1, 1 +; CHECK-NEXT: ret +entry: + %div = udiv <16 x i32> %shuffle, splat (i32 3) + ret <16 x i32> %div +} + +define <8 x i64> @test_i64(<8 x i64> %shuffle) { +; LA32-LABEL: test_i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvrepli.d $xr2, 3 +; LA32-NEXT: xvdiv.du $xr0, $xr0, $xr2 +; LA32-NEXT: xvdiv.du $xr1, $xr1, $xr2 +; LA32-NEXT: ret +; +; LA64-LABEL: test_i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: lu12i.w $a0, -349526 +; LA64-NEXT: ori $a0, $a0, 2731 +; LA64-NEXT: lu32i.d $a0, -349526 +; LA64-NEXT: lu52i.d $a0, $a0, -1366 +; LA64-NEXT: xvreplgr2vr.d $xr2, $a0 +; LA64-NEXT: xvmuh.du $xr0, $xr0, $xr2 +; LA64-NEXT: xvsrli.d $xr0, $xr0, 1 +; LA64-NEXT: xvmuh.du $xr1, $xr1, $xr2 +; LA64-NEXT: xvsrli.d $xr1, $xr1, 1 +; LA64-NEXT: ret +entry: + %div = udiv <8 x i64> %shuffle, splat (i64 3) + ret <8 x i64> %div +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll index b043e90d302a6..ba84e5c136de3 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll @@ -9,8 +9,7 @@ declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind { ; LA32-LABEL: test_bitreverse_v16i8: ; LA32: # %bb.0: -; LA32-NEXT: vandi.b $vr1, $vr0, 15 -; LA32-NEXT: vslli.b $vr1, $vr1, 4 +; LA32-NEXT: vslli.b $vr1, $vr0, 4 ; LA32-NEXT: 
vsrli.b $vr0, $vr0, 4 ; LA32-NEXT: vandi.b $vr0, $vr0, 15 ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 @@ -116,8 +115,7 @@ define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind { ; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) ; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI3_0) ; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1 -; LA32-NEXT: vandi.b $vr1, $vr0, 15 -; LA32-NEXT: vslli.b $vr1, $vr1, 4 +; LA32-NEXT: vslli.b $vr1, $vr0, 4 ; LA32-NEXT: vsrli.b $vr0, $vr0, 4 ; LA32-NEXT: vandi.b $vr0, $vr0, 15 ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll index 1de393965c7a0..8e700689fdc58 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll @@ -54,13 +54,22 @@ entry: } define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavg_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vavg_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vsrai.d $vr0, $vr0, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: vavg_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: vavg.d $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b @@ -198,8 +207,8 @@ define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind { ; LA32-NEXT: vld $vr0, $a1, 0 ; LA32-NEXT: vld $vr1, $a2, 0 ; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 -; LA32-NEXT: vrepli.d $vr1, 1 -; LA32-NEXT: vavg.d $vr0, $vr0, $vr1 +; LA32-NEXT: vaddi.du $vr0, $vr0, 1 +; LA32-NEXT: vsrai.d $vr0, $vr0, 1 ; LA32-NEXT: vst $vr0, $a0, 0 ; LA32-NEXT: ret ; diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll index 47b633e5f7895..8bdeebef13dd2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll @@ -182,7 +182,6 @@ define i2 @vmsk_sgt_v2i8(<2 x i8> %a, <2 x i8> %b) { ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.d $vr0, $vr0, 56 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -198,7 +197,6 @@ define i2 @vmsk_sgt_v2i16(<2 x i16> %a, <2 x i16> %b) { ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.d $vr0, $vr0, 48 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -213,7 +211,6 @@ define i2 @vmsk_sgt_v2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 ; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16 ; CHECK-NEXT: vslli.d $vr0, $vr0, 32 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -253,7 +250,6 @@ define i4 @vmsk_sgt_v4i8(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 24 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -268,7 +264,6 @@ define i4 
@vmsk_sgt_v4i16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 16 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -307,7 +302,6 @@ define i8 @vmsk_sgt_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.h $vr0, $vr0, 8 -; CHECK-NEXT: vsrai.h $vr0, $vr0, 8 ; CHECK-NEXT: vmskltz.h $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -350,7 +344,6 @@ define i2 @vmsk_sgt_and_sgt_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.d $vr0, $vr0, 56 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -370,7 +363,6 @@ define i2 @vmsk_sgt_and_sgt_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.d $vr0, $vr0, 48 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -389,7 +381,6 @@ define i2 @vmsk_sgt_and_sgt_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x ; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16 ; CHECK-NEXT: vslli.d $vr0, $vr0, 32 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 ; CHECK-NEXT: vmskltz.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -441,7 +432,6 @@ define i4 @vmsk_sgt_and_sgt_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 24 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -460,7 +450,6 @@ define i4 @vmsk_sgt_and_sgt_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x ; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 16 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -511,7 +500,6 @@ define i8 @vmsk_sgt_and_sgt_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> ; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.h $vr0, $vr0, 8 -; CHECK-NEXT: vsrai.h $vr0, $vr0, 8 ; CHECK-NEXT: vmskltz.h $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret @@ -609,7 +597,6 @@ define i4 @vmsk_eq_allzeros_v4i8(<4 x i8> %a) { ; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 24 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 ; CHECK-NEXT: vmskltz.w $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 ; CHECK-NEXT: ret From b93929300f9ae2a482741eb4139aab85b5896b4c Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Wed, 10 Dec 2025 15:02:55 +0800 Subject: [PATCH 4/4] Address weining's comments --- .../Target/LoongArch/LoongArchISelLowering.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index a75c2e67c5111..4d232028133db 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -657,16 +657,12 @@ SDValue LoongArchTargetLowering::lowerVectorSRL(SDValue Op, unsigned EltSize = VT.getScalarSizeInBits(); MVT GRLenVT = Subtarget.getGRLenVT(); - if (Op.getOpcode() == ISD::SRL) { - if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && - Amt < EltSize) - return DAG.getNode(LoongArchISD::VSRLI, DL, VT, Op.getOperand(0), - DAG.getConstant(Amt, DL, GRLenVT)); - return DAG.getNode(LoongArchISD::VSRL, DL, VT, Op.getOperand(0), - Op.getOperand(1)); - } - - llvm_unreachable("unexpected shift opcode"); + assert(Op.getOpcode() == ISD::SRL && "unexpected shift opcode"); + if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && Amt < EltSize) + return DAG.getNode(LoongArchISD::VSRLI, DL, VT, Op.getOperand(0), + DAG.getConstant(Amt, DL, GRLenVT)); + return DAG.getNode(LoongArchISD::VSRL, DL, VT, Op.getOperand(0), + Op.getOperand(1)); } // Helper to attempt to return a cheaper, bit-inverted version of \p V.
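
Illustration only, not part of the patch series: with the final form of lowerVectorSRL above, a splat-constant logical right shift is expected to become LoongArchISD::VSRLI (immediate form), while a non-splat amount keeps LoongArchISD::VSRL (two-register form). The constant case matches the vsrli.* checks in the issue170976.ll tests added by this series; the exact instruction names noted below are an assumption based on those patterns and on the VSRL[I] TableGen patterns, not output taken from the patch.

; Minimal IR sketch, assuming LSX is enabled (e.g. llc --mtriple=loongarch64 --mattr=+lsx).
define <2 x i64> @srl_const(<2 x i64> %v) {
  ; Splat-constant amount: lowerVectorSRL should emit LoongArchISD::VSRLI,
  ; which the PatShiftVrUimm pattern selects as a single vsrli.d.
  %r = lshr <2 x i64> %v, splat (i64 3)
  ret <2 x i64> %r
}

define <2 x i64> @srl_var(<2 x i64> %v, <2 x i64> %amt) {
  ; Non-splat amount: the node stays LoongArchISD::VSRL and should select vsrl.d.
  %r = lshr <2 x i64> %v, %amt
  ret <2 x i64> %r
}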