From 2c1a5671989bf735407822be0972100e3d15d3d3 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Wed, 15 May 2024 10:06:13 +0800 Subject: [PATCH] [LoongArch] Select {DIV,MOD}.{W,WU} instruction to eliminate explicit sign extension --- .../LoongArch/LoongArchISelLowering.cpp | 13 +++ .../Target/LoongArch/LoongArchISelLowering.h | 4 + .../Target/LoongArch/LoongArchInstrInfo.td | 6 ++ .../ir-instruction/sdiv-udiv-srem-urem.ll | 96 +++++++------------ 4 files changed, 57 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 21d520656091c..abce5fe9efa05 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -139,6 +139,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); setOperationAction(ISD::BSWAP, MVT::i32, Custom); + setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom); } // Set operations for LA32 only. @@ -1665,6 +1666,10 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unexpected opcode"); + case ISD::UDIV: + return LoongArchISD::DIV_WU; + case ISD::UREM: + return LoongArchISD::MOD_WU; case ISD::SHL: return LoongArchISD::SLL_W; case ISD::SRA: @@ -1841,6 +1846,12 @@ void LoongArchTargetLowering::ReplaceNodeResults( switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to legalize this operation"); + case ISD::UDIV: + case ISD::UREM: + assert(VT == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND)); + break; case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -3445,6 +3456,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BITREV_W) NODE_NAME_CASE(ROTR_W) NODE_NAME_CASE(ROTL_W) + NODE_NAME_CASE(DIV_WU) + NODE_NAME_CASE(MOD_WU) NODE_NAME_CASE(CLZ_W) NODE_NAME_CASE(CTZ_W) NODE_NAME_CASE(DBAR) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 31b4d65195634..8a2473cdfa68c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -43,6 +43,10 @@ enum NodeType : unsigned { ROTL_W, ROTR_W, + // unsigned 32-bit integer division + DIV_WU, + MOD_WU, + // FPR<->GPR transfer operations MOVGR2FR_W_LA64, MOVFR2GR_S_LA64, diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index f56f8f7e1179c..35ea9f07866d5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -85,6 +85,8 @@ def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>; +def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>; +def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>; def loongarch_crc_w_b_w : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; def loongarch_crc_w_h_w @@ -1110,9 +1112,13 @@ def : PatGprImm_32; def : PatGprGpr; def : PatGprGpr_32; def : PatGprGpr; +def : PatGprGpr_32; def : PatGprGpr; +def : PatGprGpr; def : PatGprGpr; +def : PatGprGpr_32; def : PatGprGpr; +def : PatGprGpr; def : PatGprGpr; def : PatGprGpr; def : PatGprGpr_32; diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll index 2064c398948fe..ab3eec240db3c 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll @@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) { ; LA64: # %bb.0: # %entry ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: div.d $a0, $a0, $a1 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: div.w $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32: @@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) { ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 ; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 -; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 +; LA64-TRAP-NEXT: div.w $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB5_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 ; LA64-TRAP-NEXT: .LBB5_2: # %entry -; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: ret entry: %r = sdiv i32 %a, %b @@ -228,8 +226,7 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-LABEL: sdiv_si32_si32_si32: ; LA64: # %bb.0: # %entry -; LA64-NEXT: div.d $a0, $a0, $a1 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: div.w $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: sdiv_si32_si32_si32: @@ -243,12 +240,11 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-TRAP-LABEL: sdiv_si32_si32_si32: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 +; LA64-TRAP-NEXT: div.w $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB6_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 ; LA64-TRAP-NEXT: .LBB6_2: # %entry -; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: ret entry: %r = sdiv i32 %a, %b @@ -407,9 +403,9 @@ define i32 @udiv_i32(i32 %a, i32 %b) { ; ; LA64-LABEL: udiv_i32: ; LA64: # %bb.0: # %entry -; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: div.du $a0, $a0, $a1 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: div.wu $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: udiv_i32: @@ -423,9 +419,9 @@ define i32 @udiv_i32(i32 %a, i32 %b) { ; ; LA64-TRAP-LABEL: udiv_i32: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 +; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 +; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 +; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB11_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 @@ -444,9 +440,7 @@ define i32 @udiv_ui32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-LABEL: udiv_ui32_si32_si32: ; LA64: # %bb.0: # %entry -; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: div.du $a0, $a0, $a1 +; LA64-NEXT: div.wu $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: udiv_ui32_si32_si32: @@ -460,9 +454,7 @@ define i32 @udiv_ui32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-TRAP-LABEL: udiv_ui32_si32_si32: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 +; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB12_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 @@ -481,10 +473,9 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) { ; ; LA64-LABEL: udiv_si32_ui32_ui32: ; LA64: # %bb.0: # %entry -; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: div.du $a0, $a0, $a1 +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: div.wu $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: udiv_si32_ui32_ui32: @@ -498,14 +489,13 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) { ; ; LA64-TRAP-LABEL: udiv_si32_ui32_ui32: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 +; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 +; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 +; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB13_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 ; LA64-TRAP-NEXT: .LBB13_2: # %entry -; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: ret entry: %r = udiv i32 %a, %b @@ -520,10 +510,7 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-LABEL: udiv_si32_si32_si32: ; LA64: # %bb.0: # %entry -; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: div.du $a0, $a0, $a1 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: div.wu $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: udiv_si32_si32_si32: @@ -537,14 +524,11 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-TRAP-LABEL: udiv_si32_si32_si32: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 +; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB14_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 ; LA64-TRAP-NEXT: .LBB14_2: # %entry -; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: ret entry: %r = udiv i32 %a, %b @@ -995,9 +979,9 @@ define i32 @urem_i32(i32 %a, i32 %b) { ; ; LA64-LABEL: urem_i32: ; LA64: # %bb.0: # %entry -; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: mod.du $a0, $a0, $a1 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: mod.wu $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: urem_i32: @@ -1011,9 +995,9 @@ define i32 @urem_i32(i32 %a, i32 %b) { ; ; LA64-TRAP-LABEL: urem_i32: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 +; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 +; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 +; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB27_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 @@ -1032,9 +1016,7 @@ define i32 @urem_ui32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-LABEL: urem_ui32_si32_si32: ; LA64: # %bb.0: # %entry -; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: mod.du $a0, $a0, $a1 +; LA64-NEXT: mod.wu $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: urem_ui32_si32_si32: @@ -1048,9 +1030,7 @@ define i32 @urem_ui32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-TRAP-LABEL: urem_ui32_si32_si32: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 +; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB28_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 @@ -1069,10 +1049,9 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) { ; ; LA64-LABEL: urem_si32_ui32_ui32: ; LA64: # %bb.0: # %entry -; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: mod.du $a0, $a0, $a1 +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: mod.wu $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: urem_si32_ui32_ui32: @@ -1086,14 +1065,13 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) { ; ; LA64-TRAP-LABEL: urem_si32_ui32_ui32: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 +; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 +; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 +; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB29_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 ; LA64-TRAP-NEXT: .LBB29_2: # %entry -; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: ret entry: %r = urem i32 %a, %b @@ -1108,10 +1086,7 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-LABEL: urem_si32_si32_si32: ; LA64: # %bb.0: # %entry -; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: mod.du $a0, $a0, $a1 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: mod.wu $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: urem_si32_si32_si32: @@ -1125,14 +1100,11 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) { ; ; LA64-TRAP-LABEL: urem_si32_si32_si32: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 -; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 +; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB30_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 ; LA64-TRAP-NEXT: .LBB30_2: # %entry -; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: ret entry: %r = urem i32 %a, %b