From 2b8cb7d87fcb7c93ea4e60bed6185f05308c98f1 Mon Sep 17 00:00:00 2001
From: WANG Xuerui <git@xen0n.name>
Date: Tue, 14 Feb 2023 08:39:19 +0800
Subject: [PATCH] [LoongArch] Make use of addu16i.d for adds with suitable immediates

Ideally `addu16i.d` could be paired with `{ld,st}ptr` for faster memory
accesses with 32-bit-aligned offsets (it was designed for this purpose),
but it would require more work and the original use case (GP-relative
accesses) does not exist any more with the current LoongArch psABI. It
could still be used for accelerating additions of certain constants
though, which is what this patch intends to do.

Reviewed By: SixWeining, gonglingqin

Differential Revision: https://reviews.llvm.org/D143710
---
 .../Target/LoongArch/LoongArchInstrInfo.td    |  40 +++++++
 .../CodeGen/LoongArch/ir-instruction/add.ll   | 110 +++++++-----------
 2 files changed, 84 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 0e968e040ce23..0606c65f9d3b1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -328,6 +328,17 @@ def simm26_symbol : Operand<GRLenVT> {
   let DecoderMethod = "decodeSImmOperand<26, 2>";
 }
 
+// A 32-bit signed immediate with the lowest 16 bits zeroed, suitable for
+// direct use with `addu16i.d`.
+def simm16_lsl16 : Operand<GRLenVT>,
+    ImmLeaf<GRLenVT, [{return isShiftedInt<16, 16>(Imm);}]>;
+
+// A 32-bit signed immediate expressible with a pair of `addu16i.d + addi` for
+// use in additions.
+def simm32_hi16_lo12: Operand<GRLenVT>, ImmLeaf<GRLenVT, [{
+  return isShiftedInt<16, 16>(Imm - SignExtend64<12>(Imm));
+}]>;
+
 def BareSymbol : AsmOperandClass {
   let Name = "BareSymbol";
   let RenderMethod = "addImmOperands";
@@ -363,6 +374,26 @@ def ImmSubFrom32 : SDNodeXForm<imm, [{
                                    N->getValueType(0));
 }]>;
 
+// Return the lowest 12 bits of the signed immediate.
+def LO12: SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(SignExtend64<12>(N->getSExtValue()),
+                                   SDLoc(N), N->getValueType(0));
+}]>;
+
+// Return the higher 16 bits of the signed immediate.
+def HI16 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue() >> 16, SDLoc(N),
+                                   N->getValueType(0));
+}]>;
+
+// Return the higher 16 bits of the signed immediate, adjusted for use within an
+// `addu16i.d + addi` pair.
+def HI16ForAddu16idAddiPair: SDNodeXForm<imm, [{
+  int64_t Imm = N->getSExtValue();
+  return CurDAG->getTargetConstant((Imm - SignExtend64<12>(Imm)) >> 16,
+                                   SDLoc(N), N->getValueType(0));
+}]>;
+
 def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
 def AddrConstant : ComplexPattern<iPTR, 2, "SelectAddrConstant">;
 def NonFIBaseAddr : ComplexPattern<iPTR, 1, "selectNonFIBaseAddr">;
@@ -851,6 +882,15 @@ def : Pat<(i64 (mul (sext_inreg GPR:$rj, i32), (sext_inreg GPR:$rk, i32))),
 def : Pat<(i64 (mul (loongarch_bstrpick GPR:$rj, (i64 31), (i64 0)),
                     (loongarch_bstrpick GPR:$rk, (i64 31), (i64 0)))),
           (MULW_D_WU GPR:$rj, GPR:$rk)>;
+
+def : Pat<(add GPR:$rj, simm16_lsl16:$imm),
+          (ADDU16I_D GPR:$rj, (HI16 $imm))>;
+def : Pat<(add GPR:$rj, simm32_hi16_lo12:$imm),
+          (ADDI_D (ADDU16I_D GPR:$rj, (HI16ForAddu16idAddiPair $imm)),
+                  (LO12 $imm))>;
+def : Pat<(sext_inreg (add GPR:$rj, simm32_hi16_lo12:$imm), i32),
+          (ADDI_W (ADDU16I_D GPR:$rj, (HI16ForAddu16idAddiPair $imm)),
+                  (LO12 $imm))>;
 } // Predicates = [IsLA64]
 
 def : PatGprGpr<and, AND>;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll
index 2bf6c319f8e4e..3763cc2307de0 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll
@@ -183,7 +183,7 @@ define i64 @add_i64_3(i64 %x) {
   ret i64 %add
 }
 
-;; TODO: emit `addu16i.d` for these cases.
+;; Check that `addu16i.d` is emitted for these cases.
 
 define i32 @add_i32_0x12340000(i32 %x) {
 ; LA32-LABEL: add_i32_0x12340000:
@@ -194,8 +194,7 @@ define i32 @add_i32_0x12340000(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_0x12340000:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 74560
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 4660
 ; LA64-NEXT:    ret
   %add = add i32 %x, 305397760
   ret i32 %add
@@ -210,8 +209,8 @@ define signext i32 @add_i32_0x12340000_sext(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_0x12340000_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 74560
-; LA64-NEXT:    add.w $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 4660
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
   %add = add i32 %x, 305397760
   ret i32 %add
@@ -229,8 +228,7 @@ define i64 @add_i64_0x12340000(i64 %x) {
 ;
 ; LA64-LABEL: add_i64_0x12340000:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 74560
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 4660
 ; LA64-NEXT:    ret
   %add = add i64 %x, 305397760
   ret i64 %add
@@ -245,8 +243,7 @@ define i32 @add_i32_0x7fff0000(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_0x7fff0000:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524272
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 32767
 ; LA64-NEXT:    ret
   %add = add i32 %x, 2147418112
   ret i32 %add
@@ -261,8 +258,8 @@ define signext i32 @add_i32_0x7fff0000_sext(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_0x7fff0000_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524272
-; LA64-NEXT:    add.w $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 32767
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
   %add = add i32 %x, 2147418112
   ret i32 %add
@@ -280,8 +277,7 @@ define i64 @add_i64_0x7fff0000(i64 %x) {
 ;
 ; LA64-LABEL: add_i64_0x7fff0000:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524272
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 32767
 ; LA64-NEXT:    ret
   %add = add i64 %x, 2147418112
   ret i64 %add
@@ -296,8 +292,7 @@ define i32 @add_i32_minus_0x80000000(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_minus_0x80000000:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -524288
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -32768
 ; LA64-NEXT:    ret
   %add = add i32 %x, -2147483648
   ret i32 %add
@@ -312,8 +307,8 @@ define signext i32 @add_i32_minus_0x80000000_sext(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_minus_0x80000000_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -524288
-; LA64-NEXT:    add.w $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -32768
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
   %add = add i32 %x, -2147483648
   ret i32 %add
@@ -332,8 +327,7 @@ define i64 @add_i64_minus_0x80000000(i64 %x) {
 ;
 ; LA64-LABEL: add_i64_minus_0x80000000:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -524288
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -32768
 ; LA64-NEXT:    ret
   %add = add i64 %x, -2147483648
   ret i64 %add
@@ -348,8 +342,7 @@ define i32 @add_i32_minus_0x10000(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_minus_0x10000:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -16
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -1
 ; LA64-NEXT:    ret
   %add = add i32 %x, -65536
   ret i32 %add
@@ -364,8 +357,8 @@ define signext i32 @add_i32_minus_0x10000_sext(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_minus_0x10000_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -16
-; LA64-NEXT:    add.w $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -1
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
   %add = add i32 %x, -65536
   ret i32 %add
@@ -384,14 +377,13 @@ define i64 @add_i64_minus_0x10000(i64 %x) {
 ;
 ; LA64-LABEL: add_i64_minus_0x10000:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -16
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -1
 ; LA64-NEXT:    ret
   %add = add i64 %x, -65536
   ret i64 %add
 }
 
-;; TODO: use `addu16i.d + addi` for these cases.
+;; Check that `addu16i.d + addi` is emitted for these cases.
 
 define i32 @add_i32_0x7fff07ff(i32 %x) {
 ; LA32-LABEL: add_i32_0x7fff07ff:
@@ -403,9 +395,8 @@ define i32 @add_i32_0x7fff07ff(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_0x7fff07ff:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524272
-; LA64-NEXT:    ori $a1, $a1, 2047
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 32767
+; LA64-NEXT:    addi.d $a0, $a0, 2047
 ; LA64-NEXT:    ret
   %add = add i32 %x, 2147420159
   ret i32 %add
@@ -421,9 +412,8 @@ define signext i32 @add_i32_0x7fff07ff_sext(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_0x7fff07ff_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524272
-; LA64-NEXT:    ori $a1, $a1, 2047
-; LA64-NEXT:    add.w $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 32767
+; LA64-NEXT:    addi.w $a0, $a0, 2047
 ; LA64-NEXT:    ret
   %add = add i32 %x, 2147420159
   ret i32 %add
@@ -442,9 +432,8 @@ define i64 @add_i64_0x7fff07ff(i64 %x) {
 ;
 ; LA64-LABEL: add_i64_0x7fff07ff:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524272
-; LA64-NEXT:    ori $a1, $a1, 2047
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 32767
+; LA64-NEXT:    addi.d $a0, $a0, 2047
 ; LA64-NEXT:    ret
   %add = add i64 %x, 2147420159
   ret i64 %add
@@ -460,9 +449,8 @@ define i32 @add_i32_0x7ffef800(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_0x7ffef800:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524271
-; LA64-NEXT:    ori $a1, $a1, 2048
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 32767
+; LA64-NEXT:    addi.d $a0, $a0, -2048
 ; LA64-NEXT:    ret
   %add = add i32 %x, 2147416064
   ret i32 %add
@@ -478,9 +466,8 @@ define signext i32 @add_i32_0x7ffef800_sext(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_0x7ffef800_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524271
-; LA64-NEXT:    ori $a1, $a1, 2048
-; LA64-NEXT:    add.w $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 32767
+; LA64-NEXT:    addi.w $a0, $a0, -2048
 ; LA64-NEXT:    ret
   %add = add i32 %x, 2147416064
   ret i32 %add
@@ -499,9 +486,8 @@ define i64 @add_i64_0x7ffef800(i64 %x) {
 ;
 ; LA64-LABEL: add_i64_0x7ffef800:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524271
-; LA64-NEXT:    ori $a1, $a1, 2048
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, 32767
+; LA64-NEXT:    addi.d $a0, $a0, -2048
 ; LA64-NEXT:    ret
   %add = add i64 %x, 2147416064
   ret i64 %add
@@ -521,10 +507,8 @@ define i64 @add_i64_minus_0x80000800(i64 %x) {
 ;
 ; LA64-LABEL: add_i64_minus_0x80000800:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 524287
-; LA64-NEXT:    ori $a1, $a1, 2048
-; LA64-NEXT:    lu32i.d $a1, -1
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -32768
+; LA64-NEXT:    addi.d $a0, $a0, -2048
 ; LA64-NEXT:    ret
   %add = add i64 %x, -2147485696
   ret i64 %add
@@ -540,9 +524,8 @@ define i32 @add_i32_minus_0x23450679(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_minus_0x23450679:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -144465
-; LA64-NEXT:    ori $a1, $a1, 2439
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -9029
+; LA64-NEXT:    addi.d $a0, $a0, -1657
 ; LA64-NEXT:    ret
   %add = add i32 %x, -591726201
   ret i32 %add
@@ -558,9 +541,8 @@ define signext i32 @add_i32_minus_0x23450679_sext(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_minus_0x23450679_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -144465
-; LA64-NEXT:    ori $a1, $a1, 2439
-; LA64-NEXT:    add.w $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -9029
+; LA64-NEXT:    addi.w $a0, $a0, -1657
 ; LA64-NEXT:    ret
   %add = add i32 %x, -591726201
   ret i32 %add
@@ -580,9 +562,8 @@ define i64 @add_i64_minus_0x23450679(i64 %x) {
 ;
 ; LA64-LABEL: add_i64_minus_0x23450679:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -144465
-; LA64-NEXT:    ori $a1, $a1, 2439
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -9029
+; LA64-NEXT:    addi.d $a0, $a0, -1657
 ; LA64-NEXT:    ret
   %add = add i64 %x, -591726201
   ret i64 %add
@@ -598,9 +579,8 @@ define i32 @add_i32_minus_0x2345fedd(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_minus_0x2345fedd:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -144480
-; LA64-NEXT:    ori $a1, $a1, 291
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -9030
+; LA64-NEXT:    addi.d $a0, $a0, 291
 ; LA64-NEXT:    ret
   %add = add i32 %x, -591789789
   ret i32 %add
@@ -616,9 +596,8 @@ define signext i32 @add_i32_minus_0x2345fedd_sext(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_minus_0x2345fedd_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -144480
-; LA64-NEXT:    ori $a1, $a1, 291
-; LA64-NEXT:    add.w $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -9030
+; LA64-NEXT:    addi.w $a0, $a0, 291
 ; LA64-NEXT:    ret
   %add = add i32 %x, -591789789
   ret i32 %add
@@ -638,9 +617,8 @@ define i64 @add_i64_minus_0x2345fedd(i64 %x) {
 ;
 ; LA64-LABEL: add_i64_minus_0x2345fedd:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -144480
-; LA64-NEXT:    ori $a1, $a1, 291
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    addu16i.d $a0, $a0, -9030
+; LA64-NEXT:    addi.d $a0, $a0, 291
 ; LA64-NEXT:    ret
   %add = add i64 %x, -591789789
   ret i64 %add