diff --git a/llvm/lib/Target/EraVM/EraVMInstrInfo.td b/llvm/lib/Target/EraVM/EraVMInstrInfo.td index 56dc7d20ec5f..3faa823df496 100644 --- a/llvm/lib/Target/EraVM/EraVMInstrInfo.td +++ b/llvm/lib/Target/EraVM/EraVMInstrInfo.td @@ -284,9 +284,7 @@ let isSelect = 1, Uses = [Flags] in { let mayLoad = 1 in { def SELcrr : Pseudo<(outs GR256:$dst), (ins memop:$src, GR256:$src2, i256imm:$cc), [(set GR256:$dst, - (EraVMselectcc (load_code memaddr:$src), GR256:$src2, imm:$cc))]> { - let Constraints = "$dst = $src2"; - } + (EraVMselectcc (load_code memaddr:$src), GR256:$src2, imm:$cc))]>; def SELsrr : Pseudo<(outs GR256:$dst), (ins stackin:$src, GR256:$src2, i256imm:$cc), [(set GR256:$dst, (EraVMselectcc (load_stack stackaddr:$src), GR256:$src2, imm:$cc))]> { @@ -310,9 +308,7 @@ let mayLoad = 1 in { (EraVMselectcc (load_stack stackaddr:$src), imm16:$src2, imm:$cc))]>; def SELrcr : Pseudo<(outs GR256:$dst), (ins GR256:$src, memop:$src2, i256imm:$cc), [(set GR256:$dst, - (EraVMselectcc GR256:$src, (load_code memaddr:$src2), imm:$cc))]> { - let Constraints = "$dst = $src"; - } + (EraVMselectcc GR256:$src, (load_code memaddr:$src2), imm:$cc))]>; def SELicr : Pseudo<(outs GR256:$dst), (ins imm16:$src, memop:$src2, i256imm:$cc), [(set GR256:$dst, (EraVMselectcc imm16:$src, (load_code memaddr:$src2), imm:$cc))]>; @@ -350,6 +346,31 @@ def : Pat<(load_stack(EraVMselectcc stackaddr:$addr1, stackaddr:$addr2, imm:$cc) def : Pat<(load_code(EraVMselectcc memaddr:$addr1, memaddr:$addr2, imm:$cc)), (SELccr memaddr:$addr1, memaddr:$addr2, imm:$cc)>; +// Patterns for selectcc instructions with large immediates that are loaded from the constant pool. +// The first two patterns use the R0 register in place of a zero immediate operand.
+def : Pat<(EraVMselectcc 0, large_imm:$src2, imm:$cc), + (SELrcr R0, (constant_pool imm:$src2), 0, imm:$cc)>; +def : Pat<(EraVMselectcc large_imm:$src1, 0, imm:$cc), + (SELcrr (constant_pool imm:$src1), 0, R0, imm:$cc)>; +def : Pat<(EraVMselectcc large_imm:$src1, GR256:$src2, imm:$cc), + (SELcrr (constant_pool imm:$src1), 0, GR256:$src2, imm:$cc)>; +def : Pat<(EraVMselectcc GR256:$src1, large_imm:$src2, imm:$cc), + (SELrcr GR256:$src1, (constant_pool imm:$src2), 0, imm:$cc)>; +def : Pat<(EraVMselectcc large_imm:$src1, imm16:$src2, imm:$cc), + (SELcir (constant_pool imm:$src1), 0, imm16:$src2, imm:$cc)>; +def : Pat<(EraVMselectcc imm16:$src1, large_imm:$src2, imm:$cc), + (SELicr imm16:$src1, (constant_pool imm:$src2), 0, imm:$cc)>; +def : Pat<(EraVMselectcc (load_code memaddr:$src1), large_imm:$src2, imm:$cc), + (SELccr memaddr:$src1, (constant_pool imm:$src2), 0, imm:$cc)>; +def : Pat<(EraVMselectcc large_imm:$src1, (load_code memaddr:$src2), imm:$cc), + (SELccr (constant_pool imm:$src1), 0, memaddr:$src2, imm:$cc)>; +def : Pat<(EraVMselectcc large_imm:$src1, large_imm:$src2, imm:$cc), + (SELccr (constant_pool imm:$src1), 0, (constant_pool imm:$src2), 0, imm:$cc)>; +def : Pat<(EraVMselectcc large_imm:$src1, (load_stack stackaddr:$src2), imm:$cc), + (SELcsr (constant_pool imm:$src1), 0, stackaddr:$src2, imm:$cc)>; +def : Pat<(EraVMselectcc (load_stack stackaddr:$src1), large_imm:$src2, imm:$cc), + (SELscr stackaddr:$src1, (constant_pool imm:$src2), 0, imm:$cc)>; + // TODO: CPR-1356 stack and code forms def : Pat<(int_eravm_ifeq GR256:$src0, GR256:$src1), (SELrrr GR256:$src0, GR256:$src1, COND_E.Encoding)>; def : Pat<(int_eravm_iflt GR256:$src0, GR256:$src1), (SELrrr GR256:$src0, GR256:$src1, COND_LT.Encoding)>; diff --git a/llvm/lib/Target/EraVM/EraVMTieSelectOperands.cpp b/llvm/lib/Target/EraVM/EraVMTieSelectOperands.cpp index 9ef5ee72e287..3a6ae74df006 100644 --- a/llvm/lib/Target/EraVM/EraVMTieSelectOperands.cpp +++ 
b/llvm/lib/Target/EraVM/EraVMTieSelectOperands.cpp @@ -45,11 +45,17 @@ class EraVMTieSelectOperands : public MachineFunctionPass { } private: - /// If MI is a pseudo SELrrr instruction (which is the most common case), + /// If MI is a pseudo select instruction that canPlaceTie accepts for Arg, /// then try to ask RA to coalesce an input register with the output, so that /// EraVMExpandSelectPass can have better results. /// \par Arg which argument to tie (in0 or in1). bool tryPlacingTie(MachineInstr &MI, EraVM::ArgumentKind Arg) const; + + /// Return true if a tie can be placed for the given instruction and argument. + /// This holds for SELrrr with either input, for SELcrr when Arg is in1, and + /// for SELrcr when Arg is in0; every other opcode/argument combination is + /// rejected. + bool canPlaceTie(MachineInstr &MI, EraVM::ArgumentKind Arg) const; }; char EraVMTieSelectOperands::ID = 0; @@ -58,17 +64,22 @@ char EraVMTieSelectOperands::ID = 0; INITIALIZE_PASS(EraVMTieSelectOperands, DEBUG_TYPE, ERAVM_TIE_SELECT_OPERANDS_NAME, false, false) +bool EraVMTieSelectOperands::canPlaceTie(MachineInstr &MI, + EraVM::ArgumentKind Arg) const { + return MI.getOpcode() == EraVM::SELrrr || + (MI.getOpcode() == EraVM::SELcrr && Arg == EraVM::ArgumentKind::In1) || + (MI.getOpcode() == EraVM::SELrcr && Arg == EraVM::ArgumentKind::In0); +} + /// Try to create an implicit tie so that the register allocator can coalesce /// both registers. /// Return true if we managed to do so. bool EraVMTieSelectOperands::tryPlacingTie(MachineInstr &MI, EraVM::ArgumentKind Arg) const { assert(Arg == EraVM::ArgumentKind::In0 || Arg == EraVM::ArgumentKind::In1); - // Given that we've already made constraints on the case of single - // register operand SELECTs, we will now focus only on the case of two - // register selects. Also note that majority of the opportunities come - // from reg-reg selects.
- if (MI.getOpcode() != EraVM::SELrrr) + + // Bail out if we cannot place a tie for the given instruction. + if (!canPlaceTie(MI, Arg)) return false; // Skip if the output register is already tied to an input register. diff --git a/llvm/test/CodeGen/EraVM/cmp-signed.ll b/llvm/test/CodeGen/EraVM/cmp-signed.ll index 9217f9fa3c3e..e68a0a45cd18 100644 --- a/llvm/test/CodeGen/EraVM/cmp-signed.ll +++ b/llvm/test/CodeGen/EraVM/cmp-signed.ll @@ -7,17 +7,17 @@ target triple = "eravm" define i1 @slt_not(i256 %a) { ; CHECK-LABEL: slt_not: ; CHECK: ; %bb.0: -; CHECK-NEXT: add @CPI0_0[0], r0, r3 ; CHECK-NEXT: sub.s! @CPI0_1[0], r1, r2 ; CHECK-NEXT: add r0, r0, r2 -; CHECK-NEXT: add.lt r3, r0, r2 +; CHECK-NEXT: add.lt @CPI0_0[0], r0, r2 ; CHECK-NEXT: and @CPI0_0[0], r1, r1 -; CHECK-NEXT: sub.s! @CPI0_0[0], r1, r4 -; CHECK-NEXT: add.le r0, r0, r3 -; CHECK-NEXT: xor @CPI0_0[0], r1, r1 +; CHECK-NEXT: xor @CPI0_0[0], r1, r3 ; CHECK-NEXT: sub.s! @CPI0_0[0], r1, r1 -; CHECK-NEXT: add.ne r2, r0, r3 -; CHECK-NEXT: sub! r3, r0, r1 +; CHECK-NEXT: add r0, r0, r1 +; CHECK-NEXT: add.gt @CPI0_0[0], r0, r1 +; CHECK-NEXT: sub.s! @CPI0_0[0], r3, r3 +; CHECK-NEXT: add.ne r2, r0, r1 +; CHECK-NEXT: sub! r1, r0, r1 ; CHECK-NEXT: add 0, r0, r1 ; CHECK-NEXT: add.ne 1, r0, r1 ; CHECK-NEXT: ret @@ -28,17 +28,17 @@ define i1 @slt_not(i256 %a) { define i1 @sgt_not(i256 %a) { ; CHECK-LABEL: sgt_not: ; CHECK: ; %bb.0: -; CHECK-NEXT: add @CPI1_0[0], r0, r3 ; CHECK-NEXT: sub.s! @CPI1_1[0], r1, r2 ; CHECK-NEXT: add r0, r0, r2 -; CHECK-NEXT: add.gt r3, r0, r2 +; CHECK-NEXT: add.gt @CPI1_0[0], r0, r2 ; CHECK-NEXT: and @CPI1_0[0], r1, r1 -; CHECK-NEXT: sub.s! @CPI1_0[0], r1, r4 -; CHECK-NEXT: add.ge r0, r0, r3 -; CHECK-NEXT: xor @CPI1_0[0], r1, r1 +; CHECK-NEXT: xor @CPI1_0[0], r1, r3 ; CHECK-NEXT: sub.s! @CPI1_0[0], r1, r1 -; CHECK-NEXT: add.ne r2, r0, r3 -; CHECK-NEXT: sub! r3, r0, r1 +; CHECK-NEXT: add r0, r0, r1 +; CHECK-NEXT: add.lt @CPI1_0[0], r0, r1 +; CHECK-NEXT: sub.s! 
@CPI1_0[0], r3, r3 +; CHECK-NEXT: add.ne r2, r0, r1 +; CHECK-NEXT: sub! r1, r0, r1 ; CHECK-NEXT: add 0, r0, r1 ; CHECK-NEXT: add.ne 1, r0, r1 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/EraVM/ra_regress.ll b/llvm/test/CodeGen/EraVM/ra_regress.ll index 9514c023496e..cc37486730d8 100644 --- a/llvm/test/CodeGen/EraVM/ra_regress.ll +++ b/llvm/test/CodeGen/EraVM/ra_regress.ll @@ -8,13 +8,14 @@ define i256 @test_length(i256 %arg1) { entry: ; CHECK: sub.s! 36, r{{[0-9]+}}, r{{[[0-9]+}} ; CHECK-NEXT: add r0, r{{[0-9]+}}, r{{[[0-9]+}} -; CHECK-NEXT: add.lt r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} +; CHECK-NEXT: add.lt @CPI0_0[0], r{{[0-9]+}}, r{{[0-9]+}} %comparison_result24 = icmp slt i256 35, %arg1 %comparison_result_extended25 = zext i1 %comparison_result24 to i256 ; CHECK: and @CPI0_0[0], r{{[0-9]+}}, r{{[[0-9]+}} ; CHECK-NEXT: sub! r{{[0-9]+}}, r0, r{{[[0-9]+}} -; CHECK-NEXT: add.le r0, r{{[0-9]+}}, r{{[[0-9]+}} +; CHECK-NEXT: add r0, r0, r{{[[0-9]+}} +; CHECK-NEXT: add.gt @CPI0_0[0], r{{[0-9]+}}, r{{[[0-9]+}} %comparison_result26 = icmp eq i256 %comparison_result_extended25, 0 %comparison_result_extended27 = zext i1 %comparison_result26 to i256 diff --git a/llvm/test/CodeGen/EraVM/selectcc.ll b/llvm/test/CodeGen/EraVM/selectcc.ll index 0358bbced6d9..b913408ba510 100644 --- a/llvm/test/CodeGen/EraVM/selectcc.ll +++ b/llvm/test/CodeGen/EraVM/selectcc.ll @@ -57,9 +57,9 @@ define i256 @selcrr(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { define i256 @selcrr_zero_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selcrr_zero_cp: ; CHECK: ; %bb.0: -; CHECK-NEXT: add @CPI4_0[0], r0, r1 -; CHECK-NEXT: sub! r3, r4, r2 -; CHECK-NEXT: add.ge r0, r0, r1 +; CHECK-NEXT: sub! 
r3, r4, r1 +; CHECK-NEXT: add r0, r0, r1 +; CHECK-NEXT: add.lt @CPI4_0[0], r0, r1 ; CHECK-NEXT: ret %1 = icmp ult i256 %v3, %v4 %2 = select i1 %1, i256 123456789, i256 0 @@ -69,10 +69,8 @@ define i256 @selcrr_zero_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { define i256 @selcrr_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selcrr_cp: ; CHECK: ; %bb.0: -; CHECK-NEXT: add @CPI5_0[0], r0, r2 -; CHECK-NEXT: sub! r3, r4, r3 -; CHECK-NEXT: add.ge r1, r0, r2 -; CHECK-NEXT: add r2, r0, r1 +; CHECK-NEXT: sub! r3, r4, r2 +; CHECK-NEXT: add.lt @CPI5_0[0], r0, r1 ; CHECK-NEXT: ret %1 = icmp ult i256 %v3, %v4 %2 = select i1 %1, i256 123456789, i256 %v1 @@ -132,9 +130,9 @@ define i256 @selcir(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { define i256 @selcir_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selcir_cp: ; CHECK: ; %bb.0: -; CHECK-NEXT: add @CPI10_0[0], r0, r1 -; CHECK-NEXT: sub! r3, r4, r2 -; CHECK-NEXT: add.le 42, r0, r1 +; CHECK-NEXT: sub! r3, r4, r1 +; CHECK-NEXT: add 42, r0, r1 +; CHECK-NEXT: add.gt @CPI10_0[0], r0, r1 ; CHECK-NEXT: ret %1 = icmp ugt i256 %v3, %v4 %2 = select i1 %1, i256 123456789, i256 42 @@ -171,8 +169,8 @@ define i256 @selrcr(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { define i256 @selrcr_zero_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selrcr_zero_cp: ; CHECK: ; %bb.0: +; CHECK-NEXT: sub! r3, r4, r1 ; CHECK-NEXT: add @CPI13_0[0], r0, r1 -; CHECK-NEXT: sub! r3, r4, r2 ; CHECK-NEXT: add.lt r0, r0, r1 ; CHECK-NEXT: ret %1 = icmp ult i256 %v3, %v4 @@ -183,9 +181,8 @@ define i256 @selrcr_zero_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { define i256 @selrcr_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selrcr_cp: ; CHECK: ; %bb.0: -; CHECK-NEXT: add @CPI14_0[0], r0, r2 -; CHECK-NEXT: sub! r3, r4, r3 -; CHECK-NEXT: add.ge r2, r0, r1 +; CHECK-NEXT: sub! 
r3, r4, r2 +; CHECK-NEXT: add.ge @CPI14_0[0], r0, r1 ; CHECK-NEXT: ret %1 = icmp ult i256 %v3, %v4 %2 = select i1 %1, i256 %v1, i256 123456789 @@ -208,8 +205,8 @@ define i256 @selicr(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { define i256 @selicr_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selicr_cp: ; CHECK: ; %bb.0: +; CHECK-NEXT: sub! r3, r4, r1 ; CHECK-NEXT: add @CPI16_0[0], r0, r1 -; CHECK-NEXT: sub! r3, r4, r2 ; CHECK-NEXT: add.lt 42, r0, r1 ; CHECK-NEXT: ret %1 = icmp ult i256 %v3, %v4 @@ -234,8 +231,8 @@ define i256 @selccr_cl_cl(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { define i256 @selccr_cl_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selccr_cl_cp: ; CHECK: ; %bb.0: +; CHECK-NEXT: sub! r3, r4, r1 ; CHECK-NEXT: add @CPI18_0[0], r0, r1 -; CHECK-NEXT: sub! r3, r4, r2 ; CHECK-NEXT: add.lt @val[0], r0, r1 ; CHECK-NEXT: ret %1 = icmp ult i256 %v3, %v4 @@ -247,9 +244,9 @@ define i256 @selccr_cl_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { define i256 @selccr_cp_cl(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selccr_cp_cl: ; CHECK: ; %bb.0: -; CHECK-NEXT: add @CPI19_0[0], r0, r1 -; CHECK-NEXT: sub! r3, r4, r2 -; CHECK-NEXT: add.ge @val[0], r0, r1 +; CHECK-NEXT: sub! r3, r4, r1 +; CHECK-NEXT: add @val[0], r0, r1 +; CHECK-NEXT: add.lt @CPI19_0[0], r0, r1 ; CHECK-NEXT: ret %1 = icmp ult i256 %v3, %v4 %const = load i256, i256 addrspace(4)* @val @@ -260,10 +257,9 @@ define i256 @selccr_cp_cl(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { define i256 @selccr_cp_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selccr_cp_cp: ; CHECK: ; %bb.0: -; CHECK-NEXT: add @CPI20_0[0], r0, r2 +; CHECK-NEXT: sub! r3, r4, r1 ; CHECK-NEXT: add @CPI20_1[0], r0, r1 -; CHECK-NEXT: sub! 
r3, r4, r3 -; CHECK-NEXT: add.ge r2, r0, r1 +; CHECK-NEXT: add.lt @CPI20_0[0], r0, r1 ; CHECK-NEXT: ret %1 = icmp ult i256 %v3, %v4 %2 = select i1 %1, i256 12345678, i256 123456789 @@ -290,8 +286,8 @@ define i256 @selscr_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selscr_cp: ; CHECK: ; %bb.0: ; CHECK-NEXT: nop stack+=[1 + r0] +; CHECK-NEXT: sub! r3, r4, r1 ; CHECK-NEXT: add @CPI22_0[0], r0, r1 -; CHECK-NEXT: sub! r3, r4, r2 ; CHECK-NEXT: add.lt stack-[1], r0, r1 ; CHECK-NEXT: ret %ptr = alloca i256 @@ -350,9 +346,9 @@ define i256 @selcsr_cp(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: selcsr_cp: ; CHECK: ; %bb.0: ; CHECK-NEXT: nop stack+=[1 + r0] -; CHECK-NEXT: add @CPI26_0[0], r0, r1 -; CHECK-NEXT: sub! r3, r4, r2 -; CHECK-NEXT: add.ge stack-[1], r0, r1 +; CHECK-NEXT: sub! r3, r4, r1 +; CHECK-NEXT: add stack-[1], r0, r1 +; CHECK-NEXT: add.lt @CPI26_0[0], r0, r1 ; CHECK-NEXT: ret %data = alloca i256 %1 = icmp ult i256 %v3, %v4