From a46d620835fa6c9d603b7aa3dacc660daa3870d9 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Tue, 18 Nov 2025 19:08:42 +0800 Subject: [PATCH 1/3] [LoongArch] Add late branch optimisation pass This commit adds a new target specific optimization pass for LoongArch to convert conditional branches into unconditional branches when the condition can be statically evaluated. Similar to riscv. --- llvm/lib/Target/LoongArch/CMakeLists.txt | 1 + llvm/lib/Target/LoongArch/LoongArch.h | 2 + .../LoongArch/LoongArchLateBranchOpt.cpp | 195 ++++++++++++++++++ .../LoongArch/LoongArchTargetMachine.cpp | 7 +- llvm/test/CodeGen/LoongArch/jr-without-ra.ll | 2 - llvm/test/CodeGen/LoongArch/opt-pipeline.ll | 1 + 6 files changed, 205 insertions(+), 3 deletions(-) create mode 100644 llvm/lib/Target/LoongArch/LoongArchLateBranchOpt.cpp diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt index 0f674b1b0fa9e..cac6b3aa1051a 100644 --- a/llvm/lib/Target/LoongArch/CMakeLists.txt +++ b/llvm/lib/Target/LoongArch/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_target(LoongArchCodeGen LoongArchInstrInfo.cpp LoongArchISelDAGToDAG.cpp LoongArchISelLowering.cpp + LoongArchLateBranchOpt.cpp LoongArchMCInstLower.cpp LoongArchMergeBaseOffset.cpp LoongArchOptWInstrs.cpp diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h index e5b3083348792..f123d42426dbd 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.h +++ b/llvm/lib/Target/LoongArch/LoongArch.h @@ -37,6 +37,7 @@ FunctionPass *createLoongArchDeadRegisterDefinitionsPass(); FunctionPass *createLoongArchExpandAtomicPseudoPass(); FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM, CodeGenOptLevel OptLevel); +FunctionPass *createLoongArchLateBranchOptPass(); FunctionPass *createLoongArchMergeBaseOffsetOptPass(); FunctionPass *createLoongArchOptWInstrsPass(); FunctionPass *createLoongArchPreRAExpandPseudoPass(); @@ -45,6 +46,7 @@ void 
initializeLoongArchAsmPrinterPass(PassRegistry &); void initializeLoongArchDAGToDAGISelLegacyPass(PassRegistry &); void initializeLoongArchDeadRegisterDefinitionsPass(PassRegistry &); void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &); +void initializeLoongArchLateBranchOptPass(PassRegistry &); void initializeLoongArchMergeBaseOffsetOptPass(PassRegistry &); void initializeLoongArchOptWInstrsPass(PassRegistry &); void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &); diff --git a/llvm/lib/Target/LoongArch/LoongArchLateBranchOpt.cpp b/llvm/lib/Target/LoongArch/LoongArchLateBranchOpt.cpp new file mode 100644 index 0000000000000..00474c08547ec --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchLateBranchOpt.cpp @@ -0,0 +1,195 @@ +//===-- LoongArchLateBranchOpt.cpp - Late Stage Branch Optimization -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file provides LoongArch specific target optimizations, currently it's +/// limited to convert conditional branches into unconditional branches when +/// the condition can be statically evaluated. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchInstrInfo.h"
+#include "LoongArchSubtarget.h"
+
+using namespace llvm;
+
+#define LOONGARCH_LATE_BRANCH_OPT_NAME "LoongArch Late Branch Optimisation Pass"
+
+namespace {
+
+struct LoongArchLateBranchOpt : public MachineFunctionPass {
+  static char ID;
+
+  LoongArchLateBranchOpt() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return LOONGARCH_LATE_BRANCH_OPT_NAME;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+  bool runOnBasicBlock(MachineBasicBlock &MBB) const;
+
+  bool isLoadImm(const MachineInstr *MI, int64_t &Imm) const;
+  bool isFromLoadImm(const MachineOperand &Op, int64_t &Imm) const;
+
+  bool evaluateCondBranch(unsigned Opc, int64_t C0, int64_t C1) const;
+
+  const LoongArchSubtarget *ST = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+};
+} // namespace
+
+char LoongArchLateBranchOpt::ID = 0;
+INITIALIZE_PASS(LoongArchLateBranchOpt, "loongarch-late-branch-opt",
+                LOONGARCH_LATE_BRANCH_OPT_NAME, false, false)
+
+// Return true if the instruction is a load immediate instruction.
+// TODO: Need more consideration?
+bool LoongArchLateBranchOpt::isLoadImm(const MachineInstr *MI,
+                                       int64_t &Imm) const {
+  unsigned Addi = ST->is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+  if (MI->getOpcode() == Addi && MI->getOperand(1).isReg() &&
+      MI->getOperand(1).getReg() == LoongArch::R0) {
+    Imm = MI->getOperand(2).getImm();
+    return true;
+  }
+  if (MI->getOpcode() == LoongArch::ORI && MI->getOperand(1).isReg() &&
+      MI->getOperand(1).getReg() == LoongArch::R0) {
+    Imm = MI->getOperand(2).getImm();
+    return true;
+  }
+  return false;
+}
+
+// Return true if the operand is a load immediate instruction and
+// sets Imm to the immediate value.
+bool LoongArchLateBranchOpt::isFromLoadImm(const MachineOperand &Op,
+                                           int64_t &Imm) const {
+  // Either a load from immediate instruction or R0.
+  if (!Op.isReg())
+    return false;
+
+  Register Reg = Op.getReg();
+  if (Reg == LoongArch::R0) {
+    Imm = 0;
+    return true;
+  }
+  return Reg.isVirtual() && isLoadImm(MRI->getVRegDef(Reg), Imm);
+}
+
+// Return the result of the evaluation of 'C0 CC C1', where CC is the
+// condition of Opc and C1 is always zero when Opc is B{EQ/NE/CEQ/CNE}Z.
+bool LoongArchLateBranchOpt::evaluateCondBranch(unsigned Opc, int64_t C0,
+                                                int64_t C1) const {
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unexpected Opcode.");
+  case LoongArch::BEQ:
+  case LoongArch::BEQZ:
+  case LoongArch::BCEQZ:
+    return C0 == C1;
+  case LoongArch::BNE:
+  case LoongArch::BNEZ:
+  case LoongArch::BCNEZ:
+    return C0 != C1;
+  case LoongArch::BLT:
+    return C0 < C1;
+  case LoongArch::BGE:
+    return C0 >= C1;
+  case LoongArch::BLTU:
+    return (uint64_t)C0 < (uint64_t)C1;
+  case LoongArch::BGEU:
+    return (uint64_t)C0 >= (uint64_t)C1;
+  }
+}
+
+bool LoongArchLateBranchOpt::runOnBasicBlock(MachineBasicBlock &MBB) const {
+  const LoongArchInstrInfo &TII = *ST->getInstrInfo();
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 3> Cond;
+
+  if (TII.analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
+    return false;
+
+  // LoongArch conditional branch instructions compare two operands (i.e.
+  // Opc C0, C1, TBB) or one operand with immediate zero (i.e. Opc C0, TBB).
+  if (!TBB || (Cond.size() != 2 && Cond.size() != 3))
+    return false;
+
+  // Try and convert a conditional branch that can be evaluated statically
+  // into an unconditional branch.
+  int64_t C0 = 0, C1 = 0;
+  unsigned Opc = Cond[0].getImm();
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unexpected Opcode.");
+  case LoongArch::BEQ:
+  case LoongArch::BNE:
+  case LoongArch::BLT:
+  case LoongArch::BGE:
+  case LoongArch::BLTU:
+  case LoongArch::BGEU:
+    if (!isFromLoadImm(Cond[1], C0) || !isFromLoadImm(Cond[2], C1))
+      return false;
+    break;
+  case LoongArch::BEQZ:
+  case LoongArch::BNEZ:
+  case LoongArch::BCEQZ:
+  case LoongArch::BCNEZ:
+    if (!isFromLoadImm(Cond[1], C0))
+      return false;
+    break;
+  }
+
+  MachineBasicBlock *Folded = evaluateCondBranch(Opc, C0, C1) ? TBB : FBB;
+
+  // At this point, it's legal to optimize.
+  TII.removeBranch(MBB);
+
+  // Only need to insert a branch if we're not falling through.
+  if (Folded) {
+    DebugLoc DL = MBB.findBranchDebugLoc();
+    TII.insertBranch(MBB, Folded, nullptr, {}, DL);
+  }
+
+  // Update the successors. Remove them all and add back the correct one.
+  while (!MBB.succ_empty())
+    MBB.removeSuccessor(MBB.succ_end() - 1);
+
+  // If it's a fallthrough, we need to figure out where MBB is going.
+  if (!Folded) {
+    MachineFunction::iterator Fallthrough = ++MBB.getIterator();
+    if (Fallthrough != MBB.getParent()->end())
+      MBB.addSuccessor(&*Fallthrough);
+  } else
+    MBB.addSuccessor(Folded);
+
+  return true;
+}
+
+bool LoongArchLateBranchOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  ST = &Fn.getSubtarget<LoongArchSubtarget>();
+  MRI = &Fn.getRegInfo();
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : Fn)
+    Changed |= runOnBasicBlock(MBB);
+  return Changed;
+}
+
+FunctionPass *llvm::createLoongArchLateBranchOptPass() {
+  return new LoongArchLateBranchOpt();
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 92a9388e5cb7b..ff878b51a2701 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -37,6 +37,7 @@ LLVMInitializeLoongArchTarget() {
   RegisterTargetMachine<LoongArchTargetMachine> Y(getTheLoongArch64Target());
   auto *PR = PassRegistry::getPassRegistry();
   initializeLoongArchDeadRegisterDefinitionsPass(*PR);
+  initializeLoongArchLateBranchOptPass(*PR);
   initializeLoongArchMergeBaseOffsetOptPass(*PR);
   initializeLoongArchOptWInstrsPass(*PR);
   initializeLoongArchPreRAExpandPseudoPass(*PR);
@@ -205,7 +206,11 @@ LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const {
   return TargetTransformInfo(std::make_unique<LoongArchTTIImpl>(this, F));
 }
 
-void LoongArchPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
+void LoongArchPassConfig::addPreEmitPass() {
+  if (getOptLevel() != CodeGenOptLevel::None)
+    addPass(createLoongArchLateBranchOptPass());
+  addPass(&BranchRelaxationPassID);
+}
 
 void LoongArchPassConfig::addPreEmitPass2() {
   addPass(createLoongArchExpandPseudoPass());
diff --git a/llvm/test/CodeGen/LoongArch/jr-without-ra.ll b/llvm/test/CodeGen/LoongArch/jr-without-ra.ll
index 1a1fe0e2b19e2..750ff5bc6f2a4 100644
--- a/llvm/test/CodeGen/LoongArch/jr-without-ra.ll
+++ 
b/llvm/test/CodeGen/LoongArch/jr-without-ra.ll @@ -74,7 +74,6 @@ define void @jr_without_ra(ptr %rtwdev, ptr %chan, ptr %h2c, i8 %.pre, i1 %cmp.i ; CHECK-NEXT: # %bb.5: # %calc_6g.i ; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 ; CHECK-NEXT: move $s7, $zero -; CHECK-NEXT: bnez $zero, .LBB0_8 ; CHECK-NEXT: # %bb.6: # %calc_6g.i ; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 ; CHECK-NEXT: slli.d $s8, $zero, 3 @@ -120,7 +119,6 @@ define void @jr_without_ra(ptr %rtwdev, ptr %chan, ptr %h2c, i8 %.pre, i1 %cmp.i ; CHECK-NEXT: bnez $s3, .LBB0_1 ; CHECK-NEXT: # %bb.14: # %phy_tssi_get_ofdm_trim_de.exit ; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: bnez $zero, .LBB0_3 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_15: # %sw.bb9.i.i ; CHECK-NEXT: ld.d $s8, $sp, 8 # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 661f67d4989c4..d953cef1fd4c9 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -166,6 +166,7 @@ ; LAXX-NEXT: Insert fentry calls ; LAXX-NEXT: Insert XRay ops ; LAXX-NEXT: Implement the 'patchable-function' attribute +; LAXX-NEXT: LoongArch Late Branch Optimisation Pass ; LAXX-NEXT: Branch relaxation pass ; LAXX-NEXT: Contiguously Lay Out Funclets ; LAXX-NEXT: Remove Loads Into Fake Uses From 50e77026750583eed56e2e2ef5fdb5787ac410af Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Wed, 19 Nov 2025 15:14:55 +0800 Subject: [PATCH 2/3] tests passed --- .../Inputs/loongarch_generated_funcs.ll.generated.expected | 2 +- .../Inputs/loongarch_generated_funcs.ll.nogenerated.expected | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected index eda7e771c128b..005224e8951d5 100644 --- 
a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected @@ -75,7 +75,7 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: st.w $zero, $fp, -12 ; CHECK-NEXT: st.w $zero, $fp, -16 ; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: beq $zero, $zero, .LBB0_3 +; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: st.w $a0, $fp, -24 ; CHECK-NEXT: ld.w $a0, $fp, -16 diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected index aab63fa7176c1..f58e876277313 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected @@ -16,7 +16,7 @@ define dso_local i32 @check_boundaries() #0 { ; CHECK-NEXT: st.w $zero, $fp, -12 ; CHECK-NEXT: st.w $zero, $fp, -16 ; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: beq $zero, $zero, .LBB0_3 +; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: st.w $a0, $fp, -24 ; CHECK-NEXT: ld.w $a0, $fp, -16 From 208be487d8316896b5de8ddbc24f1bd5728aa703 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Wed, 19 Nov 2025 15:17:28 +0800 Subject: [PATCH 3/3] update isLoadImm --- .../LoongArch/LoongArchLateBranchOpt.cpp | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchLateBranchOpt.cpp b/llvm/lib/Target/LoongArch/LoongArchLateBranchOpt.cpp index 00474c08547ec..356ebd51df34f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLateBranchOpt.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchLateBranchOpt.cpp @@ -54,21 +54,27 
@@ INITIALIZE_PASS(LoongArchLateBranchOpt, "loongarch-late-branch-opt",
                 LOONGARCH_LATE_BRANCH_OPT_NAME, false, false)
 
 // Return true if the instruction is a load immediate instruction.
-// TODO: Need more consideration?
+// TODO: Recognise multi-instruction immediates (e.g. lu12i.w followed by ori).
 bool LoongArchLateBranchOpt::isLoadImm(const MachineInstr *MI,
                                        int64_t &Imm) const {
-  unsigned Addi = ST->is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
-  if (MI->getOpcode() == Addi && MI->getOperand(1).isReg() &&
-      MI->getOperand(1).getReg() == LoongArch::R0) {
-    Imm = MI->getOperand(2).getImm();
+  unsigned Shift = 0;
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case LoongArch::LU52I_D:
+    Shift = 52;
+    [[fallthrough]];
+  case LoongArch::ORI:
+  case LoongArch::ADDI_W:
+    if (!MI->getOperand(1).isReg() ||
+        MI->getOperand(1).getReg() != LoongArch::R0)
+      return false;
+    Imm = (int64_t)((uint64_t)MI->getOperand(2).getImm() << Shift);
     return true;
-  }
-  if (MI->getOpcode() == LoongArch::ORI && MI->getOperand(1).isReg() &&
-      MI->getOperand(1).getReg() == LoongArch::R0) {
-    Imm = MI->getOperand(2).getImm();
+  case LoongArch::LU12I_W:
+    Imm = (int64_t)((uint64_t)MI->getOperand(1).getImm() << 12);
     return true;
   }
-  return false;
 }
 
 // Return true if the operand is a load immediate instruction and