-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Riscv branch peephole opt #90451
base: main
Are you sure you want to change the base?
Riscv branch peephole opt #90451
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Zhijin Zeng (zengdage) ChangesAfter
Full diff: https://github.com/llvm/llvm-project/pull/90451.diff 6 Files Affected:
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 8715403f3839a6..429a22a5323795 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -45,6 +45,7 @@ add_llvm_target(RISCVCodeGen
RISCVISelLowering.cpp
RISCVMachineFunctionInfo.cpp
RISCVMergeBaseOffset.cpp
+ RISCVMIPeepholeOpt.cpp
RISCVOptWInstrs.cpp
RISCVPostRAExpandPseudoInsts.cpp
RISCVRedundantCopyElimination.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index d405395dcf9ec4..d808eca6169662 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -78,6 +78,9 @@ void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
FunctionPass *createRISCVMoveMergePass();
void initializeRISCVMoveMergePass(PassRegistry &);
+FunctionPass *createRISCVMIPeepholeOptPass();
+void initializeRISCVMIPeepholeOptPass(PassRegistry &);
+
FunctionPass *createRISCVPushPopOptimizationPass();
void initializeRISCVPushPopOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVMIPeepholeOpt.cpp b/llvm/lib/Target/RISCV/RISCVMIPeepholeOpt.cpp
new file mode 100644
index 00000000000000..e7d8328985ceda
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMIPeepholeOpt.cpp
@@ -0,0 +1,121 @@
+//===- RISCVMIPeepholeOpt.cpp - RISC-V MI peephole optimization pass ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs below peephole optimizations on MIR level.
+//
+// 1. Remove redundant branch instrunctions which may be generated by
+// block-placement.
+//
+// bne a1, a1, %bb.1
+// blt a1, a1, %bb.1
+// bltu a1, a1, %bb.1
+//
+// These instruction is bound to fallthrough to next basic block, rather
+// than into the %bb.1, so it should be removed here.
+//
+// 2. beq a1, a1, %bb.1 -> j %bb.1
+// bge a1, a1, %bb.1 -> j %bb.1
+// bgeu a1, a1, %bb.1 -> j %bb.1
+//
+// These instruction is bound to go into %bb.1, so it should be replaced by
+// j pseudo instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-mi-peephole-opt"
+
+namespace {
+
+struct RISCVMIPeepholeOpt : public MachineFunctionPass {
+ static char ID;
+
+ RISCVMIPeepholeOpt() : MachineFunctionPass(ID) {
+ initializeRISCVMIPeepholeOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ const RISCVInstrInfo *TII;
+
+ bool visitBranch(MachineInstr &MI);
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "RISC-V MI Peephole Optimization pass";
+ }
+};
+
+char RISCVMIPeepholeOpt::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(RISCVMIPeepholeOpt, "riscv-mi-peephole-opt",
+ "RISC-V MI Peephole Optimization", false, false)
+
+bool RISCVMIPeepholeOpt::visitBranch(MachineInstr &MI) {
+ Register FirstReg = MI.getOperand(0).getReg();
+ Register SecondReg = MI.getOperand(1).getReg();
+
+ if (FirstReg != SecondReg)
+ return false;
+
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case RISCV::BEQ:
+ case RISCV::BGE:
+ case RISCV::BGEU:
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(RISCV::JAL))
+ .addReg(RISCV::X0)
+ .add(MI.getOperand(2));
+ break;
+ }
+ MI.eraseFromParent();
+
+ return true;
+}
+
+bool RISCVMIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : make_early_inc_range(MBB)) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case RISCV::BNE:
+ case RISCV::BEQ:
+ case RISCV::BLT:
+ case RISCV::BLTU:
+ case RISCV::BGE:
+ case RISCV::BGEU:
+ Changed |= visitBranch(MI);
+ break;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createRISCVMIPeepholeOptPass() {
+ return new RISCVMIPeepholeOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 0876f46728a10c..1af33aac601698 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -111,6 +111,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVCodeGenPreparePass(*PR);
initializeRISCVPostRAExpandPseudoPass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
+ initializeRISCVMIPeepholeOptPass(*PR);
initializeRISCVOptWInstrsPass(*PR);
initializeRISCVPreRAExpandPseudoPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
@@ -501,6 +502,7 @@ void RISCVPassConfig::addPreEmitPass() {
void RISCVPassConfig::addPreEmitPass2() {
if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createRISCVMoveMergePass());
+ addPass(createRISCVMIPeepholeOptPass());
// Schedule PushPop Optimization before expansion of Pseudo instruction,
// ensuring return instruction is detected correctly.
addPass(createRISCVPushPopOptimizationPass());
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 4121d111091117..731d1aea8c14a0 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -195,6 +195,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
; CHECK-NEXT: RISC-V Zcmp move merging pass
+; CHECK-NEXT: RISC-V MI Peephole Optimization pass
; CHECK-NEXT: RISC-V Zcmp Push/Pop optimization pass
; CHECK-NEXT: RISC-V pseudo instruction expansion pass
; CHECK-NEXT: RISC-V atomic pseudo instruction expansion pass
diff --git a/llvm/test/CodeGen/RISCV/peephole-branch.mir b/llvm/test/CodeGen/RISCV/peephole-branch.mir
new file mode 100644
index 00000000000000..4c5d087269be39
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/peephole-branch.mir
@@ -0,0 +1,464 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -o - %s -mtriple=riscv32 -run-pass=riscv-mi-peephole-opt | FileCheck %s
+
+--- |
+ define void @peephole_bne(ptr %a, ptr %b, ptr %c) {
+ entry:
+ %e = load i32, ptr %a, align 4
+ %p = icmp ne i32 %e, %e
+ br i1 %p, label %block1, label %block2
+
+ block1: ; preds = %entry
+ store i32 %e, ptr %b, align 4
+ br label %end_block
+
+ block2: ; preds = %entry
+ store i32 87, ptr %c, align 4
+ br label %end_block
+
+ end_block: ; preds = %block2, %block1
+ ret void
+ }
+
+ define void @peephole_beq(ptr %a, ptr %b, ptr %c) {
+ entry:
+ %e = load i32, ptr %a, align 4
+ %p = icmp eq i32 %e, %e
+ br i1 %p, label %block1, label %block2
+
+ block1: ; preds = %entry
+ store i32 %e, ptr %b, align 4
+ br label %end_block
+
+ block2: ; preds = %entry
+ store i32 87, ptr %c, align 4
+ br label %end_block
+
+ end_block: ; preds = %block2, %block1
+ ret void
+ }
+
+ define void @peephole_blt(ptr %a, ptr %b, ptr %c) {
+ entry:
+ %e = load i32, ptr %a, align 4
+ %p = icmp slt i32 %e, %e
+ br i1 %p, label %block1, label %block2
+
+ block1: ; preds = %entry
+ store i32 %e, ptr %b, align 4
+ br label %end_block
+
+ block2: ; preds = %entry
+ store i32 87, ptr %c, align 4
+ br label %end_block
+
+ end_block: ; preds = %block2, %block1
+ ret void
+ }
+
+ define void @peephole_bltu(ptr %a, ptr %b, ptr %c) {
+ entry:
+ %e = load i32, ptr %a, align 4
+ %p = icmp ult i32 %e, %e
+ br i1 %p, label %block1, label %block2
+
+ block1: ; preds = %entry
+ store i32 %e, ptr %b, align 4
+ br label %end_block
+
+ block2: ; preds = %entry
+ store i32 87, ptr %c, align 4
+ br label %end_block
+
+ end_block: ; preds = %block2, %block1
+ ret void
+ }
+
+ define void @peephole_bge(ptr %a, ptr %b, ptr %c) {
+ entry:
+ %e = load i32, ptr %a, align 4
+ %p = icmp sge i32 %e, %e
+ br i1 %p, label %block1, label %block2
+
+ block1: ; preds = %entry
+ store i32 %e, ptr %b, align 4
+ br label %end_block
+
+ block2: ; preds = %entry
+ store i32 87, ptr %c, align 4
+ br label %end_block
+
+ end_block: ; preds = %block2, %block1
+ ret void
+ }
+
+ define void @peephole_bgeu(ptr %a, ptr %b, ptr %c) {
+ entry:
+ %e = load i32, ptr %a, align 4
+ %p = icmp uge i32 %e, %e
+ br i1 %p, label %block1, label %block2
+
+ block1: ; preds = %entry
+ store i32 %e, ptr %b, align 4
+ br label %end_block
+
+ block2: ; preds = %entry
+ store i32 87, ptr %c, align 4
+ br label %end_block
+
+ end_block: ; preds = %block2, %block1
+ ret void
+ }
+
+...
+---
+name: peephole_bne
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10' }
+ - { reg: '$x11' }
+ - { reg: '$x12' }
+body: |
+ ; CHECK-LABEL: name: peephole_bne
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x13 = ADDI $x0, 1
+ ; CHECK-NEXT: PseudoBR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.block1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = LW killed renamable $x10, 0
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x11, 0
+ ; CHECK-NEXT: PseudoBR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.block2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = ADDI $x0, 87
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x12, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.end_block:
+ ; CHECK-NEXT: PseudoRET
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $x10, $x11, $x12
+
+ renamable $x13 = ADDI $x0, 1
+ BNE killed renamable $x13, $x13, %bb.2
+ PseudoBR %bb.1
+
+ bb.1.block1:
+ liveins: $x10, $x11
+
+ renamable $x10 = LW killed renamable $x10, 0
+ SW killed renamable $x10, killed renamable $x11, 0
+ PseudoBR %bb.3
+
+ bb.2.block2:
+ liveins: $x12
+
+ renamable $x10 = ADDI $x0, 87
+ SW killed renamable $x10, killed renamable $x12, 0
+
+ bb.3.end_block:
+ PseudoRET
+
+...
+---
+name: peephole_beq
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10' }
+ - { reg: '$x11' }
+ - { reg: '$x12' }
+body: |
+ ; CHECK-LABEL: name: peephole_beq
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JAL $x0, %bb.2
+ ; CHECK-NEXT: PseudoBR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.block1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = LW killed renamable $x10, 0
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x11, 0
+ ; CHECK-NEXT: PseudoBR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.block2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = ADDI $x0, 87
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x12, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.end_block:
+ ; CHECK-NEXT: PseudoRET
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $x10, $x11, $x12
+
+ BEQ $x0, $x0, %bb.2
+ PseudoBR %bb.1
+
+ bb.1.block1:
+ liveins: $x10, $x11
+
+ renamable $x10 = LW killed renamable $x10, 0
+ SW killed renamable $x10, killed renamable $x11, 0
+ PseudoBR %bb.3
+
+ bb.2.block2:
+ liveins: $x12
+
+ renamable $x10 = ADDI $x0, 87
+ SW killed renamable $x10, killed renamable $x12, 0
+
+ bb.3.end_block:
+ PseudoRET
+
+...
+---
+name: peephole_blt
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10' }
+ - { reg: '$x11' }
+ - { reg: '$x12' }
+body: |
+ ; CHECK-LABEL: name: peephole_blt
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x13 = ADDI $x0, 1
+ ; CHECK-NEXT: PseudoBR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.block1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = LW killed renamable $x10, 0
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x11, 0
+ ; CHECK-NEXT: PseudoBR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.block2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = ADDI $x0, 87
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x12, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.end_block:
+ ; CHECK-NEXT: PseudoRET
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $x10, $x11, $x12
+
+ renamable $x13 = ADDI $x0, 1
+ BLT killed renamable $x13, $x13, %bb.2
+ PseudoBR %bb.1
+
+ bb.1.block1:
+ liveins: $x10, $x11
+
+ renamable $x10 = LW killed renamable $x10, 0
+ SW killed renamable $x10, killed renamable $x11, 0
+ PseudoBR %bb.3
+
+ bb.2.block2:
+ liveins: $x12
+
+ renamable $x10 = ADDI $x0, 87
+ SW killed renamable $x10, killed renamable $x12, 0
+
+ bb.3.end_block:
+ PseudoRET
+
+...
+---
+name: peephole_bltu
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10' }
+ - { reg: '$x11' }
+ - { reg: '$x12' }
+body: |
+ ; CHECK-LABEL: name: peephole_bltu
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x13 = ADDI $x0, 1
+ ; CHECK-NEXT: PseudoBR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.block1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = LW killed renamable $x10, 0
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x11, 0
+ ; CHECK-NEXT: PseudoBR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.block2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = ADDI $x0, 87
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x12, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.end_block:
+ ; CHECK-NEXT: PseudoRET
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $x10, $x11, $x12
+
+ renamable $x13 = ADDI $x0, 1
+ BLTU killed renamable $x13, $x13, %bb.2
+ PseudoBR %bb.1
+
+ bb.1.block1:
+ liveins: $x10, $x11
+
+ renamable $x10 = LW killed renamable $x10, 0
+ SW killed renamable $x10, killed renamable $x11, 0
+ PseudoBR %bb.3
+
+ bb.2.block2:
+ liveins: $x12
+
+ renamable $x10 = ADDI $x0, 87
+ SW killed renamable $x10, killed renamable $x12, 0
+
+ bb.3.end_block:
+ PseudoRET
+
+...
+---
+name: peephole_bge
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10' }
+ - { reg: '$x11' }
+ - { reg: '$x12' }
+body: |
+ ; CHECK-LABEL: name: peephole_bge
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JAL $x0, %bb.2
+ ; CHECK-NEXT: PseudoBR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.block1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = LW killed renamable $x10, 0
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x11, 0
+ ; CHECK-NEXT: PseudoBR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.block2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = ADDI $x0, 87
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x12, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.end_block:
+ ; CHECK-NEXT: PseudoRET
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $x10, $x11, $x12
+
+ BGE $x0, $x0, %bb.2
+ PseudoBR %bb.1
+
+ bb.1.block1:
+ liveins: $x10, $x11
+
+ renamable $x10 = LW killed renamable $x10, 0
+ SW killed renamable $x10, killed renamable $x11, 0
+ PseudoBR %bb.3
+
+ bb.2.block2:
+ liveins: $x12
+
+ renamable $x10 = ADDI $x0, 87
+ SW killed renamable $x10, killed renamable $x12, 0
+
+ bb.3.end_block:
+ PseudoRET
+
+...
+---
+name: peephole_bgeu
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10' }
+ - { reg: '$x11' }
+ - { reg: '$x12' }
+body: |
+ ; CHECK-LABEL: name: peephole_bgeu
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JAL $x0, %bb.2
+ ; CHECK-NEXT: PseudoBR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.block1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = LW killed renamable $x10, 0
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x11, 0
+ ; CHECK-NEXT: PseudoBR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.block2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10 = ADDI $x0, 87
+ ; CHECK-NEXT: SW killed renamable $x10, killed renamable $x12, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.end_block:
+ ; CHECK-NEXT: PseudoRET
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $x10, $x11, $x12
+
+ BGEU $x0, $x0, %bb.2
+ PseudoBR %bb.1
+
+ bb.1.block1:
+ liveins: $x10, $x11
+
+ renamable $x10 = LW killed renamable $x10, 0
+ SW killed renamable $x10, killed renamable $x11, 0
+ PseudoBR %bb.3
+
+ bb.2.block2:
+ liveins: $x12
+
+ renamable $x10 = ADDI $x0, 87
+ SW killed renamable $x10, killed renamable $x12, 0
+
+ bb.3.end_block:
+ PseudoRET
+
+...
|
Do you have an end-to-end C language example that will produce these instructions? |
The following is the minimal code which can produce the
Build command: |
That compiles to an empty function on compiler explorer https://godbolt.org/z/ss3fqYTo4 |
Is this a widespread problem? In my local testing I see 2 places in perlbench. Do you expect a performance gain from fixing this on perlbench? I'm hesitant to add a new pass for a problem that occurs twice if it doesn't improve performance. |
if (skipFunction(MF.getFunction())) | ||
return false; | ||
|
||
TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
Or you can change TII
to a generic TargetInstrInfo*
and use
TII = MF.getSubtarget().getInstrInfo()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have changed it.
case RISCV::BEQ: | ||
case RISCV::BGE: | ||
case RISCV::BGEU: | ||
BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(RISCV::JAL)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should use RISCV::PseudoBR
and drop the X0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have changed it.
Sorry, these are 13 places in my local SPEC2006 perlbench. Build command: A list of these instruction from the above build command. If add
And my llvm project based on
|
9420428
to
6e334c2
Compare
Negligible performance gain for perlbench, may be 0.1%. |
My grep of the disassembly missed the beq/bne x0, x0 cases because they are beqz/bnez with the compressed extension and I didn't check for that. |
Hello, is this pr worth merging? If not, should I close it? |
6e334c2
to
e7dc37d
Compare
After block-placement and machine-cp, the following situations may require optimization. ``` bne a0, a0, %bb.2 -> remove it blt a0, a0, %bb.2 -> remove it bltu a0, a0, %bb.2 -> remove it beq a0, a0, %bb.2 -> br, %bb.2 bge a0, a0, %bb.2 -> br, %bb.2 bgeu a0, a0, %bb.2 -> br, %bb.2 ```
e7dc37d
to
0c58437
Compare
After
block-placement
andmachine-cp
, the following situations may exist and require optimization. I don't know how to do this optimization, so I try to add a new pass and run it aftermachine-cp
. Maybe this is not the right way to do it.