Skip to content

Commit

Permalink
[RISCV] Enable machine copy propagation for copy-like instructions
Browse files Browse the repository at this point in the history
This mirrors what has been done for AArch64 (D125335).

We enable this under `-O2` to show the codegen diffs here, but we
may later restrict it to `-O3`, like AArch64.

There are two cases in which we may produce these eliminable copies:
1. ISel of `FrameIndex`, as in `rvv/fixed-vectors-calling-conv.ll`.
2. Tail duplication, as in `select-optimize-multiple.ll`.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D144535
  • Loading branch information
pcwang-thead committed Mar 7, 2023
1 parent 3b1240e commit 5fdab3c
Show file tree
Hide file tree
Showing 13 changed files with 103 additions and 38 deletions.
11 changes: 11 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Expand Up @@ -66,6 +66,11 @@ static cl::opt<int> RVVVectorBitsMinOpt(
"autovectorization with fixed width vectors."),
cl::init(-1), cl::Hidden);

// Hidden command-line flag, enabled by default, that gates the Machine Copy
// Propagation pass added in RISCVPassConfig::addPreEmitPass2(). Pass
// -riscv-enable-copy-propagation=false to turn that pass off.
static cl::opt<bool> EnableRISCVCopyPropagation(
"riscv-enable-copy-propagation",
cl::desc("Enable the copy propagation with RISCV copy instr"),
cl::init(true), cl::Hidden);

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
Expand Down Expand Up @@ -336,6 +341,12 @@ void RISCVPassConfig::addPreEmitPass() {

void RISCVPassConfig::addPreEmitPass2() {
addPass(createRISCVExpandPseudoPass());

// Do the copy propagation after expanding pseudos because we may produce some
// MVs when expanding.
if (TM->getOptLevel() >= CodeGenOpt::Default && EnableRISCVCopyPropagation)
addPass(createMachineCopyPropagationPass(true));

// Schedule the expansion of AMOs at the last possible moment, avoiding the
// possibility for other passes to break the requirements for forward
// progress in the LR/SC block.
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
Expand Up @@ -176,6 +176,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
; CHECK-NEXT: RISCV pseudo instruction expansion pass
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: RISCV atomic pseudo instruction expansion pass
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
Expand Down
64 changes: 64 additions & 0 deletions llvm/test/CodeGen/RISCV/copyprop.ll
@@ -0,0 +1,64 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O3 -mtriple=riscv64 -riscv-enable-copy-propagation=false | FileCheck %s --check-prefix=NOPROP
; RUN: llc < %s -O3 -mtriple=riscv64 -riscv-enable-copy-propagation=true | FileCheck %s --check-prefix=PROP

; Without copy propagation, %bb.1 materializes the phi's 0 with `li a0, 0` and
; stores it via `sw a0, 0(a1)`; with it enabled, the store uses `zero` directly
; and the `li a0, 0` disappears.
define void @copyprop_after_mbp(i32 %v, i32* %a, i32* %b, i32* %c, i32* %d) {
; NOPROP-LABEL: copyprop_after_mbp:
; NOPROP: # %bb.0:
; NOPROP-NEXT: sext.w a0, a0
; NOPROP-NEXT: li a5, 10
; NOPROP-NEXT: bne a0, a5, .LBB0_2
; NOPROP-NEXT: # %bb.1: # %bb.0
; NOPROP-NEXT: li a0, 15
; NOPROP-NEXT: sw a0, 0(a2)
; NOPROP-NEXT: li a0, 1
; NOPROP-NEXT: sw a0, 0(a1)
; NOPROP-NEXT: li a0, 12
; NOPROP-NEXT: sw a0, 0(a4)
; NOPROP-NEXT: ret
; NOPROP-NEXT: .LBB0_2: # %bb.1
; NOPROP-NEXT: li a0, 0
; NOPROP-NEXT: li a2, 25
; NOPROP-NEXT: sw a2, 0(a3)
; NOPROP-NEXT: sw a0, 0(a1)
; NOPROP-NEXT: li a0, 12
; NOPROP-NEXT: sw a0, 0(a4)
; NOPROP-NEXT: ret
;
; PROP-LABEL: copyprop_after_mbp:
; PROP: # %bb.0:
; PROP-NEXT: sext.w a0, a0
; PROP-NEXT: li a5, 10
; PROP-NEXT: bne a0, a5, .LBB0_2
; PROP-NEXT: # %bb.1: # %bb.0
; PROP-NEXT: li a0, 15
; PROP-NEXT: sw a0, 0(a2)
; PROP-NEXT: li a0, 1
; PROP-NEXT: sw a0, 0(a1)
; PROP-NEXT: li a0, 12
; PROP-NEXT: sw a0, 0(a4)
; PROP-NEXT: ret
; PROP-NEXT: .LBB0_2: # %bb.1
; PROP-NEXT: li a2, 25
; PROP-NEXT: sw a2, 0(a3)
; PROP-NEXT: sw zero, 0(a1)
; PROP-NEXT: li a0, 12
; PROP-NEXT: sw a0, 0(a4)
; PROP-NEXT: ret
%1 = icmp eq i32 %v, 10
br i1 %1, label %bb.0, label %bb.1

bb.0:
store i32 15, i32* %b, align 4
br label %bb.2

bb.1:
store i32 25, i32* %c, align 4
br label %bb.2

bb.2:
%2 = phi i32 [ 1, %bb.0 ], [ 0, %bb.1 ]
store i32 %2, i32* %a, align 4
store i32 12, i32* %d, align 4
ret void
}
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/fpclamptosat.ll
Expand Up @@ -92,7 +92,7 @@ define i32 @stest_f64i32(double %x) {
; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB0_3: # %entry
; RV64IFD-NEXT: mv a0, a2
; RV64IFD-NEXT: blt a1, a0, .LBB0_2
; RV64IFD-NEXT: blt a1, a2, .LBB0_2
; RV64IFD-NEXT: .LBB0_4: # %entry
; RV64IFD-NEXT: lui a0, 524288
; RV64IFD-NEXT: ret
Expand Down Expand Up @@ -273,7 +273,7 @@ define i32 @stest_f32i32(float %x) {
; RV64-NEXT: ret
; RV64-NEXT: .LBB3_3: # %entry
; RV64-NEXT: mv a0, a2
; RV64-NEXT: blt a1, a0, .LBB3_2
; RV64-NEXT: blt a1, a2, .LBB3_2
; RV64-NEXT: .LBB3_4: # %entry
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: ret
Expand Down Expand Up @@ -2043,7 +2043,7 @@ define i32 @stest_f64i32_mm(double %x) {
; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB27_3: # %entry
; RV64IFD-NEXT: mv a0, a2
; RV64IFD-NEXT: blt a1, a0, .LBB27_2
; RV64IFD-NEXT: blt a1, a2, .LBB27_2
; RV64IFD-NEXT: .LBB27_4: # %entry
; RV64IFD-NEXT: lui a0, 524288
; RV64IFD-NEXT: ret
Expand Down Expand Up @@ -2211,7 +2211,7 @@ define i32 @stest_f32i32_mm(float %x) {
; RV64-NEXT: ret
; RV64-NEXT: .LBB30_3: # %entry
; RV64-NEXT: mv a0, a2
; RV64-NEXT: blt a1, a0, .LBB30_2
; RV64-NEXT: blt a1, a2, .LBB30_2
; RV64-NEXT: .LBB30_4: # %entry
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: ret
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
Expand Up @@ -27,7 +27,7 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) {
; CHECK-NOV-NEXT: blt a0, a3, .LBB0_2
; CHECK-NOV-NEXT: .LBB0_6: # %entry
; CHECK-NOV-NEXT: mv a0, a3
; CHECK-NOV-NEXT: blt a2, a0, .LBB0_3
; CHECK-NOV-NEXT: blt a2, a3, .LBB0_3
; CHECK-NOV-NEXT: .LBB0_7: # %entry
; CHECK-NOV-NEXT: lui a0, 524288
; CHECK-NOV-NEXT: blt a2, a1, .LBB0_4
Expand Down Expand Up @@ -182,7 +182,7 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: blt a5, a6, .LBB3_4
; CHECK-NOV-NEXT: .LBB3_13: # %entry
; CHECK-NOV-NEXT: mv a5, a6
; CHECK-NOV-NEXT: blt a3, a5, .LBB3_5
; CHECK-NOV-NEXT: blt a3, a6, .LBB3_5
; CHECK-NOV-NEXT: .LBB3_14: # %entry
; CHECK-NOV-NEXT: lui a5, 524288
; CHECK-NOV-NEXT: blt a3, a4, .LBB3_6
Expand Down Expand Up @@ -431,7 +431,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: blt a3, a4, .LBB6_4
; CHECK-NOV-NEXT: .LBB6_13: # %entry
; CHECK-NOV-NEXT: mv a3, a4
; CHECK-NOV-NEXT: blt a1, a3, .LBB6_5
; CHECK-NOV-NEXT: blt a1, a4, .LBB6_5
; CHECK-NOV-NEXT: .LBB6_14: # %entry
; CHECK-NOV-NEXT: lui a3, 524288
; CHECK-NOV-NEXT: blt a1, a2, .LBB6_6
Expand Down Expand Up @@ -3324,7 +3324,7 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
; CHECK-NOV-NEXT: blt a0, a3, .LBB27_2
; CHECK-NOV-NEXT: .LBB27_6: # %entry
; CHECK-NOV-NEXT: mv a0, a3
; CHECK-NOV-NEXT: blt a2, a0, .LBB27_3
; CHECK-NOV-NEXT: blt a2, a3, .LBB27_3
; CHECK-NOV-NEXT: .LBB27_7: # %entry
; CHECK-NOV-NEXT: lui a0, 524288
; CHECK-NOV-NEXT: blt a2, a1, .LBB27_4
Expand Down Expand Up @@ -3474,7 +3474,7 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: blt a5, a6, .LBB30_4
; CHECK-NOV-NEXT: .LBB30_13: # %entry
; CHECK-NOV-NEXT: mv a5, a6
; CHECK-NOV-NEXT: blt a3, a5, .LBB30_5
; CHECK-NOV-NEXT: blt a3, a6, .LBB30_5
; CHECK-NOV-NEXT: .LBB30_14: # %entry
; CHECK-NOV-NEXT: lui a5, 524288
; CHECK-NOV-NEXT: blt a3, a4, .LBB30_6
Expand Down Expand Up @@ -3718,7 +3718,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: blt a3, a4, .LBB33_4
; CHECK-NOV-NEXT: .LBB33_13: # %entry
; CHECK-NOV-NEXT: mv a3, a4
; CHECK-NOV-NEXT: blt a1, a3, .LBB33_5
; CHECK-NOV-NEXT: blt a1, a4, .LBB33_5
; CHECK-NOV-NEXT: .LBB33_14: # %entry
; CHECK-NOV-NEXT: lui a3, 524288
; CHECK-NOV-NEXT: blt a1, a2, .LBB33_6
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
Expand Up @@ -1188,17 +1188,15 @@ define <32 x i32> @vector_arg_via_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4,
; LMULMAX8-LABEL: vector_arg_via_stack:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: li a0, 32
; LMULMAX8-NEXT: mv a1, sp
; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; LMULMAX8-NEXT: vle32.v v16, (a1)
; LMULMAX8-NEXT: vle32.v v16, (sp)
; LMULMAX8-NEXT: vadd.vv v8, v8, v16
; LMULMAX8-NEXT: ret
;
; LMULMAX4-LABEL: vector_arg_via_stack:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT: mv a0, sp
; LMULMAX4-NEXT: vle32.v v16, (a0)
; LMULMAX4-NEXT: vle32.v v16, (sp)
; LMULMAX4-NEXT: addi a0, sp, 64
; LMULMAX4-NEXT: vle32.v v20, (a0)
; LMULMAX4-NEXT: vadd.vv v8, v8, v16
Expand All @@ -1210,8 +1208,7 @@ define <32 x i32> @vector_arg_via_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4,
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: addi a0, sp, 64
; LMULMAX2-NEXT: vle32.v v16, (a0)
; LMULMAX2-NEXT: mv a0, sp
; LMULMAX2-NEXT: vle32.v v18, (a0)
; LMULMAX2-NEXT: vle32.v v18, (sp)
; LMULMAX2-NEXT: addi a0, sp, 32
; LMULMAX2-NEXT: vle32.v v20, (a0)
; LMULMAX2-NEXT: addi a0, sp, 96
Expand All @@ -1235,8 +1232,7 @@ define <32 x i32> @vector_arg_via_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4,
; LMULMAX1-NEXT: vle32.v v19, (a0)
; LMULMAX1-NEXT: addi a0, sp, 32
; LMULMAX1-NEXT: vle32.v v20, (a0)
; LMULMAX1-NEXT: mv a0, sp
; LMULMAX1-NEXT: vle32.v v21, (a0)
; LMULMAX1-NEXT: vle32.v v21, (sp)
; LMULMAX1-NEXT: addi a0, sp, 16
; LMULMAX1-NEXT: vle32.v v22, (a0)
; LMULMAX1-NEXT: addi a0, sp, 48
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/sadd_sat.ll
Expand Up @@ -37,7 +37,7 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_3:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: blt a1, a0, .LBB0_2
; RV64I-NEXT: blt a1, a2, .LBB0_2
; RV64I-NEXT: .LBB0_4:
; RV64I-NEXT: lui a0, 524288
; RV64I-NEXT: ret
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
Expand Up @@ -40,7 +40,7 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_3:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: blt a1, a0, .LBB0_2
; RV64I-NEXT: blt a1, a2, .LBB0_2
; RV64I-NEXT: .LBB0_4:
; RV64I-NEXT: lui a0, 524288
; RV64I-NEXT: ret
Expand Down
14 changes: 4 additions & 10 deletions llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
Expand Up @@ -267,11 +267,9 @@ define i32 @cmovccdep(i32 signext %a, i32 %b, i32 %c, i32 %d) nounwind {
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB6_3: # %entry
; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: mv a2, a1
; RV32I-NEXT: beq a0, a4, .LBB6_2
; RV32I-NEXT: .LBB6_4: # %entry
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: add a0, a1, a2
; RV32I-NEXT: add a0, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: cmovccdep:
Expand All @@ -286,11 +284,9 @@ define i32 @cmovccdep(i32 signext %a, i32 %b, i32 %c, i32 %d) nounwind {
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB6_3: # %entry
; RV64I-NEXT: mv a1, a2
; RV64I-NEXT: mv a2, a1
; RV64I-NEXT: beq a0, a4, .LBB6_2
; RV64I-NEXT: .LBB6_4: # %entry
; RV64I-NEXT: mv a2, a3
; RV64I-NEXT: addw a0, a1, a2
; RV64I-NEXT: addw a0, a1, a3
; RV64I-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 123
Expand All @@ -317,8 +313,7 @@ define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind {
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: bnez a1, .LBB7_2
; RV32I-NEXT: .LBB7_4: # %entry
; RV32I-NEXT: mv a4, a5
; RV32I-NEXT: add a0, a2, a4
; RV32I-NEXT: add a0, a2, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: cmovdiffcc:
Expand All @@ -335,8 +330,7 @@ define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind {
; RV64I-NEXT: mv a2, a3
; RV64I-NEXT: bnez a1, .LBB7_2
; RV64I-NEXT: .LBB7_4: # %entry
; RV64I-NEXT: mv a4, a5
; RV64I-NEXT: addw a0, a2, a4
; RV64I-NEXT: addw a0, a2, a5
; RV64I-NEXT: ret
entry:
%cond1 = select i1 %a, i32 %c, i32 %d
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/split-offsets.ll
Expand Up @@ -61,7 +61,7 @@ define void @test2(ptr %sp, ptr %t, i32 %n) {
; RV32I-NEXT: addi a4, a4, -1920
; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: add a0, a0, a4
; RV32I-NEXT: bge a3, a2, .LBB1_2
; RV32I-NEXT: blez a2, .LBB1_2
; RV32I-NEXT: .LBB1_1: # %while_body
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: addi a4, a3, 1
Expand All @@ -70,7 +70,7 @@ define void @test2(ptr %sp, ptr %t, i32 %n) {
; RV32I-NEXT: sw a4, 0(a1)
; RV32I-NEXT: sw a3, 4(a1)
; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: blt a3, a2, .LBB1_1
; RV32I-NEXT: blt a4, a2, .LBB1_1
; RV32I-NEXT: .LBB1_2: # %while_end
; RV32I-NEXT: ret
;
Expand All @@ -83,7 +83,7 @@ define void @test2(ptr %sp, ptr %t, i32 %n) {
; RV64I-NEXT: add a1, a1, a4
; RV64I-NEXT: add a0, a0, a4
; RV64I-NEXT: sext.w a2, a2
; RV64I-NEXT: bge a3, a2, .LBB1_2
; RV64I-NEXT: blez a2, .LBB1_2
; RV64I-NEXT: .LBB1_1: # %while_body
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: addiw a4, a3, 1
Expand All @@ -92,7 +92,7 @@ define void @test2(ptr %sp, ptr %t, i32 %n) {
; RV64I-NEXT: sw a4, 0(a1)
; RV64I-NEXT: sw a3, 4(a1)
; RV64I-NEXT: mv a3, a4
; RV64I-NEXT: blt a3, a2, .LBB1_1
; RV64I-NEXT: blt a4, a2, .LBB1_1
; RV64I-NEXT: .LBB1_2: # %while_end
; RV64I-NEXT: ret
entry:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/ssub_sat.ll
Expand Up @@ -37,7 +37,7 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_3:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: blt a1, a0, .LBB0_2
; RV64I-NEXT: blt a1, a2, .LBB0_2
; RV64I-NEXT: .LBB0_4:
; RV64I-NEXT: lui a0, 524288
; RV64I-NEXT: ret
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
Expand Up @@ -40,7 +40,7 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_3:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: blt a1, a0, .LBB0_2
; RV64I-NEXT: blt a1, a2, .LBB0_2
; RV64I-NEXT: .LBB0_4:
; RV64I-NEXT: lui a0, 524288
; RV64I-NEXT: ret
Expand Down
Expand Up @@ -10,10 +10,9 @@ define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
; CHECK-NEXT: vsetvli a4, a2, e8, m8, ta, ma
; CHECK-NEXT: bne a4, a2, .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: # %if.then
; CHECK-NEXT: add a2, a0, a2
Expand Down

0 comments on commit 5fdab3c

Please sign in to comment.