Skip to content

Commit

Permalink
[RISCV] Add sink-and-fold support for RISC-V. (#67602)
Browse files Browse the repository at this point in the history
This uses the recently introduced sink-and-fold support in MachineSink.
https://reviews.llvm.org/D152828
    
This enables folding ADDI into load/store addresses.
    
Enabling by default will be a separate PR.
  • Loading branch information
topperc committed Oct 7, 2023
1 parent 8ee38f3 commit 45636ec
Show file tree
Hide file tree
Showing 9 changed files with 388 additions and 391 deletions.
68 changes: 68 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1907,6 +1907,74 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}

// Decide whether the ADDI AddrI, which defines Reg, can be folded into the
// address of the memory instruction MemI.
//
// MemI   - candidate user; only the scalar integer/FP loads and stores listed
//          in the switch below are accepted.
// Reg    - the register produced by AddrI.
// AddrI  - candidate address computation; must be "ADDI rd, rs, imm" with a
//          register source and an immediate operand.
// AM     - output; written only when the function returns true. It is filled
//          with the ADDI's source register as base, no scaled register, and
//          the combined displacement.
//
// Returns true when the combined offset (MemI's offset plus the ADDI
// immediate) still fits in a signed 12-bit field, false otherwise.
bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
                                         const MachineInstr &AddrI,
                                         ExtAddrMode &AM) const {
  switch (MemI.getOpcode()) {
  default:
    return false;
  case RISCV::LB:
  case RISCV::LBU:
  case RISCV::LH:
  case RISCV::LHU:
  case RISCV::LW:
  case RISCV::LWU:
  case RISCV::LD:
  case RISCV::FLH:
  case RISCV::FLW:
  case RISCV::FLD:
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
  case RISCV::SD:
  case RISCV::FSH:
  case RISCV::FSW:
  case RISCV::FSD:
    break;
  }

  // Operand 0 is the data register of the load/store. If Reg is used there it
  // is not (only) the address base, so the ADDI cannot be folded away.
  if (MemI.getOperand(0).getReg() == Reg)
    return false;

  // The offset operand of MemI is read with getImm() below, so it has to be a
  // plain immediate. It may be something else (for example a symbolic %lo()
  // operand), in which case there is no numeric offset to combine with.
  if (!MemI.getOperand(2).isImm())
    return false;

  if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
      !AddrI.getOperand(2).isImm())
    return false;

  int64_t OldOffset = MemI.getOperand(2).getImm();
  int64_t Disp = AddrI.getOperand(2).getImm();
  int64_t NewOffset = OldOffset + Disp;
  // On RV32 the address arithmetic wraps at 32 bits; model that before
  // checking the range.
  if (!STI.is64Bit())
    NewOffset = SignExtend64<32>(NewOffset);

  // The folded offset must still be encodable as a signed 12-bit immediate.
  if (!isInt<12>(NewOffset))
    return false;

  AM.BaseReg = AddrI.getOperand(1).getReg();
  AM.ScaledReg = 0;
  AM.Scale = 0;
  AM.Displacement = NewOffset;
  AM.Form = ExtAddrMode::Formula::Basic;
  return true;
}

// Build a copy of the load/store MemI that addresses memory through AM
// (base register plus displacement) and insert it immediately before MemI.
// The new instruction keeps MemI's opcode, data register, memory operands and
// MI flags. MemI itself is not erased here. Only the base+displacement form is
// supported; a scaled register is rejected by the assertion.
MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
                                               const ExtAddrMode &AM) const {
  assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
         "Addressing mode not supported for folding");

  MachineBasicBlock &ParentBB = *MemI.getParent();
  const DebugLoc &Loc = MemI.getDebugLoc();

  // Operand 0 is a definition for loads and a plain use for stores.
  const auto DataRegFlags = MemI.mayLoad() ? RegState::Define : 0;
  const Register DataReg = MemI.getOperand(0).getReg();

  auto Builder = BuildMI(ParentBB, MemI, Loc, get(MemI.getOpcode()));
  Builder.addReg(DataReg, DataRegFlags);
  Builder.addReg(AM.BaseReg);
  Builder.addImm(AM.Displacement);
  Builder.setMemRefs(MemI.memoperands());
  Builder.setMIFlags(MemI.getFlags());
  return Builder;
}

// Return true if the base operand, byte offset and memory width of an
// instruction can be obtained. Width is the size of memory that is being
// loaded/stored.
bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,13 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;

// Return true if AddrI (an ADDI with an immediate operand that defines Reg)
// can be folded into the address of the scalar load/store MemI, i.e. the
// combined offset still fits in a signed 12-bit immediate. On success AM
// receives the new base register and displacement.
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
const MachineInstr &AddrI,
ExtAddrMode &AM) const override;

// Insert, before MemI, a load/store with the same opcode as MemI that uses
// the base register and displacement from AM, and return the new instruction.
// AM must not use a scaled register.
MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
const ExtAddrMode &AM) const override;

bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
int64_t &Offset, unsigned &Width,
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
Changed |= handleSubReg(MF, MI, DLD);
if (MI.isImplicitDef()) {
auto DstReg = MI.getOperand(0).getReg();
if (isVectorRegClass(DstReg))
if (DstReg.isVirtual() && isVectorRegClass(DstReg))
Changed |= handleImplicitDef(MBB, I);
}
}
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ static cl::opt<bool> EnableRISCVDeadRegisterElimination(
" them with stores to x0"),
cl::init(true));

// Hidden command-line option -riscv-enable-sink-fold. It is off by default;
// its value is passed to setEnableSinkAndFold() when the RISC-V pass
// configuration is constructed.
static cl::opt<bool>
EnableSinkFold("riscv-enable-sink-fold",
cl::desc("Enable sinking and folding of instruction copies"),
cl::init(false), cl::Hidden);

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
Expand Down Expand Up @@ -242,7 +247,9 @@ namespace {
class RISCVPassConfig : public TargetPassConfig {
public:
RISCVPassConfig(RISCVTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
: TargetPassConfig(TM, PM) {
setEnableSinkAndFold(EnableSinkFold);
}

RISCVTargetMachine &getRISCVTargetMachine() const {
return getTM<RISCVTargetMachine>();
Expand Down
32 changes: 14 additions & 18 deletions llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -code-model=medium < %s \
; RUN: | FileCheck -check-prefix=RV32I-MEDIUM %s
; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV32I-MEDIUM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV64I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -code-model=medium < %s \
; RUN: | FileCheck -check-prefix=RV64I-MEDIUM %s
; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV64I-MEDIUM %s

; We can often fold an ADDI into the offset of load/store instructions:
; (load (addi base, off1), off2) -> (load base, off1+off2)
Expand Down Expand Up @@ -769,14 +769,13 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: slli a0, a0, 4
; RV32I-NEXT: add a0, s0, a0
; RV32I-NEXT: addi s7, a0, 8
; RV32I-NEXT: add s7, s0, a0
; RV32I-NEXT: .LBB20_5: # %for.body
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call f@plt
; RV32I-NEXT: lw a0, 4(s7)
; RV32I-NEXT: lw a1, 0(s7)
; RV32I-NEXT: lw a0, 12(s7)
; RV32I-NEXT: lw a1, 8(s7)
; RV32I-NEXT: add a0, a0, s4
; RV32I-NEXT: add s3, a1, s3
; RV32I-NEXT: sltu s4, s3, a1
Expand Down Expand Up @@ -835,14 +834,13 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
; RV32I-MEDIUM-NEXT: li s3, 0
; RV32I-MEDIUM-NEXT: li s4, 0
; RV32I-MEDIUM-NEXT: slli a0, a0, 4
; RV32I-MEDIUM-NEXT: add a0, s0, a0
; RV32I-MEDIUM-NEXT: addi s7, a0, 8
; RV32I-MEDIUM-NEXT: add s7, s0, a0
; RV32I-MEDIUM-NEXT: .LBB20_5: # %for.body
; RV32I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-MEDIUM-NEXT: mv a0, s0
; RV32I-MEDIUM-NEXT: call f@plt
; RV32I-MEDIUM-NEXT: lw a0, 4(s7)
; RV32I-MEDIUM-NEXT: lw a1, 0(s7)
; RV32I-MEDIUM-NEXT: lw a0, 12(s7)
; RV32I-MEDIUM-NEXT: lw a1, 8(s7)
; RV32I-MEDIUM-NEXT: add a0, a0, s4
; RV32I-MEDIUM-NEXT: add s3, a1, s3
; RV32I-MEDIUM-NEXT: sltu s4, s3, a1
Expand Down Expand Up @@ -883,13 +881,12 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: li s2, 0
; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: addi s3, a0, 8
; RV64I-NEXT: add s3, a2, a0
; RV64I-NEXT: .LBB20_2: # %for.body
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call f@plt
; RV64I-NEXT: ld a0, 0(s3)
; RV64I-NEXT: ld a0, 8(s3)
; RV64I-NEXT: addi s1, s1, -1
; RV64I-NEXT: add s2, a0, s2
; RV64I-NEXT: bnez s1, .LBB20_2
Expand Down Expand Up @@ -920,13 +917,12 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
; RV64I-MEDIUM-NEXT: mv s1, a1
; RV64I-MEDIUM-NEXT: li s2, 0
; RV64I-MEDIUM-NEXT: slli a0, a0, 4
; RV64I-MEDIUM-NEXT: add a0, a2, a0
; RV64I-MEDIUM-NEXT: addi s3, a0, 8
; RV64I-MEDIUM-NEXT: add s3, a2, a0
; RV64I-MEDIUM-NEXT: .LBB20_2: # %for.body
; RV64I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-MEDIUM-NEXT: mv a0, s0
; RV64I-MEDIUM-NEXT: call f@plt
; RV64I-MEDIUM-NEXT: ld a0, 0(s3)
; RV64I-MEDIUM-NEXT: ld a0, 8(s3)
; RV64I-MEDIUM-NEXT: addi s1, s1, -1
; RV64I-MEDIUM-NEXT: add s2, a0, s2
; RV64I-MEDIUM-NEXT: bnez s1, .LBB20_2
Expand Down

0 comments on commit 45636ec

Please sign in to comment.