diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 7d142d38d0f9a..294927aecb94b 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -970,6 +970,16 @@ def TuneLUIADDIFusion : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion", "true", "Enable LUI+ADDI macrofusion">; +def TuneAUIPCADDIFusion + : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion", + "true", "Enable AUIPC+ADDI macrofusion">; +def TuneShiftedZExtFusion + : SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion", + "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">; +def TuneLDADDFusion + : SubtargetFeature<"ld-add-fusion", "HasLDADDFusion", + "true", "Enable LD+ADD macrofusion.">; + def TuneNoDefaultUnroll : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false", "Disable default unroll preference.">; @@ -987,9 +997,12 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", [TuneNoDefaultUnroll, TuneShortForwardBranchOpt]>; -def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron", - "Ventana-Veyron Series processors", - [TuneLUIADDIFusion]>; +def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron", + "Ventana Veyron-Series processors", + [TuneLUIADDIFusion, + TuneAUIPCADDIFusion, + TuneShiftedZExtFusion, + TuneLDADDFusion]>; // Assume that lock-free native-width atomics are available, even if the target // and operating system combination would not usually provide them. 
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp index 02a8d5c18fe1a..02ea5270823d8 100644 --- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp +++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp @@ -18,6 +18,101 @@ using namespace llvm; +static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) { + if (!SecondMI.getOperand(1).isReg()) + return false; + + if (SecondMI.getOperand(1).getReg() != FirstDest) + return false; + + // If the input is virtual make sure this is the only user. + if (FirstDest.isVirtual()) { + auto &MRI = SecondMI.getMF()->getRegInfo(); + return MRI.hasOneNonDBGUse(FirstDest); + } + + return SecondMI.getOperand(0).getReg() == FirstDest; +} + +// Fuse load with add: +// add rd, rs1, rs2 +// ld rd, 0(rd) +static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != RISCV::LD) + return false; + + if (!SecondMI.getOperand(2).isImm()) + return false; + + if (SecondMI.getOperand(2).getImm() != 0) + return false; + + // Given SecondMI, when FirstMI is unspecified, we must return + // whether SecondMI may be part of a fused pair at all. + if (!FirstMI) + return true; + + if (FirstMI->getOpcode() != RISCV::ADD) + return false; + + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); +} + +// Fuse these patterns: +// +// slli rd, rs1, 32 +// srli rd, rd, x +// where 0 <= x <= 32 +// +// and +// +// slli rd, rs1, 48 +// srli rd, rd, x +static bool isShiftedZExt(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != RISCV::SRLI) + return false; + + if (!SecondMI.getOperand(2).isImm()) + return false; + + unsigned SRLIImm = SecondMI.getOperand(2).getImm(); + bool IsShiftBy48 = SRLIImm == 48; + if (SRLIImm > 32 && !IsShiftBy48) + return false; + + // Given SecondMI, when FirstMI is unspecified, we must return + // whether SecondMI may be part of a fused pair at all. 
+ if (!FirstMI) + return true; + + if (FirstMI->getOpcode() != RISCV::SLLI) + return false; + + unsigned SLLIImm = FirstMI->getOperand(2).getImm(); + if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32)) + return false; + + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); +} + +// Fuse AUIPC followed by ADDI +// auipc rd, imm20 +// addi rd, rd, imm12 +static bool isAUIPCADDI(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != RISCV::ADDI) + return false; + // Assume the 1st instr to be a wildcard if it is unspecified. + if (!FirstMI) + return true; + + if (FirstMI->getOpcode() != RISCV::AUIPC) + return false; + + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); +} + // Fuse LUI followed by ADDI or ADDIW. // rd = imm[31:0] which decomposes to // lui rd, imm[31:12] @@ -27,7 +122,6 @@ static bool isLUIADDI(const MachineInstr *FirstMI, if (SecondMI.getOpcode() != RISCV::ADDI && SecondMI.getOpcode() != RISCV::ADDIW) return false; - // Assume the 1st instr to be a wildcard if it is unspecified. if (!FirstMI) return true; @@ -35,21 +129,7 @@ static bool isLUIADDI(const MachineInstr *FirstMI, if (FirstMI->getOpcode() != RISCV::LUI) return false; - Register FirstDest = FirstMI->getOperand(0).getReg(); - - // Destination of LUI should be the ADDI(W) source register. - if (SecondMI.getOperand(1).getReg() != FirstDest) - return false; - - // If the input is virtual make sure this is the only user. - if (FirstDest.isVirtual()) { - auto &MRI = SecondMI.getMF()->getRegInfo(); - return MRI.hasOneNonDBGUse(FirstDest); - } - - // If the FirstMI destination is non-virtual, it should match the SecondMI - // destination. 
- return SecondMI.getOperand(0).getReg() == FirstDest; + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); } static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, @@ -61,6 +141,15 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI)) return true; + if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI)) + return true; + + if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI)) + return true; + + if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI)) + return true; + return false; } diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 90ba99d3f845d..58989fd716fa0 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -254,7 +254,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", FeatureStdExtZicbop, FeatureStdExtZicboz, FeatureVendorXVentanaCondOps], - [TuneVentanaVeyron]>; + [TuneVeyronFusions]>; def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu", NoSchedModel, diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 52f00f1f09903..11b470d397b0c 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -192,7 +192,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { return UserReservedRegister[i]; } - bool hasMacroFusion() const { return hasLUIADDIFusion(); } + bool hasMacroFusion() const { + return hasLUIADDIFusion() || hasAUIPCADDIFusion() || + hasShiftedZExtFusion() || hasLDADDFusion(); + } // Vector codegen related methods. 
bool hasVInstructions() const { return HasStdExtZve32x; } diff --git a/llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir b/llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir new file mode 100644 index 0000000000000..6d1e92e997b32 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir @@ -0,0 +1,159 @@ +# REQUIRES: asserts +# RUN: llc -mtriple=riscv64-linux-gnu -mcpu=veyron-v1 -x=mir < %s \ +# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \ +# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+shifted-zext-fusion,+ld-add-fusion \ +# RUN: | FileCheck %s + +# CHECK: lui_addi:%bb.0 +# CHECK: Macro fuse: {{.*}}LUI - ADDI +--- +name: lui_addi +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = ADDI %2, 3 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_addi +# CHECK: Macro fuse: {{.*}}AUIPC - ADDI +--- +name: auipc_addi +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = ADDI %2, 3 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: slli_srli +# CHECK: Macro fuse: {{.*}}SLLI - SRLI +--- +name: slli_srli +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = SLLI %1, 32 + %3:gpr = XORI %1, 3 + %4:gpr = SRLI %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: slli_srli_48 +# CHECK: Macro fuse: {{.*}}SLLI - SRLI +--- +name: slli_srli_48 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = SLLI %1, 48 + %3:gpr = XORI %1, 3 + %4:gpr = SRLI %2, 48 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... 
+ +# CHECK: slli_srli_no_fusion_0 +# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI +--- +name: slli_srli_no_fusion_0 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = SLLI %1, 32 + %3:gpr = XORI %1, 3 + %4:gpr = SRLI %2, 33 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: slli_srli_no_fusion_1 +# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI +--- +name: slli_srli_no_fusion_1 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = SLLI %1, 48 + %3:gpr = XORI %1, 3 + %4:gpr = SRLI %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: slli_srli_no_fusion_2 +# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI +--- +name: slli_srli_no_fusion_2 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = SLLI %1, 31 + %3:gpr = XORI %1, 3 + %4:gpr = SRLI %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: slli_srli_no_fusion_3 +# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI +--- +name: slli_srli_no_fusion_3 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = SLLI %1, 31 + %3:gpr = XORI %1, 3 + %4:gpr = SRLI %2, 48 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: ld_add +# CHECK: Macro fuse: {{.*}}ADD - LD +--- +name: ld_add +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +...