[RISCV] Macro-fusion support for veyron-v1 CPU. #70012

mgudim · 2023-10-24T07:48:35Z

Support was added for the following fusions:
auipc-addi, slli-srli, ld-add
Some parts of the code became repetitive, so a small refactoring of the existing lui-addi fusion was done.

llvmbot · 2023-10-24T07:49:45Z

@llvm/pr-subscribers-backend-risc-v

Author: Mikhail Gudim (mgudim)

Changes

Support was added for the following fusions:
auipc-addi, slli-srli, ld-add
Some parts of the code became repetitive, so a small refactoring of the existing lui-addi fusion was done.

Full diff: https://github.com/llvm/llvm-project/pull/70012.diff

4 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVFeatures.td (+18)
(modified) llvm/lib/Target/RISCV/RISCVMacroFusion.cpp (+96-14)
(modified) llvm/lib/Target/RISCV/RISCVProcessors.td (+2-1)
(modified) llvm/lib/Target/RISCV/RISCVSubtarget.h (+4-1)

diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 979bc0ea8c7d065..13565ed361dbc2d 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -937,6 +937,16 @@ def TuneLUIADDIFusion
     : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
                        "true", "Enable LUI+ADDI macrofusion">;
 
+def TuneAUIPCADDIFusion
+    : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
+                       "true", "Enable AUIPC+ADDI macrofusion">;
+def TuneSLLISRLIFusion
+    : SubtargetFeature<"slli-srli-fusion", "HasSLLISRLIFusion",
+                       "true", "Enable SLLI+SRLI macrofusion">;
+def TuneLDADDFusion
+    : SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
+                       "true", "Enable fusion of load with the last instruction of the address calculation">;
+
 def TuneNoDefaultUnroll
     : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
                        "Disable default unroll preference.">;
@@ -954,6 +964,14 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                    [TuneNoDefaultUnroll,
                                     TuneShortForwardBranchOpt]>;
 
+def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
+                                         "Ventana Veyron-Series processors",
+                                         [TuneLUIADDIFusion,
+                                          TuneAUIPCADDIFusion,
+                                          TuneSLLISRLIFusion,
+                                          TuneLDADDFusion]>;
+
+
 // Assume that lock-free native-width atomics are available, even if the target
 // and operating system combination would not usually provide them. The user
 // is responsible for providing any necessary __sync implementations. Code
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index 02a8d5c18fe1a0e..c33b3503aed0f97 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -18,6 +18,90 @@
 
 using namespace llvm;
 
+// Returns true when SecondMI consumes FirstDest (the register defined by the
+// first instruction of a candidate pair) in a way that permits fusion:
+//  - operand 1 of SecondMI must be a register equal to FirstDest, and
+//  - if FirstDest is virtual, it must have exactly one non-debug use;
+//    otherwise SecondMI must also write its result (operand 0) to FirstDest.
+static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) {
+  // Operand 1 is not guaranteed to be a register (it may, for example, be a
+  // frame index), so check the operand kind before calling getReg() on it.
+  const auto &SecondSrc = SecondMI.getOperand(1);
+  if (!SecondSrc.isReg() || SecondSrc.getReg() != FirstDest)
+    return false;
+
+  // If the input is virtual make sure this is the only user.
+  if (FirstDest.isVirtual()) {
+    auto &MRI = SecondMI.getMF()->getRegInfo();
+    return MRI.hasOneNonDBGUse(FirstDest);
+  }
+
+  // If the first destination is non-virtual, it should match the SecondMI
+  // destination.
+  return SecondMI.getOperand(0).getReg() == FirstDest;
+}
+
+// Fuse a load with the ADD that produces its address:
+//   add rd, rs1, rs2
+//   ld  rd, 0(rd)
+// SecondMI must be an LD whose offset operand is the immediate 0. When FirstMI
+// is null, the result says whether SecondMI could be the second half of such a
+// pair at all.
+static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
+  if (SecondMI.getOpcode() != RISCV::LD)
+    return false;
+
+  // The offset must be a literal immediate...
+  if (!SecondMI.getOperand(2).isImm())
+    return false;
+
+  // ...and that immediate must be zero.
+  if (SecondMI.getOperand(2).getImm() != 0)
+    return false;
+
+  // Given SecondMI, when FirstMI is unspecified, we must return
+  // if SecondMI may be part of a fused pair at all.
+  if (!FirstMI)
+    return true;
+
+  // Only an ADD may be the first half of the pair. Returning true here would
+  // report every non-ADD instruction followed by such an LD as fusible.
+  if (FirstMI->getOpcode() != RISCV::ADD)
+    return false;
+
+  // The base address operand of LD might be a frame index.
+  if (!SecondMI.getOperand(1).isReg())
+    return false;
+
+  return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse an SLLI feeding an SRLI in one of two forms:
+//   slli rd, rs1, 32 ; srli rd, rd, x    where x <= 32
+//   slli rd, rs1, 48 ; srli rd, rd, 48
+// When FirstMI is null, the result says whether SecondMI could be the second
+// half of such a pair at all.
+static bool isSLLISRLI(const MachineInstr *FirstMI,
+                       const MachineInstr &SecondMI) {
+  if (SecondMI.getOpcode() != RISCV::SRLI)
+    return false;
+
+  if (!SecondMI.getOperand(2).isImm())
+    return false;
+
+  // The right shift must be by at most 32, or by exactly 48.
+  unsigned SRLIImm = SecondMI.getOperand(2).getImm();
+  bool IsShiftBy48 = SRLIImm == 48;
+  if (SRLIImm > 32 && !IsShiftBy48)
+    return false;
+
+  // Given SecondMI, when FirstMI is unspecified, we must return
+  // if SecondMI may be part of a fused pair at all.
+  if (!FirstMI)
+    return true;
+
+  if (FirstMI->getOpcode() != RISCV::SLLI)
+    return false;
+
+  // The left shift amount is fixed by the form being matched: exactly 48 when
+  // the right shift is by 48, exactly 32 otherwise. Accepting any amount up to
+  // 32 would also match unrelated shift pairs such as slli 5 / srli 3.
+  unsigned SLLIImm = FirstMI->getOperand(2).getImm();
+  const unsigned RequiredSLLIImm = IsShiftBy48 ? 48 : 32;
+  if (SLLIImm != RequiredSLLIImm)
+    return false;
+
+  return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse AUIPC followed by ADDI, where the ADDI reads the register written by
+// the AUIPC. When FirstMI is null, the result says whether SecondMI (an ADDI)
+// could be the second half of such a pair at all.
+static bool isAUIPCADDI(const MachineInstr *FirstMI,
+                        const MachineInstr &SecondMI) {
+  const bool SecondIsADDI = SecondMI.getOpcode() == RISCV::ADDI;
+
+  // A non-ADDI never qualifies; with the first instruction unspecified it is
+  // treated as a wildcard, so an ADDI alone is enough.
+  if (!SecondIsADDI || !FirstMI)
+    return SecondIsADDI;
+
+  // The producer has to be AUIPC, and the source operand of ADDI has to be a
+  // register (it might be a frame index instead).
+  const bool IsCandidatePair = FirstMI->getOpcode() == RISCV::AUIPC &&
+                               SecondMI.getOperand(1).isReg();
+
+  return IsCandidatePair &&
+         checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
 // Fuse LUI followed by ADDI or ADDIW.
 // rd = imm[31:0] which decomposes to
 // lui rd, imm[31:12]
@@ -27,7 +111,6 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
   if (SecondMI.getOpcode() != RISCV::ADDI &&
       SecondMI.getOpcode() != RISCV::ADDIW)
     return false;
-
   // Assume the 1st instr to be a wildcard if it is unspecified.
   if (!FirstMI)
     return true;
@@ -35,21 +118,11 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
   if (FirstMI->getOpcode() != RISCV::LUI)
     return false;
 
-  Register FirstDest = FirstMI->getOperand(0).getReg();
-
-  // Destination of LUI should be the ADDI(W) source register.
-  if (SecondMI.getOperand(1).getReg() != FirstDest)
+  // The first operand of ADDI might be a frame index.
+  if (!SecondMI.getOperand(1).isReg())
     return false;
 
-  // If the input is virtual make sure this is the only user.
-  if (FirstDest.isVirtual()) {
-    auto &MRI = SecondMI.getMF()->getRegInfo();
-    return MRI.hasOneNonDBGUse(FirstDest);
-  }
-
-  // If the FirstMI destination is non-virtual, it should match the SecondMI
-  // destination.
-  return SecondMI.getOperand(0).getReg() == FirstDest;
+  return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
 }
 
 static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
@@ -61,6 +134,15 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
     return true;
 
+  if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
+    return true;
+
+  if (ST.hasSLLISRLIFusion() && isSLLISRLI(FirstMI, SecondMI))
+    return true;
+
+  if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))
+    return true;
+
   return false;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index e4008d145ffa572..3a242e20edb5aa4 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -242,4 +242,5 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                              FeatureStdExtZicbom,
                                              FeatureStdExtZicbop,
                                              FeatureStdExtZicboz,
-                                             FeatureVendorXVentanaCondOps]>;
+                                             FeatureVendorXVentanaCondOps,
+                                             TuneVeyronFusions]>;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 6b915e61c136086..ed23b15bbca9bb5 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -182,7 +182,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
     return UserReservedRegister[i];
   }
 
-  bool hasMacroFusion() const { return hasLUIADDIFusion(); }
+  bool hasMacroFusion() const {
+    return hasLUIADDIFusion() || hasAUIPCADDIFusion() || hasSLLISRLIFusion() ||
+           hasLDADDFusion();
+  }
 
   // Vector codegen related methods.
   bool hasVInstructions() const { return HasStdExtZve32x; }

wangpc-pp · 2023-10-24T08:48:44Z

As far as I know, there may be 30+ instruction pairs that can be fused on some microarchitectures (for example, Xiangshan - Decode Unit, Chinese). If we define subtarget features for all of them, that would be a disaster, I think. So I'm working on a TableGen backend that generates predicators for macro fusion (with some refactoring of ScheduleModel), and I will raise an RFC and a PR later. This is still WIP and discussions are welcome!

Anyway, this PR looks great to me, and I'm just sharing some information here.

(For current implementation, I don't know if we can refer to PPC's, in which there is a script-generated header.)

mgudim · 2023-10-24T19:54:48Z

As far as I know, there may be 30+ instruction pairs that can be fused on some microarchitectures (for example, Xiangshan - Decode Unit, Chinese). If we define subtarget features for all of them, that would be a disaster, I think. So I'm working on a TableGen backend that generates predicators for macro fusion (with some refactoring of ScheduleModel), and I will raise an RFC and a PR later. This is still WIP and discussions are welcome!

Anyway, this PR looks great to me, and I'm just sharing some information here.

(For current implementation, I don't know if we can refer to PPC's, in which there is a script-generated header.)

Thanks for taking a look. I agree that we shouldn't have too many features defined. I'm happy to rework this in the future if we adopt a new approach.

topperc · 2023-10-24T19:54:57Z

llvm/lib/Target/RISCV/RISCVMacroFusion.cpp

+  return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse SLLI by 32 feeding into SRLI by 32 or less or


Can we split this into two separate sentences? The 32 or less or is kind of hard to parse.

I've replaced this with pseudocode.

topperc · 2023-10-24T19:56:38Z

llvm/lib/Target/RISCV/RISCVMacroFusion.cpp

-
-  // Destination of LUI should be the ADDI(W) source register.
-  if (SecondMI.getOperand(1).getReg() != FirstDest)
+  // The first operand of ADDI might be a frame index.


This check was removed previously because it wasn't needed. Was this a bad merge or is it needed now?

In this code:

$rd0 = lui $imm0 $rd1 = addi $rs0, $imm1

In order for fusion to happen it must be that $rd0 == $rd1 and $rd0 == $rs0. The same holds for the other fusions. Both checks are done in checkRegisters now. I think both checks were needed before too, according to this comment:

// Fuse LUI followed by ADDI or ADDIW. // rd = imm[31:0] which decomposes to // lui rd, imm[31:12] // addi(w) rd, rd, imm[11:0]

I was specifically referring to the FrameIndex comment. It was removed by #68701

I think we do need the check SecondMI.getOperand(1).isReg() because later we do SecondMI.getOperand(1).getReg() != FirstDest - this may crash, right?

I've put that check as a first line of checkRegisters.

As was explained in #68701, if the ADDI operand isn't a register then there can be no data dependency between the LUI and the ADDi so it wouldn't be considered for macrofusion.

oh, sorry I didn't realize that by the time we call shouldScheduleAdjacent we know that there is a data dependency. Ok, I looked at MacroFusion.cpp : 177 - we only consider pairs of vertices connected by an edge of a schedule dag. But now I am starting to wonder: can we guarantee that an edge always implies data dependency? What if some other mutation creates an artificial edge without data dependency for example?

But now I am starting to wonder: can we guarantee that an edge always implies data dependency? What if some other mutation creates an artificial edge without data dependency for example?

Yes, I have to say that it's possible to have other order dependencies, such as Artificial ones. For example, BaseMemOpClusterMutation::clusterNeighboringMemOps. Though we may not add artificial edges (other than for fusion) between ALU instructions. I haven't run into any problem after removing the isReg() check yet, but I think we may add it back, since there is a theoretical risk of hitting an assertion.

preames

Is there an optimization guide or other public reference which describes these fusions?

preames · 2023-10-25T19:45:34Z

llvm/lib/Target/RISCV/RISCVProcessors.td

@@ -242,4 +242,5 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                             FeatureStdExtZicbom,
                                             FeatureStdExtZicbop,
                                             FeatureStdExtZicboz,
-                                             FeatureVendorXVentanaCondOps]>;
+                                             FeatureVendorXVentanaCondOps,
+                                             TuneVeyronFusions]>;


It looks like you're adding this to the feature list, not the tune list. That's probably not what you meant.

done in #70414

preames · 2023-10-25T19:46:48Z

llvm/lib/Target/RISCV/RISCVFeatures.td

@@ -954,6 +964,14 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                   [TuneNoDefaultUnroll,
                                    TuneShortForwardBranchOpt]>;

+def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",


Please separate this out in a separate PR which only adds the proc family and the existing LUI/ADDI fusion. Then this change can extend the list with the new fusions.

done: #70414

preames · 2023-10-25T19:49:52Z

llvm/lib/Target/RISCV/RISCVMacroFusion.cpp

+    return false;
+
+  unsigned SRLIImm = SecondMI.getOperand(2).getImm();
+  bool IsShiftBy48 = SRLIImm == 48;


This seems a very oddly restricted fusion. Are you sure that e.g. a shift pair by 37 that clears the top bits isn't fused?

The two shifts by 48 case is the i16 zext pattern for rv64 without Zbb. So it makes some sense.

llvm/lib/Target/RISCV/RISCVMacroFusion.cpp

topperc · 2023-10-25T20:44:15Z

llvm/lib/Target/RISCV/RISCVFeatures.td

+def TuneAUIPCADDIFusion
+    : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
+                       "true", "Enable AUIPC+ADDI macrofusion">;
+def TuneSLLISRLIFusion


This is pretty generic name given the specific cases that are recognized for fusion.

how about ShiftedZExtFusion?

asb · 2023-11-07T16:46:00Z

Could we add tests for this please? I know we have macro-fusion-lui-addi.ll already though I'm not sure .ll tests are the best for this. PowerPC's macro-fusion.mir test seemed like a nice way to handle it, to my eye at least.

mgudim · 2023-11-10T20:23:52Z

Could we add tests for this please? I know we have macro-fusion-lui-addi.ll already though I'm not sure .ll tests are the best for this. PowerPC's macro-fusion.mir test seemed like a nice way to handle it, to my eye at least.

Yes, good idea. Done.

mgudim · 2023-11-17T19:06:26Z

@asb @preames @topperc Ping

github-actions · 2023-12-08T18:54:12Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Support was added for the following fusions: auipc-addi, slli-srli, ld-add. Some parts of the code became repetitive, so a small refactoring of the existing lui-addi fusion was done.

same.

Added pseudocode to explain fusions. Replace the "spacer" `ADDI` in the test with `XORI`- it was confusing in case of LDADD fusion.

wangpc-pp

I'd like to see this landed so that there are more use cases for TableGen way. :-)

wangpc-pp · 2023-12-11T04:16:04Z

llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir

@@ -0,0 +1,159 @@
+# REQUIRES: asserts


Do we need end-to-end llc tests like llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll?

mgudim · 2023-12-11T21:07:55Z

@topperc @asb @preames I think all the comments are addressed, sorry for taking so long. Is it OK to merge?

topperc

LGTM

topperc · 2024-01-26T05:20:15Z

llvm/lib/Target/RISCV/RISCVMacroFusion.cpp

+// add rd, rs1, rs2
+// ld rd, 0(rd)
+static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
+  if (SecondMI.getOpcode() != RISCV::LD)


@mgudim Is this fusion really restricted to just 64-bit load?

mgudim requested a review from topperc October 24, 2023 07:48

llvmbot added the backend:RISC-V label Oct 24, 2023

mgudim force-pushed the veyron-fusions branch from 602da9a to 514d689 Compare October 24, 2023 19:49

topperc reviewed Oct 24, 2023

View reviewed changes

mgudim force-pushed the veyron-fusions branch 2 times, most recently from c862441 to d98d166 Compare October 25, 2023 19:06

preames reviewed Oct 25, 2023

View reviewed changes

topperc reviewed Oct 25, 2023

View reviewed changes

mgudim force-pushed the veyron-fusions branch 3 times, most recently from c4dac98 to 909e139 Compare October 31, 2023 23:54

mgudim force-pushed the veyron-fusions branch from 909e139 to 39198f7 Compare November 10, 2023 20:21

mgudim force-pushed the veyron-fusions branch 2 times, most recently from b32afa5 to de5d9dc Compare December 8, 2023 18:51

mgudim force-pushed the veyron-fusions branch from de5d9dc to 40c3a0c Compare December 8, 2023 19:06

mgudim added 4 commits December 8, 2023 23:39

[RISCV] Macro-fusion support for veyron-v1 CPU.

783a2ad

Support was added for the following fusions: auipc-addi, slli-srli, ld-add. Some parts of the code became repetitive, so a small refactoring of the existing lui-addi fusion was done.

addressed review comments.

eb79be7

Corrected slli-srli fusion - the immediate shift amounts have to be the

dad5db6

same.

Added test.

4a2bec5

mgudim force-pushed the veyron-fusions branch from 40c3a0c to 4ad00f3 Compare December 9, 2023 04:40

Renamed SLLISRLIFusion into ShiftedZExtFusion.

c6dff10

Added pseudocode to explain fusions. Replace the "spacer" `ADDI` in the test with `XORI`- it was confusing in case of LDADD fusion.

mgudim force-pushed the veyron-fusions branch from 4ad00f3 to c6dff10 Compare December 9, 2023 04:44

wangpc-pp reviewed Dec 11, 2023

View reviewed changes

topperc approved these changes Dec 11, 2023

View reviewed changes

mgudim merged commit 29ee66f into llvm:main Dec 11, 2023
4 checks passed

topperc reviewed Jan 26, 2024

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[RISCV] Macro-fusion support for veyron-v1 CPU. #70012

[RISCV] Macro-fusion support for veyron-v1 CPU. #70012

mgudim commented Oct 24, 2023

llvmbot commented Oct 24, 2023

wangpc-pp commented Oct 24, 2023 •

edited

mgudim commented Oct 24, 2023

topperc Oct 24, 2023

mgudim Oct 25, 2023

topperc Oct 24, 2023

mgudim Oct 24, 2023

topperc Oct 24, 2023 •

edited

mgudim Oct 25, 2023

topperc Oct 25, 2023

mgudim Oct 25, 2023

wangpc-pp Oct 26, 2023

preames left a comment

preames Oct 25, 2023

mgudim Oct 27, 2023

preames Oct 25, 2023

mgudim Oct 27, 2023

preames Oct 25, 2023

topperc Oct 25, 2023

topperc Oct 25, 2023

mgudim Oct 31, 2023

topperc Nov 17, 2023

asb commented Nov 7, 2023

mgudim commented Nov 10, 2023

mgudim commented Nov 17, 2023

github-actions bot commented Dec 8, 2023 •

edited

wangpc-pp left a comment

wangpc-pp Dec 11, 2023

mgudim commented Dec 11, 2023

topperc left a comment

topperc Jan 26, 2024 •

edited

mgudim Jan 26, 2024

[RISCV] Macro-fusion support for veyron-v1 CPU. #70012

[RISCV] Macro-fusion support for veyron-v1 CPU. #70012

Conversation

mgudim commented Oct 24, 2023

llvmbot commented Oct 24, 2023

wangpc-pp commented Oct 24, 2023 • edited

mgudim commented Oct 24, 2023

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

topperc Oct 24, 2023 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

preames left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

asb commented Nov 7, 2023

mgudim commented Nov 10, 2023

mgudim commented Nov 17, 2023

github-actions bot commented Dec 8, 2023 • edited

wangpc-pp left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

mgudim commented Dec 11, 2023

topperc left a comment

Choose a reason for hiding this comment

topperc Jan 26, 2024 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

wangpc-pp commented Oct 24, 2023 •

edited

topperc Oct 24, 2023 •

edited

github-actions bot commented Dec 8, 2023 •

edited

topperc Jan 26, 2024 •

edited