-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Enable LoopDataPrefetch pass #66201
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-risc-v Changes: So that we can benefit from data prefetching when the `Zicbop` extension is supported. Tune information for data prefetching is added in `RISCVTuneInfo`. This PR is stacked on #66193. -- 10 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 6381263b37613b3..367f0fbbe44801b 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -950,12 +950,3 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", "AllowTaggedGlobals", "true", "Use an instruction sequence for taking the address of a global " "that allows a memory tag in the upper address bits">; - -foreach align = [2, 4, 8, 16, 32, 64] in { - def TunePrefFunctionAlignment # align : - SubtargetFeature<"pref-func-align-" # align, "PrefFunctionAlignment", - "Align(" # align # ")", "Set preferred function alignment to " # align # " bytes">; - def TunePrefLoopAlignment # align : - SubtargetFeature<"pref-loop-align-" # align, "PrefLoopAlignment", - "Align(" # align # ")", "Set preferred loop alignment to " # align # " bytes">; -} diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 01291001cd7ca24..21c8edc3bf96226 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -10,12 +10,36 @@ // RISC-V processors supported. //===----------------------------------------------------------------------===// +class RISCVTuneInfo { + bits<8> PrefFunctionAlignment = 1; + bits<8> PrefLoopAlignment = 1; + + // Information needed by LoopPrefetch. 
+ bits<16> CacheLineSize = 0; + bits<16> PrefetchDistance = 0; + bits<16> MinPrefetchStride = 1; + bits<16> MaxPrefetchIterationsAhead = 65535; +} + +def RISCVTuneInfoTable : GenericTable { + let FilterClass = "RISCVTuneInfo"; + let CppTypeName = "RISCVTuneInfo"; + let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment", + "CacheLineSize", "PrefetchDistance", + "MinPrefetchStride", "MaxPrefetchIterationsAhead"]; +} + +def getRISCVTuneInfo : SearchIndex { + let Table = RISCVTuneInfoTable; + let Key = ["Name"]; +} + class RISCVProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> f, list<SubtargetFeature> tunef = [], string default_march = ""> - : ProcessorModel<n, m, f, tunef> { + : ProcessorModel<n, m, f, tunef>, RISCVTuneInfo { string DefaultMarch = default_march; } @@ -23,7 +47,7 @@ class RISCVTuneProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> tunef = [], list<SubtargetFeature> f = []> - : ProcessorModel<n, m, f,tunef>; + : ProcessorModel<n, m, f,tunef>, RISCVTuneInfo; def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32", NoSchedModel, diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index aa0275830e2a87a..572aa676edbbef4 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -29,6 +29,12 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "RISCVGenSubtargetInfo.inc" +namespace llvm::RISCVTuneInfoTable { + +#define GET_RISCVTuneInfoTable_IMPL +#include "RISCVGenSearchableTables.inc" +} // namespace llvm::RISCVTuneInfoTable + static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness", cl::init(true), cl::Hidden); @@ -66,6 +72,7 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU, TuneCPU = CPU; ParseSubtargetFeatures(CPU, TuneCPU, FS); + TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo(TuneCPU); TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName); 
RISCVFeatures::validate(TT, getFeatureBits()); return *this; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index cf64dbc21bd8a8b..152e4a3beff257a 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -32,6 +32,24 @@ namespace llvm { class StringRef; +namespace RISCVTuneInfoTable { + +struct RISCVTuneInfo { + const char *Name; + uint8_t PrefFunctionAlignment; + uint8_t PrefLoopAlignment; + + // Information needed by LoopPrefetch. + uint16_t CacheLineSize; + uint16_t PrefetchDistance; + uint16_t MinPrefetchStride; + uint16_t MaxPrefetchIterationsAhead; +}; + +#define GET_RISCVTuneInfoTable_DECL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCVTuneInfoTable + class RISCVSubtarget : public RISCVGenSubtargetInfo { public: enum RISCVProcFamilyEnum : uint8_t { @@ -54,8 +72,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { uint8_t MaxInterleaveFactor = 2; RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown; std::bitset<RISCV::NUM_TARGET_REGS> UserReservedRegister; - Align PrefFunctionAlignment; - Align PrefLoopAlignment; + const RISCVTuneInfoTable::RISCVTuneInfo *TuneInfo; RISCVFrameLowering FrameLowering; RISCVInstrInfo InstrInfo; @@ -96,8 +113,12 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { } bool enableMachineScheduler() const override { return true; } - Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } - Align getPrefLoopAlignment() const { return PrefLoopAlignment; } + Align getPrefFunctionAlignment() const { + return Align(TuneInfo->PrefFunctionAlignment); + } + Align getPrefLoopAlignment() const { + return Align(TuneInfo->PrefLoopAlignment); + } /// Returns RISC-V processor family. /// Avoid this function! 
CPU specifics should be kept local to this class @@ -227,6 +248,22 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { &Mutations) const override; bool useAA() const override; + + unsigned getCacheLineSize() const override { + return TuneInfo->CacheLineSize; + }; + unsigned getPrefetchDistance() const override { + return TuneInfo->PrefetchDistance; + }; + unsigned getMinPrefetchStride(unsigned NumMemAccesses, + unsigned NumStridedMemAccesses, + unsigned NumPrefetches, + bool HasCall) const override { + return TuneInfo->MinPrefetchStride; + }; + unsigned getMaxPrefetchIterationsAhead() const override { + return TuneInfo->MaxPrefetchIterationsAhead; + }; }; } // End llvm namespace diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index d4fd66c9b360c4e..3a64f4cb5214ce0 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" #include <optional> using namespace llvm; @@ -71,6 +72,11 @@ static cl::opt<bool> EnableRISCVCopyPropagation( cl::desc("Enable the copy propagation with RISC-V copy instr"), cl::init(true), cl::Hidden); +static cl::opt<bool> + EnableLoopDataPrefetch("riscv-enable-loop-data-prefetch", cl::Hidden, + cl::desc("Enable the loop data prefetch pass"), + cl::init(true)); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target()); RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target()); @@ -285,6 +291,9 @@ void RISCVPassConfig::addIRPasses() { addPass(createAtomicExpandPass()); if (getOptLevel() != CodeGenOpt::None) { + if (EnableLoopDataPrefetch) + addPass(createLoopDataPrefetchPass()); + addPass(createRISCVGatherScatterLoweringPass()); addPass(createInterleavedAccessPass()); 
addPass(createRISCVCodeGenPreparePass()); diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index a9636cdf8bb17f3..c0983c8643c6dbb 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -5,17 +5,17 @@ ; RUN: grep -v "Verify generated machine code" | \ ; RUN: FileCheck %s --check-prefixes=CHECK,RV64 -; REQUIRES: asserts +; REQUIRES: asserts ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information -; CHECK-NEXT: Type-Based Alias Analysis -; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info +; CHECK-NEXT: Type-Based Alias Analysis +; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Create Garbage Collector Module Metadata ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor @@ -28,6 +28,12 @@ ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Loop Data Prefetch ; CHECK-NEXT: RISC-V gather/scatter lowering ; CHECK-NEXT: Interleaved Access Pass ; CHECK-NEXT: RISC-V CodeGenPrepare diff --git a/llvm/test/CodeGen/RISCV/align-loops.ll b/llvm/test/CodeGen/RISCV/align-loops.ll index 5ef78c74d03532b..efa03992b6277f6 100644 --- a/llvm/test/CodeGen/RISCV/align-loops.ll +++ b/llvm/test/CodeGen/RISCV/align-loops.ll @@ -1,8 +1,6 @@ ; RUN: llc < %s -mtriple=riscv64 | FileCheck %s ; RUN: llc < %s -mtriple=riscv64 -align-loops=16 | FileCheck %s -check-prefix=ALIGN_16 ; RUN: llc < %s -mtriple=riscv64 -align-loops=32 | FileCheck %s 
-check-prefix=ALIGN_32 -; RUN: llc < %s -mtriple=riscv64 -mattr=+pref-loop-align-16 | FileCheck %s -check-prefix=ALIGN_16 -; RUN: llc < %s -mtriple=riscv64 -mattr=+pref-loop-align-32 | FileCheck %s -check-prefix=ALIGN_32 declare void @foo() diff --git a/llvm/test/CodeGen/RISCV/align.ll b/llvm/test/CodeGen/RISCV/align.ll index 1fb4585f8422aa4..5807fc14efc292d 100644 --- a/llvm/test/CodeGen/RISCV/align.ll +++ b/llvm/test/CodeGen/RISCV/align.ll @@ -2,8 +2,6 @@ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv32 -mattr=+c -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32C -; RUN: llc -mtriple=riscv32 -mattr=+pref-func-align-32 -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=ALIGN-32 ; RUN: llc -filetype=obj -mtriple=riscv32 < %s -o %t ; RUN: llvm-readelf -S %t | FileCheck %s --check-prefixes=SEC,SEC-I ; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+c < %s -o %t @@ -18,8 +16,6 @@ define void @foo() { ;RV32I: foo: ;RV32C: .p2align 1 ;RV32C: foo: -;ALIGN-32: .p2align 5 -;ALIGN-32: foo: entry: ret void } diff --git a/llvm/test/Transforms/LoopDataPrefetch/RISCV/basic.ll b/llvm/test/Transforms/LoopDataPrefetch/RISCV/basic.ll new file mode 100644 index 000000000000000..0f8c59fd27a93c3 --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/RISCV/basic.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -mtriple=riscv64 -riscv-enable-loop-data-prefetch \ +; RUN: -cache-line-size=64 -prefetch-distance=64 \ +; RUN: -passes=loop-data-prefetch -S < %s | FileCheck %s + +define void @foo(ptr nocapture %a, ptr nocapture readonly %b) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: 
[[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 64 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: call void @llvm.prefetch.p0(ptr [[SCEVGEP]], i32 0, i32 3, i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], 1.000000e+00 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1600 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv + %0 = load double, ptr %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv + store double %add, ptr %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopDataPrefetch/RISCV/lit.local.cfg b/llvm/test/Transforms/LoopDataPrefetch/RISCV/lit.local.cfg new file mode 100644 index 000000000000000..17351748513d988 --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/RISCV/lit.local.cfg @@ -0,0 +1,2 @@ +if not "RISCV" in config.root.targets: + config.unsupported = True |
823d38b
to
6b8030f
Compare
Should we add more entries to RISCVProcFamilyEnum and use that as the key instead of the CPU name? |
Would that be too coarse-grained? There may be some differences between CPUs even if they are in the same family. |
6b8030f
to
8d0ad72
Compare
Ping. |
1 similar comment
Ping. |
8d0ad72
to
419f4c2
Compare
Ping. |
2 similar comments
Ping. |
Ping. |
So that we can benefit from data prefetching when the `Zicbop` extension is supported. Tune information for data prefetching is added in `RISCVTuneInfo`. This PR is stacked on llvm#66193.
use bits<32> and change default value to UINT_MAX
Remove -riscv-enable-loop-data-prefetch
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
419f4c2
to
7b354fc
Compare
Please fix the build error https://lab.llvm.org/buildbot/#/builders/218/builds/16792 . It seems that this patch is causing it. FAILED: lib/libLLVMRISCVCodeGen.so.18git |
Should be fixed in #71905. |
So that we can benefit from data prefetch when the `Zicbop` extension is supported. Tune information for data prefetching is added in `RISCVTuneInfo`.
So that we can benefit from data prefetch when
Zicbop
extension is supported.
Tune information for data prefetching is added in
RISCVTuneInfo
.