-
Notifications
You must be signed in to change notification settings - Fork 10.8k
/
AArch64FalkorHWPFFix.cpp
147 lines (119 loc) · 4.55 KB
/
AArch64FalkorHWPFFix.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
//===-- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// For Falkor, we want to avoid HW prefetcher instruction tag collisions that
// may inhibit the HW prefetching. This is done in two steps. Before ISel, we
// mark strided loads (i.e. those that will likely benefit from prefetching)
// with metadata. Then, after opcodes have been finalized, we insert MOVs and
// re-write loads to prevent unintentional tag collisions.
// ===----------------------------------------------------------------------===//
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
// Short name for this file. Besides its conventional LLVM use for debug output
// and statistics, it is passed as the second argument of the
// INITIALIZE_PASS_BEGIN/INITIALIZE_PASS_END macros further down.
#define DEBUG_TYPE "falkor-hwpf-fix"
// Counter bumped once for every load that
// FalkorMarkStridedAccesses::runOnLoop tags as strided.
STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
namespace {
/// Tags strided loads found in inner-most loops with the
/// FALKOR_STRIDED_ACCESS_MD metadata kind.
///
/// The analyses handed to the constructor are stored as references, so both
/// must stay alive for as long as this object is used.
class FalkorMarkStridedAccesses {
  /// Per-loop worker called by run(); returns true if it marked any load.
  bool runOnLoop(Loop *L);

  LoopInfo &LI;        // Source of the loops that run() walks.
  ScalarEvolution &SE; // Used by runOnLoop() to classify load addresses.

public:
  FalkorMarkStridedAccesses(LoopInfo &Loops, ScalarEvolution &Evolution)
      : LI(Loops), SE(Evolution) {}

  /// Processes every loop reachable from LI; returns true if at least one
  /// load was marked.
  bool run();
};
class FalkorMarkStridedAccessesLegacy : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
initializeFalkorMarkStridedAccessesLegacyPass(
*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetPassConfig>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
// FIXME: For some reason, preserving SE here breaks LSR (even if
// this pass changes nothing).
// AU.addPreserved<ScalarEvolutionWrapperPass>();
}
bool runOnFunction(Function &F) override;
};
} // namespace
// Out-of-class definition of the pass ID declared in
// FalkorMarkStridedAccessesLegacy.
char FalkorMarkStridedAccessesLegacy::ID = 0;
// Pass registration. The dependencies listed between BEGIN and END are the
// same three analyses that getAnalysisUsage() marks as required.
// NOTE(review): the trailing `false, false` are presumably the "CFG only" and
// "is analysis" flags of LLVM's pass-registration macros - confirm against
// PassSupport.h.
INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
"Falkor HW Prefetch Fix", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
"Falkor HW Prefetch Fix", false, false)
/// Factory for the legacy-pass-manager version of the strided-access marking
/// pass. Returns a freshly heap-allocated pass object.
FunctionPass *llvm::createFalkorMarkStridedAccessesPass() {
  FunctionPass *NewPass = new FalkorMarkStridedAccessesLegacy();
  return NewPass;
}
/// Runs the strided-load marking on \p F.
///
/// Returns false without doing any work when the function's subtarget is not
/// Falkor or when skipFunction(F) says the function should be skipped.
/// Otherwise returns whatever FalkorMarkStridedAccesses::run() reports.
bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) {
  // Resolve the subtarget for this particular function via the target machine.
  const AArch64Subtarget *Subtarget =
      getAnalysis<TargetPassConfig>()
          .getTM<AArch64TargetMachine>()
          .getSubtargetImpl(F);

  // The subtarget test comes first, so skipFunction() is only consulted for
  // Falkor functions.
  const bool TargetsFalkor =
      Subtarget->getProcFamily() == AArch64Subtarget::Falkor;
  if (!TargetsFalkor || skipFunction(F))
    return false;

  LoopInfo &Loops = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  ScalarEvolution &Evolution =
      getAnalysis<ScalarEvolutionWrapperPass>().getSE();

  FalkorMarkStridedAccesses Marker(Loops, Evolution);
  return Marker.run();
}
bool FalkorMarkStridedAccesses::run() {
bool MadeChange = false;
for (Loop *I : LI)
for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
MadeChange |= runOnLoop(*L);
return MadeChange;
}
/// Marks the strided loads of a single loop.
///
/// A load is tagged with empty FALKOR_STRIDED_ACCESS_MD metadata when its
/// address is not invariant in \p L and ScalarEvolution describes that address
/// as an affine add-recurrence. Loops that contain sub-loops are left alone.
///
/// \returns true if at least one load was tagged.
bool FalkorMarkStridedAccesses::runOnLoop(Loop *L) {
  // Only mark strided loads in the inner-most loop
  if (!L->empty())
    return false;

  bool Changed = false;
  for (const auto Block : L->blocks()) {
    for (auto &Inst : *Block) {
      // Anything that is not a load is of no interest.
      auto *Load = dyn_cast<LoadInst>(&Inst);
      if (!Load)
        continue;

      // An address that never changes inside the loop cannot be strided.
      Value *Address = Load->getPointerOperand();
      if (L->isLoopInvariant(Address))
        continue;

      // Ask SCEV for the address expression only after the cheaper
      // invariance test has passed.
      const auto *Recurrence = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Address));
      if (Recurrence && Recurrence->isAffine()) {
        Load->setMetadata(FALKOR_STRIDED_ACCESS_MD,
                          MDNode::get(Load->getContext(), {}));
        ++NumStridedLoadsMarked;
        DEBUG(dbgs() << "Load: " << Inst << " marked as strided\n");
        Changed = true;
      }
    }
  }
  return Changed;
}