Skip to content

Commit

Permalink
Consolidate invariant loads
Browse files Browse the repository at this point in the history
  If a (assumed) invariant location is loaded multiple times we
  generated a parameter for each location. However, this caused compile
  time problems for several benchmarks (e.g., 445_gobmk in SPEC2006 and
  BT in the NAS benchmarks). Additionally, the code we generate is
  suboptimal as we preload the same location multiple times and perform
  the same checks on all the parameters that refer to the same value.

  With this patch we consolidate the invariant loads in three steps:
    1) During SCoP initialization required invariant loads are put in
       equivalence classes based on their pointer operand. One
       representing load is used to generate a parameter for the whole
       class, thus we never generate multiple parameters for the same
       location.
    2) During the SCoP simplification we remove invariant memory
       accesses that are in the same equivalence class. While doing so
       we build the union of all execution domains as it is only
       important that the location is at least accessed once.
    3) During code generation we only preload one element of each
       equivalence class with the unified execution domain. All others
       are mapped to that preloaded value.

Differential Revision: http://reviews.llvm.org/D13338

llvm-svn: 249853
  • Loading branch information
Johannes Doerfert committed Oct 9, 2015
1 parent 769e1a9 commit 697fdf8
Show file tree
Hide file tree
Showing 12 changed files with 480 additions and 53 deletions.
43 changes: 38 additions & 5 deletions polly/include/polly/ScopInfo.h
Expand Up @@ -680,8 +680,14 @@ using MemoryAccessList = std::forward_list<MemoryAccess *>;
/// @brief Type for invariant memory accesses and their domain context.
using InvariantAccessTy = std::pair<MemoryAccess *, isl_set *>;

/// @brief Type for an ordered list of invariant accesses.
using InvariantAccessListTy = std::forward_list<InvariantAccessTy>;

/// @brief Type for a class of equivalent invariant memory accesses.
using InvariantEquivClassTy = std::pair<const SCEV *, InvariantAccessListTy>;

/// @brief Type for multiple invariant memory accesses and their domain context.
using InvariantAccessesTy = SmallVector<InvariantAccessTy, 8>;
using InvariantAccessesTy = SmallVector<InvariantEquivClassTy, 8>;

///===----------------------------------------------------------------------===//
/// @brief Statement of the Scop
Expand Down Expand Up @@ -906,12 +912,12 @@ class ScopStmt {
/// @brief Add @p Access to this statement's list of accesses.
void addAccess(MemoryAccess *Access);

/// @brief Move the memory access in @p InvMAs to @p TargetList.
/// @brief Move the memory access in @p InvMAs to @p InvariantEquivClasses.
///
/// Note that scalar accesses that are caused by any access in @p InvMAs will
/// be eliminated too.
void hoistMemoryAccesses(MemoryAccessList &InvMAs,
InvariantAccessesTy &TargetList);
InvariantAccessesTy &InvariantEquivClasses);

typedef MemoryAccessVec::iterator iterator;
typedef MemoryAccessVec::const_iterator const_iterator;
Expand Down Expand Up @@ -1135,7 +1141,7 @@ class Scop {
MinMaxVectorPairVectorTy MinMaxAliasGroups;

/// @brief List of invariant accesses.
InvariantAccessesTy InvariantAccesses;
InvariantAccessesTy InvariantEquivClasses;

/// @brief Scop constructor; invoked from ScopInfo::buildScop.
Scop(Region &R, AccFuncMapType &AccFuncMap, ScopDetection &SD,
Expand Down Expand Up @@ -1186,6 +1192,20 @@ class Scop {
/// @see isIgnored()
void simplifySCoP(bool RemoveIgnoredStmts);

/// @brief Create equivalence classes for required invariant accesses.
///
/// These classes will consolidate multiple required invariant loads from the
/// same address in order to keep the number of dimensions in the SCoP
/// description small. For each such equivalence class only one
/// representing element, hence one required invariant load, will be chosen
/// and modeled as parameter. The method
/// Scop::getRepresentingInvariantLoadSCEV() will replace each element from an
/// equivalence class with the representing element that is modeled. As a
/// consequence Scop::getIdForParam() will only return an id for the
/// representing element of each equivalence class, thus for each required
/// invariant location.
void buildInvariantEquivalenceClasses();

/// @brief Hoist invariant memory loads and check for required ones.
///
/// We first identify "common" invariant loads, thus loads that are invariant
Expand Down Expand Up @@ -1220,6 +1240,19 @@ class Scop {
/// @brief Simplify the assumed and boundary context.
void simplifyContexts();

/// @brief Get the representing SCEV for @p S if applicable, otherwise @p S.
///
/// Invariant loads of the same location are put in an equivalence class and
/// only one of them is chosen as a representing element that will be
/// modeled as a parameter. The others have to be normalized, i.e.,
/// replaced by the representing element of their equivalence class, in order
/// to get the correct parameter value, e.g., in the SCEVAffinator.
///
/// @param S The SCEV to normalize.
///
/// @return The representing SCEV for invariant loads or @p S if none.
const SCEV *getRepresentingInvariantLoadSCEV(const SCEV *S) const;

/// @brief Create a new SCoP statement for either @p BB or @p R.
///
/// Either @p BB or @p R should be non-null. A new statement for the non-null
Expand Down Expand Up @@ -1340,7 +1373,7 @@ class Scop {

/// @brief Return the set of invariant accesses.
const InvariantAccessesTy &getInvariantAccesses() const {
return InvariantAccesses;
return InvariantEquivClasses;
}

/// @brief Mark the SCoP as optimized by the scheduler.
Expand Down
125 changes: 111 additions & 14 deletions polly/lib/Analysis/ScopInfo.cpp
Expand Up @@ -1356,7 +1356,7 @@ void ScopStmt::print(raw_ostream &OS) const {
void ScopStmt::dump() const { print(dbgs()); }

void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
InvariantAccessesTy &TargetList) {
InvariantAccessesTy &InvariantEquivClasses) {

// Remove all memory accesses in @p InvMAs from this statement together
// with all scalar accesses that were caused by them. The tricky iteration
Expand Down Expand Up @@ -1409,8 +1409,49 @@ void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
}
}

for (MemoryAccess *MA : InvMAs)
TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx)));
for (MemoryAccess *MA : InvMAs) {

// Check for another invariant access that accesses the same location as
// MA and if found consolidate them. Otherwise create a new equivalence
// class at the end of InvariantEquivClasses.
LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
bool Consolidated = false;

for (auto &IAClass : InvariantEquivClasses) {
const SCEV *ClassPointerSCEV = IAClass.first;
if (PointerSCEV != ClassPointerSCEV)
continue;

Consolidated = true;

// We created empty equivalence classes for required invariant loads
// in the beginning and might encounter one of them here. If so, this
// MA will be the first in that equivalence class.
auto &ClassList = IAClass.second;
if (ClassList.empty()) {
ClassList.push_front(std::make_pair(MA, isl_set_copy(DomainCtx)));
break;
}

// If the equivalence class for MA is not empty we unify the execution
// context and add MA to the list of accesses that are in this class.
isl_set *IAClassDomainCtx = IAClass.second.front().second;
IAClassDomainCtx =
isl_set_union(IAClassDomainCtx, isl_set_copy(DomainCtx));
ClassList.push_front(std::make_pair(MA, IAClassDomainCtx));
break;
}

if (Consolidated)
continue;

    // If we did not consolidate MA, i.e., did not find an equivalence class
    // for it, we create a new one.
InvariantAccessTy IA = std::make_pair(MA, isl_set_copy(DomainCtx));
InvariantEquivClasses.emplace_back(InvariantEquivClassTy(
std::make_pair(PointerSCEV, InvariantAccessListTy({IA}))));
}

isl_set_free(DomainCtx);
}
Expand All @@ -1424,9 +1465,34 @@ void Scop::setContext(__isl_take isl_set *NewContext) {
Context = NewContext;
}

const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *S) const {
  // Only a SCEVUnknown that wraps a load instruction can belong to an
  // invariant-load equivalence class; any other SCEV is already normalized.
  const auto *Unknown = dyn_cast_or_null<SCEVUnknown>(S);
  if (!Unknown)
    return S;

  auto *Load = dyn_cast<LoadInst>(Unknown->getValue());
  if (!Load)
    return S;

  // Look for an equivalence class keyed by this load's pointer operand and,
  // if one exists, hand back the class key as the representing element.
  // Otherwise @p S is returned unchanged.
  const SCEV *PtrSCEV = SE->getSCEV(Load->getPointerOperand());
  for (const InvariantEquivClassTy &Class : InvariantEquivClasses)
    if (Class.first == PtrSCEV)
      return Class.first;

  return S;
}

void Scop::addParams(std::vector<const SCEV *> NewParameters) {
for (const SCEV *Parameter : NewParameters) {
Parameter = extractConstantFactor(Parameter, *SE).second;

// Normalize the SCEV to get the representing element for an invariant load.
Parameter = getRepresentingInvariantLoadSCEV(Parameter);

if (ParameterIds.find(Parameter) != ParameterIds.end())
continue;

Expand All @@ -1438,6 +1504,9 @@ void Scop::addParams(std::vector<const SCEV *> NewParameters) {
}

__isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) const {
// Normalize the SCEV to get the representing element for an invariant load.
Parameter = getRepresentingInvariantLoadSCEV(Parameter);

ParamIdType::const_iterator IdIter = ParameterIds.find(Parameter);

if (IdIter == ParameterIds.end())
Expand Down Expand Up @@ -1515,6 +1584,21 @@ void Scop::addUserContext() {
isl_space_free(Space);
}

void Scop::buildInvariantEquivalenceClasses() {
const InvariantLoadsSetTy &RIL = *SD.getRequiredInvariantLoads(&getRegion());
SmallPtrSet<const SCEV *, 4> ClassPointerSet;
for (LoadInst *LInst : RIL) {
const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

// Skip the load if we already have a equivalence class for the pointer.
if (!ClassPointerSet.insert(PointerSCEV).second)
continue;

InvariantEquivClasses.emplace_back(InvariantEquivClassTy(
std::make_pair(PointerSCEV, InvariantAccessListTy())));
}
}

void Scop::buildContext() {
isl_space *Space = isl_space_params_alloc(IslCtx, 0);
Context = isl_set_universe(isl_space_copy(Space));
Expand Down Expand Up @@ -2337,6 +2421,8 @@ Scop::Scop(Region &R, AccFuncMapType &AccFuncMap, ScopDetection &SD,

void Scop::init(AliasAnalysis &AA) {
buildContext();
buildInvariantEquivalenceClasses();

buildDomains(&R);

// Remove empty and ignored statements.
Expand Down Expand Up @@ -2388,8 +2474,9 @@ Scop::~Scop() {
}
}

for (const auto &IA : InvariantAccesses)
isl_set_free(IA.second);
for (const auto &IAClass : InvariantEquivClasses)
if (!IAClass.second.empty())
isl_set_free(IAClass.second.front().second);
}

void Scop::updateAccessDimensionality() {
Expand Down Expand Up @@ -2478,17 +2565,18 @@ void Scop::hoistInvariantLoads() {
InvMAs.reverse();

// Transfer the memory access from the statement to the SCoP.
Stmt.hoistMemoryAccesses(InvMAs, InvariantAccesses);
Stmt.hoistMemoryAccesses(InvMAs, InvariantEquivClasses);

isl_set_free(Domain);
}
isl_union_map_free(Writes);

if (!InvariantAccesses.empty())
if (!InvariantEquivClasses.empty())
IsOptimized = true;

auto &ScopRIL = *SD.getRequiredInvariantLoads(&getRegion());
// Check required invariant loads that were tagged during SCoP detection.
for (LoadInst *LI : *SD.getRequiredInvariantLoads(&getRegion())) {
for (LoadInst *LI : ScopRIL) {
assert(LI && getRegion().contains(LI));
ScopStmt *Stmt = getStmtForBasicBlock(LI->getParent());
if (Stmt && Stmt->lookupAccessesFor(LI) != nullptr) {
Expand All @@ -2511,8 +2599,12 @@ void Scop::hoistInvariantLoads() {
// we already ordered the accesses such that indirect loads can be resolved,
// thus we use a stable sort here.

auto compareInvariantAccesses = [this](const InvariantAccessTy &IA0,
const InvariantAccessTy &IA1) {
auto compareInvariantAccesses = [this](
const InvariantEquivClassTy &IAClass0,
const InvariantEquivClassTy &IAClass1) {
const InvariantAccessTy &IA0 = IAClass0.second.front();
const InvariantAccessTy &IA1 = IAClass1.second.front();

Instruction *AI0 = IA0.first->getAccessInstruction();
Instruction *AI1 = IA1.first->getAccessInstruction();

Expand Down Expand Up @@ -2554,7 +2646,7 @@ void Scop::hoistInvariantLoads() {
return Involves1Id0;
};

std::stable_sort(InvariantAccesses.begin(), InvariantAccesses.end(),
std::stable_sort(InvariantEquivClasses.begin(), InvariantEquivClasses.end(),
compareInvariantAccesses);
}

Expand Down Expand Up @@ -2739,9 +2831,14 @@ void Scop::print(raw_ostream &OS) const {
OS.indent(4) << "Region: " << getNameStr() << "\n";
OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
OS.indent(4) << "Invariant Accesses: {\n";
for (const auto &IA : InvariantAccesses) {
IA.first->print(OS);
OS.indent(12) << "Execution Context: " << IA.second << "\n";
for (const auto &IAClass : InvariantEquivClasses) {
if (IAClass.second.empty()) {
OS.indent(12) << "Class Pointer: " << IAClass.first << "\n";
} else {
IAClass.second.front().first->print(OS);
OS.indent(12) << "Execution Context: " << IAClass.second.front().second
<< "\n";
}
}
OS.indent(4) << "}\n";
printContext(OS.indent(4));
Expand Down
18 changes: 12 additions & 6 deletions polly/lib/CodeGen/IslNodeBuilder.cpp
Expand Up @@ -906,8 +906,8 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,

void IslNodeBuilder::preloadInvariantLoads() {

const auto &InvAccList = S.getInvariantAccesses();
if (InvAccList.empty())
const auto &InvariantEquivClasses = S.getInvariantAccesses();
if (InvariantEquivClasses.empty())
return;

const Region &R = S.getRegion();
Expand All @@ -921,14 +921,20 @@ void IslNodeBuilder::preloadInvariantLoads() {
isl_ast_build *Build =
isl_ast_build_from_context(isl_set_universe(S.getParamSpace()));

for (const auto &IA : InvAccList) {
MemoryAccess *MA = IA.first;
// For each equivalence class of invariant loads we pre-load the representing
// element with the unified execution context. However, we have to map all
// elements of the class to the one preloaded load as they are referenced
// during the code generation and therefore need to be mapped.
for (const auto &IAClass : InvariantEquivClasses) {

MemoryAccess *MA = IAClass.second.front().first;
assert(!MA->isImplicit());

isl_set *Domain = isl_set_copy(IA.second);
isl_set *Domain = isl_set_copy(IAClass.second.front().second);
Instruction *AccInst = MA->getAccessInstruction();
Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build);
ValueMap[AccInst] = PreloadVal;
for (const InvariantAccessTy &IA : IAClass.second)
ValueMap[IA.first->getAccessInstruction()] = PreloadVal;

if (SE.isSCEVable(AccInst->getType())) {
isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
Expand Down
35 changes: 35 additions & 0 deletions polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
@@ -0,0 +1,35 @@
; RUN: opt %loadPolly -polly-codegen -polly-parallel \
; RUN: -polly-parallel-force -S < %s | FileCheck %s
;
; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
;
; void f(float *A) {
; for (int i = 1; i < 1000; i++)
; A[i] += A[0] + A[0];
; }
;
; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Loop kernel for the C function above: every iteration re-loads A[0] twice,
; so both loads land in one invariant-load equivalence class and must be
; served by a single preloaded value (checked via the CHECK lines).
define void @f(float* nocapture %A) {
entry:
br label %for.body

for.cond.cleanup: ; preds = %for.body
ret void

for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
; Two loads of the same invariant location A[0].
%tmp = load float, float* %A, align 4
%tmp2 = load float, float* %A, align 4
; NOTE(review): %tmpadd is never used afterwards (%add uses %tmp2 and %tmp1)
; — presumably kept only to force both loads to stay live; confirm intent.
%tmpadd = fadd float %tmp, %tmp2
%arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv
%tmp1 = load float, float* %arrayidx1, align 4
%add = fadd float %tmp2, %tmp1
store float %add, float* %arrayidx1, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Iterate i = 1 .. 999, matching the C loop bound.
%exitcond = icmp eq i64 %indvars.iv.next, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

0 comments on commit 697fdf8

Please sign in to comment.