Skip to content

Commit

Permalink
[ScopBuilder] Introduce -polly-stmt-granularity=scalar-indep option.
Browse files Browse the repository at this point in the history
The option splits BasicBlocks into minimal statements such that no
additional scalar dependencies are introduced.

The algorithm is based on a union-find structure and unites two sets if
putting them into separate statements would introduce a scalar
dependency. As a consequence, instructions may be split into separate
statements such that their relative order differs from the order of the
statements they are in. This is accounted for in the case of instructions
whose relative order matters (e.g. memory accesses).

The algorithm is generic in that heuristic changes can be made
relatively easily. We might relax the order requirement for read-reads
or accesses to different base pointers. Forwardable instructions can be
made to not cause a join.

This implementation gives us a speed-up of 82% in the SPEC 2006 456.hmmer
benchmark by allowing loop distribution in a hot loop such that one of
the resulting loops can be vectorized.

Differential Revision: https://reviews.llvm.org/D38403

llvm-svn: 314983
  • Loading branch information
Meinersbur committed Oct 5, 2017
1 parent fc50004 commit cc345e6
Show file tree
Hide file tree
Showing 7 changed files with 517 additions and 2 deletions.
6 changes: 6 additions & 0 deletions polly/include/polly/ScopBuilder.h
Expand Up @@ -235,6 +235,12 @@ class ScopBuilder {
/// separator is found.
void buildSequentialBlockStmts(BasicBlock *BB);

/// Create one or more ScopStmts for @p BB using equivalence classes.
///
/// Instructions of a basic block that belong to the same equivalence class
/// are added to the same statement.
///
/// @param BB The basic block whose instructions are distributed over
///           statements.
void buildEqivClassBlockStmts(BasicBlock *BB);

/// Create ScopStmt for all BBs and non-affine subregions of @p SR.
///
/// @param SR A subregion of @p R.
Expand Down
182 changes: 180 additions & 2 deletions polly/lib/Analysis/ScopBuilder.cpp
Expand Up @@ -25,6 +25,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
Expand Down Expand Up @@ -102,14 +103,16 @@ static cl::opt<bool> DisableMultiplicativeReductions(
cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
cl::init(false), cl::cat(PollyCategory));

enum class GranularityChoice { BasicBlocks };
enum class GranularityChoice { BasicBlocks, ScalarIndepependence };

static cl::opt<GranularityChoice> StmtGranularity(
"polly-stmt-granularity",
cl::desc(
"Algorithm to use for splitting basic blocks into multiple statements"),
cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
"One statement per basic block")),
"One statement per basic block"),
clEnumValN(GranularityChoice::ScalarIndepependence,
"scalar-indep", "Scalar independence heuristic")),
cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory));

void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
Expand Down Expand Up @@ -701,6 +704,178 @@ void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB) {
scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
}

/// Tell whether @p Inst is an ordered instruction.
///
/// A sequence of unordered instructions can be permuted without changing
/// semantics. Every instruction for which this cannot be guaranteed is
/// considered ordered.
///
/// @param Inst The instruction to classify.
///
/// @return True if @p Inst may have side effects or may read or write memory.
static bool isOrderedInstruction(Instruction *Inst) {
  if (Inst->mayHaveSideEffects())
    return true;
  return Inst->mayReadOrWriteMemory();
}

/// Put an instruction and the instructions computing its scalar operands into
/// the same statement.
///
/// @param UnionFind    Equivalence classes of instructions; classes are merged
///                     in place.
/// @param ModeledInsts The instructions whose operands are inspected.
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *User : ModeledInsts) {
    // PHI nodes never cause a join through their operands here.
    if (isa<PHINode>(User))
      continue;

    for (Use &Operand : User->operands()) {
      auto *Def = dyn_cast<Instruction>(Operand.get());

      // Only operands that are instructions and that are registered in
      // UnionFind take part; anything else cannot be joined with.
      if (Def && UnionFind.findValue(Def) != UnionFind.end())
        UnionFind.unionSets(User, Def);
    }
  }
}

/// Merge every instruction that feeds a PHI node of a successor block into the
/// epilogue, which is represented by the 'nullptr' element of @p UnionFind.
///
/// @param UnionFind    Equivalence classes of instructions; classes are merged
///                     in place.
/// @param ModeledInsts Not used by this function.
/// @param BB           The basic block whose successors' PHI nodes are
///                     inspected.
static void
joinIncomingPHIValuesIntoEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
                                  ArrayRef<Instruction *> ModeledInsts,
                                  BasicBlock *BB) {
  for (BasicBlock *Succ : successors(BB)) {
    for (Instruction &Candidate : *Succ) {
      // The scan of a successor ends at its first non-PHI instruction.
      auto *PHI = dyn_cast<PHINode>(&Candidate);
      if (!PHI)
        break;

      auto *Incoming =
          dyn_cast<Instruction>(PHI->getIncomingValueForBlock(BB));

      // Only values that are instructions of BB itself and that are
      // registered in UnionFind are joined with the epilogue.
      const bool IsRegisteredLocalInst =
          Incoming && Incoming->getParent() == BB &&
          UnionFind.findValue(Incoming) != UnionFind.end();
      if (IsRegisteredLocalInst)
        UnionFind.unionSets(nullptr, Incoming);
    }
  }
}

/// Ensure that the order of ordered instructions does not change.
///
/// If we encounter an ordered instruction enclosed in instructions belonging to
/// a different statement (which might as well contain ordered instructions, but
/// this is not tested here), join them.
///
/// @param UnionFind    Equivalence classes of instructions; classes are merged
///                     in place.
/// @param ModeledInsts The instructions to scan, in the order in which they
///                     are visited.
static void
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
                        ArrayRef<Instruction *> ModeledInsts) {
  // Leaders of the statements in which an ordered instruction has been seen,
  // in first-seen order. Entries may stop being leaders once their statement
  // has been merged into another one; they are kept as position markers.
  SetVector<Instruction *> SeenLeaders;
  for (Instruction *Inst : ModeledInsts) {
    if (!isOrderedInstruction(Inst))
      continue;

    Instruction *Leader = UnionFind.getLeaderValue(Inst);
    bool Inserted = SeenLeaders.insert(Leader);
    if (Inserted)
      continue;

    // Merge statements to close holes. Say, we have already seen statements A
    // and B, in this order. Then we see an instruction of A again and we would
    // see the pattern "A B A". This loop joins all statements seen after the
    // earlier occurrence of A with A.
    for (Instruction *Prev : reverse(SeenLeaders)) {
      // Backtrack from the most recently seen leader until we reach the entry
      // of Inst's leader, which is known to be in the list because the insert
      // above failed. The comparison is on the list entries themselves:
      // leadership changes as a result of the unions below, so comparing
      // current leaders would miss the stop position and merge statements
      // that were seen before the earlier occurrence as well.
      if (Prev == Leader)
        break;
      UnionFind.unionSets(Prev, Leader);
    }
  }
}

/// Make the epilogue the last statement relative to all ordered instructions.
///
/// This works like joinOrderedInstructions(), with the epilogue (the class
/// containing 'nullptr') playing the role of the ordered instruction: once an
/// instruction of the epilogue has been visited, every ordered instruction
/// visited from then on is merged into the epilogue.
///
/// @param UnionFind    Equivalence classes of instructions; classes are merged
///                     in place.
/// @param ModeledInsts The instructions to scan, in the order in which they
///                     are visited.
static void joinAllAfterEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
                                 ArrayRef<Instruction *> ModeledInsts) {
  bool ReachedEpilogue = false;
  for (Instruction *Inst : ModeledInsts) {
    // Both leaders are looked up anew for every instruction because the
    // unions performed below modify the classes.
    auto EpilogueLeader = UnionFind.findLeader(nullptr);
    auto CurrentLeader = UnionFind.findLeader(Inst);

    ReachedEpilogue = ReachedEpilogue || EpilogueLeader == CurrentLeader;

    if (isOrderedInstruction(Inst) && ReachedEpilogue)
      UnionFind.unionSets(EpilogueLeader, CurrentLeader);
  }
}

void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
  Loop *SurroundingLoop = LI.getLoopFor(BB);

  // Collect the modeled instructions once so that shouldModelInst() does not
  // have to be queried again by the join passes below. Each of them starts out
  // in an equivalence class of its own.
  SmallVector<Instruction *, 32> ModeledInsts;
  EquivalenceClasses<Instruction *> UnionFind;
  for (Instruction &Inst : *BB) {
    if (shouldModelInst(&Inst, SurroundingLoop)) {
      ModeledInsts.push_back(&Inst);
      UnionFind.insert(&Inst);
    }
  }

  // 'nullptr' stands for the epilogue, the last statement of a basic block. It
  // contains no instructions itself but holds the PHI write accesses for
  // successor basic blocks. If a PHI has an incoming value defined in this BB,
  // the epilogue can also be merged with other statements.
  // TODO: We wouldn't need this if we would add PHIWrites into the statement
  // that defines the incoming value (if in the BB) instead of always the last,
  // so we could unconditionally always add a last statement.
  UnionFind.insert(nullptr);

  // Merge classes; the passes run in exactly this order.
  joinOperandTree(UnionFind, ModeledInsts);
  joinIncomingPHIValuesIntoEpilogue(UnionFind, ModeledInsts, BB);
  joinOrderedInstructions(UnionFind, ModeledInsts);
  joinAllAfterEpilogue(UnionFind, ModeledInsts);

  // Instruction list of each statement, keyed by the leader of its class. Both
  // the statements and the instructions within them are collected in reverse
  // order so that the epilogue can simply be registered first.
  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;

  // Register the epilogue before anything else; after the reversal below it
  // becomes the last statement.
  // NOTE(review): instructions are filed under the current leader of their
  // class, so this presumably relies on 'nullptr' still being the leader of
  // the epilogue class after the join passes - confirm.
  LeaderToInstList[nullptr];

  // UnionFind's member iterators are not in any specific order, so walk the
  // basic block (backwards) and look up the class of every instruction.
  for (Instruction &Inst : reverse(*BB)) {
    auto LeaderIt = UnionFind.findLeader(&Inst);
    if (LeaderIt != UnionFind.member_end())
      LeaderToInstList[*LeaderIt].push_back(&Inst);
  }

  // Finally create the statements, undoing both reversals.
  int Count = 0;
  for (auto &LeaderAndInsts : reverse(LeaderToInstList)) {
    std::vector<Instruction *> &InstList = LeaderAndInsts.second;
    std::reverse(InstList.begin(), InstList.end());
    scop->addScopStmt(BB, SurroundingLoop, std::move(InstList), Count);
    ++Count;
  }
}

void ScopBuilder::buildStmts(Region &SR) {
if (scop->isNonAffineSubRegion(&SR)) {
std::vector<Instruction *> Instructions;
Expand All @@ -722,6 +897,9 @@ void ScopBuilder::buildStmts(Region &SR) {
case GranularityChoice::BasicBlocks:
buildSequentialBlockStmts(BB);
break;
case GranularityChoice::ScalarIndepependence:
buildEqivClassBlockStmts(BB);
break;
}
}
}
Expand Down
68 changes: 68 additions & 0 deletions polly/test/ScopInfo/granularity_scalar-indep.ll
@@ -0,0 +1,68 @@
; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
;
; Split a block into two independent statements that share no scalar.
; This case has the instructions of the two statements interleaved, such that
; splitting the BasicBlock in the middle would cause a scalar dependency.
;
; for (int j = 0; j < n; j += 1) {
; body:
; double valA = A[0];
; double valB = 21.0 + 21.0;
; A[0] = valA;
; B[0] = valB;
; }
;
; Loop over j in [0, n) whose body interleaves two instruction chains that
; share no scalar value: one loads from and stores to %A, the other computes a
; constant sum and stores it to %B.
define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
entry:
br label %for

; Loop header: %j starts at 0 and the body is entered while %j < %n.
for:
%j = phi i32 [0, %entry], [%j.inc, %inc]
%j.cmp = icmp slt i32 %j, %n
br i1 %j.cmp, label %body, label %exit

; The block under test. The %valA chain and the %valB chain alternate.
body:
%valA = load double, double* %A
%valB = fadd double 21.0, 21.0
store double %valA, double* %A
store double %valB, double* %B
br label %inc

; Loop latch: increment the induction variable.
inc:
%j.inc = add nuw nsw i32 %j, 1
br label %for

exit:
br label %return

return:
ret void
}


; CHECK: Statements {
; CHECK-NEXT: Stmt_body
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [n] -> { Stmt_body[i0] : 0 <= i0 < n };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [n] -> { Stmt_body[i0] -> [i0, 0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %valA = load double, double* %A
; CHECK-NEXT: store double %valA, double* %A
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_body1
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [n] -> { Stmt_body1[i0] : 0 <= i0 < n };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [n] -> { Stmt_body1[i0] -> [i0, 1] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [n] -> { Stmt_body1[i0] -> MemRef_B[0] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %valB = fadd double 2.100000e+01, 2.100000e+01
; CHECK-NEXT: store double %valB, double* %B
; CHECK-NEXT: }
; CHECK-NEXT: }
68 changes: 68 additions & 0 deletions polly/test/ScopInfo/granularity_scalar-indep_epilogue.ll
@@ -0,0 +1,68 @@
; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
;
; Split a block into two independent statements that share no scalar.
; This case has an independent statement just for PHI writes.
;
; for (int j = 0; j < n; j += 1) {
; bodyA:
; double valA = A[0];
; A[0] = valA;
;
; bodyB:
; phi = 42.0;
; }
;
; Loop over j in [0, n). Block %bodyA loads from and stores to %A and is
; followed by %bodyB, whose PHI node receives the constant 42.0 from %bodyA.
define void @func(i32 %n, double* noalias nonnull %A) {
entry:
br label %for

; Loop header: %j starts at 0 and the body is entered while %j < %n.
for:
%j = phi i32 [0, %entry], [%j.inc, %inc]
%j.cmp = icmp slt i32 %j, %n
br i1 %j.cmp, label %bodyA, label %exit

; The block under test: one load/store chain on %A.
bodyA:
%valA = load double, double* %A
store double %valA, double* %A
br label %bodyB

; The incoming value of %phi from %bodyA is a constant, not an instruction.
bodyB:
%phi = phi double [42.0, %bodyA]
br label %inc

; Loop latch: increment the induction variable.
inc:
%j.inc = add nuw nsw i32 %j, 1
br label %for

exit:
br label %return

return:
ret void
}


; CHECK: Statements {
; CHECK-NEXT: Stmt_bodyA
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> [i0, 0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %valA = load double, double* %A
; CHECK-NEXT: store double %valA, double* %A
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_bodyA1
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [n] -> { Stmt_bodyA1[i0] : 0 <= i0 < n };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [n] -> { Stmt_bodyA1[i0] -> [i0, 1] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [n] -> { Stmt_bodyA1[i0] -> MemRef_phi__phi[] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: }
; CHECK-NEXT: }

0 comments on commit cc345e6

Please sign in to comment.