13 changes: 13 additions & 0 deletions polly/lib/CodeGen/IslNodeBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ using namespace llvm;

STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");

// Code generation counters. Each one is incremented once at the end of the
// matching IslNodeBuilder routine (createForSequential, createForParallel,
// createForVector and createIf respectively), after the isl AST node has been
// freed.
STATISTIC(SequentialLoops, "Number of generated sequential for-loops");
STATISTIC(ParallelLoops, "Number of generated parallel for-loops");
STATISTIC(VectorLoops, "Number of generated vector for-loops");
STATISTIC(IfConditions, "Number of generated if-conditions");

static cl::opt<bool> PollyGenerateRTCPrint(
"polly-codegen-emit-rtc-print",
cl::desc("Emit code that prints the runtime check result dynamically."),
Expand Down Expand Up @@ -480,6 +485,8 @@ void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,

isl_ast_node_free(For);
isl_ast_expr_free(Iterator);

VectorLoops++;
}

namespace {
Expand Down Expand Up @@ -571,6 +578,8 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For,
isl_ast_node_free(For);
isl_ast_expr_free(Iterator);
isl_id_free(IteratorID);

SequentialLoops++;
}

/// Remove the BBs contained in a (sub)function from the dominator tree.
Expand Down Expand Up @@ -720,6 +729,8 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
isl_ast_node_free(For);
isl_ast_expr_free(Iterator);
isl_id_free(IteratorID);

ParallelLoops++;
}

/// Return whether any of @p Node's statements contain partial accesses.
Expand Down Expand Up @@ -813,6 +824,8 @@ void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
Builder.SetInsertPoint(&MergeBB->front());

isl_ast_node_free(If);

IfConditions++;
}

__isl_give isl_id_to_ast_expr *
Expand Down
4 changes: 2 additions & 2 deletions polly/lib/Support/RegisterPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,13 +328,13 @@ void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
PM.add(polly::createPolyhedralInfoPass());

if (EnableSimplify)
PM.add(polly::createSimplifyPass());
PM.add(polly::createSimplifyPass(0));
if (EnableForwardOpTree)
PM.add(polly::createForwardOpTreePass());
if (EnableDeLICM)
PM.add(polly::createDeLICMPass());
if (EnableSimplify)
PM.add(polly::createSimplifyPass());
PM.add(polly::createSimplifyPass(1));

if (ImportJScop)
PM.add(polly::createJSONImporterPass());
Expand Down
18 changes: 18 additions & 0 deletions polly/lib/Transform/DeLICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@ STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars");
STATISTIC(TargetsMapped, "Number of stores used for at least one mapping");
STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized");

// Write counts of the SCoP as it looks once DeLICM is done with it. They are
// accumulated from the fields of the same name in the result of
// Scop::getStatistics(), which is queried after collapseToUnused() has run.
STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM");
STATISTIC(NumValueWritesInLoops,
"Number of scalar value writes nested in affine loops after DeLICM");
STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM");
STATISTIC(NumPHIWritesInLoops,
"Number of scalar phi writes nested in affine loops after DeLICM");
STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM");
STATISTIC(NumSingletonWritesInLoops,
"Number of singleton writes nested in affine loops after DeLICM");

isl::union_map computeReachingOverwrite(isl::union_map Schedule,
isl::union_map Writes,
bool InclPrevWrite,
Expand Down Expand Up @@ -1402,6 +1412,14 @@ class DeLICM : public ScopPass {

collapseToUnused(S);

auto ScopStats = S.getStatistics();
NumValueWrites += ScopStats.NumValueWrites;
NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
NumPHIWrites += ScopStats.NumPHIWrites;
NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
NumSingletonWrites += ScopStats.NumSingletonWrites;
NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;

return false;
}

Expand Down
19 changes: 19 additions & 0 deletions polly/lib/Transform/ForwardOpTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@ STATISTIC(TotalModifiedStmts,

STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");

// Write counts of the SCoP as it looks once operand tree forwarding is done.
// They are accumulated from the fields of the same name in the result of
// Scop::getStatistics(), which is queried after the final SCoP has been
// printed to the debug stream.
STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
STATISTIC(NumValueWritesInLoops,
"Number of scalar value writes nested in affine loops after OpTree");
STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
STATISTIC(NumPHIWritesInLoops,
"Number of scalar phi writes nested in affine loops after OpTree");
STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
STATISTIC(NumSingletonWritesInLoops,
"Number of singleton writes nested in affine loops after OpTree");

namespace {

/// The state of whether an operand tree was/can be forwarded.
Expand Down Expand Up @@ -844,6 +854,15 @@ class ForwardOpTree : public ScopPass {
DEBUG(dbgs() << "\nFinal Scop:\n");
DEBUG(dbgs() << S);

// Update statistics
auto ScopStats = S.getStatistics();
NumValueWrites += ScopStats.NumValueWrites;
NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
NumPHIWrites += ScopStats.NumPHIWrites;
NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
NumSingletonWrites += ScopStats.NumSingletonWrites;
NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;

return false;
}

Expand Down
100 changes: 97 additions & 3 deletions polly/lib/Transform/ScheduleOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,33 @@ static cl::opt<bool> OptimizedScops(
"transformations is applied on the schedule tree"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

// Progress counters of IslScheduleOptimizer::runOnScop(), in the order in
// which a SCoP can reach them:
//  - ScopsProcessed:   bumped before the dependences are queried.
//  - ScopsRescheduled: bumped once the scheduler returned a non-null schedule.
//  - ScopsOptimized:   bumped once the new schedule passed the
//                      isProfitableSchedule() check.
STATISTIC(ScopsProcessed, "Number of scops processed");
STATISTIC(ScopsRescheduled, "Number of scops rescheduled");
STATISTIC(ScopsOptimized, "Number of scops optimized");

// Loop counts (NumAffineLoops / NumBoxedLoops of Scop::getStatistics()) summed
// over the SCoPs that were counted in ScopsOptimized.
STATISTIC(NumAffineLoopsOptimized, "Number of affine loops optimized");
STATISTIC(NumBoxedLoopsOptimized, "Number of boxed loops optimized");

/// Define VARNAME as an array of three llvm::Statistic counters, one for each
/// version of the schedule tree that is analyzed. The entries are named
/// VARNAME0, VARNAME1 and VARNAME2 and share the description DESC, extended by
/// a suffix that tells the versions apart: index 0 is the original tree,
/// index 1 the tree after the scheduler and index 2 the tree after the
/// optimizer.
#define THREE_STATISTICS(VARNAME, DESC) \
static llvm::Statistic VARNAME[3] = { \
{DEBUG_TYPE, #VARNAME "0", DESC " (original)", {0}, false}, \
{DEBUG_TYPE, #VARNAME "1", DESC " (after scheduler)", {0}, false}, \
{DEBUG_TYPE, #VARNAME "2", DESC " (after optimizer)", {0}, false}}

// Per-version schedule tree statistics; all of them are updated by
// walkScheduleTreeForStatistics(), indexed with its Version argument.
THREE_STATISTICS(NumBands, "Number of bands");
THREE_STATISTICS(NumBandMembers, "Number of band members");
THREE_STATISTICS(NumCoincident, "Number of coincident band members");
THREE_STATISTICS(NumPermutable, "Number of permutable bands");
THREE_STATISTICS(NumFilters, "Number of filter nodes");
THREE_STATISTICS(NumExtension, "Number of extension nodes");

// Counters for the individual schedule tree transformations:
//  - The three tiling counters are bumped in
//    ScheduleTreeOptimizer::standardBandOpts() each time the corresponding
//    tiling option is enabled and the band node is tiled.
//  - PrevectOpts is bumped in prevectSchedBand() when the "SIMD" mark is
//    inserted.
//  - MatMulOpts is bumped in optimizeBand() when the matrix multiplication
//    pattern was detected.
STATISTIC(FirstLevelTileOpts, "Number of first level tiling applied");
STATISTIC(SecondLevelTileOpts, "Number of second level tiling applied");
STATISTIC(RegisterTileOpts, "Number of register tiling applied");
STATISTIC(PrevectOpts, "Number of strip-mining for prevectorization applied");
STATISTIC(MatMulOpts,
"Number of matrix multiplication patterns detected and optimized");

/// Create an isl::union_set, which describes the isolate option based on
/// IsolateDomain.
///
Expand Down Expand Up @@ -368,6 +395,7 @@ isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf)
Node = Node.parent();
auto LoopMarker = isl::id::alloc(Node.get_ctx(), "SIMD", nullptr);
PrevectOpts++;
return Node.insert_mark(LoopMarker);
}

Expand Down Expand Up @@ -456,17 +484,23 @@ bool ScheduleTreeOptimizer::isTileableBandNode(isl::schedule_node Node) {

__isl_give isl::schedule_node
ScheduleTreeOptimizer::standardBandOpts(isl::schedule_node Node, void *User) {
if (FirstLevelTiling)
if (FirstLevelTiling) {
Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes,
FirstLevelDefaultTileSize);
FirstLevelTileOpts++;
}

if (SecondLevelTiling)
if (SecondLevelTiling) {
Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes,
SecondLevelDefaultTileSize);
SecondLevelTileOpts++;
}

if (RegisterTiling)
if (RegisterTiling) {
Node =
applyRegisterTiling(Node, RegisterTileSizes, RegisterDefaultTileSize);
RegisterTileOpts++;
}

if (PollyVectorizerChoice == VECTORIZER_NONE)
return Node;
Expand Down Expand Up @@ -1235,6 +1269,7 @@ ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *Node,
isMatrMultPattern(isl::manage(isl_schedule_node_copy(Node)), OAI->D,
MMI)) {
DEBUG(dbgs() << "The matrix multiplication pattern was detected\n");
MatMulOpts++;
return optimizeMatMulPattern(isl::manage(Node), OAI->TTI, MMI).release();
}

Expand Down Expand Up @@ -1308,6 +1343,52 @@ class IslScheduleOptimizer : public ScopPass {

char IslScheduleOptimizer::ID = 0;

/// Collect statistics for the schedule tree.
///
/// Visits the root node and all of its descendants and counts bands, band
/// members, coincident band members, permutable bands, filter nodes and
/// extension nodes into the statistic slot selected by @p Version.
///
/// @param Schedule The schedule tree to analyze. If it has no root node (e.g.
///                 because it is a null schedule), nothing is counted.
/// @param Version  The version of the schedule tree that is analyzed.
///                 0 for the original schedule tree before any transformation.
///                 1 for the schedule tree after isl's rescheduling.
///                 2 for the schedule tree after optimizations are applied
///                 (tiling, pattern matching)
static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) {
  auto Root = Schedule.get_root();
  if (!Root)
    return;

  // The walk has to cover the *descendants* of the root. An ancestor walk only
  // visits the nodes above its starting node; started at the root it reaches
  // no band, filter or extension node at all, so nothing would be counted.
  //
  // The callback must be captureless to be usable as a C function pointer,
  // hence Version is handed over through the user pointer.
  isl_schedule_node_foreach_descendant_top_down(
      Root.get(),
      [](__isl_keep isl_schedule_node *Node, void *User) -> isl_bool {
        const int Version = *static_cast<int *>(User);

        switch (isl_schedule_node_get_type(Node)) {
        case isl_schedule_node_band: {
          NumBands[Version]++;
          if (isl_schedule_node_band_get_permutable(Node) == isl_bool_true)
            NumPermutable[Version]++;

          int CountMembers = isl_schedule_node_band_n_member(Node);
          NumBandMembers[Version] += CountMembers;
          for (int i = 0; i < CountMembers; i += 1) {
            if (isl_schedule_node_band_member_get_coincident(Node, i) ==
                isl_bool_true)
              NumCoincident[Version]++;
          }
        } break;

        case isl_schedule_node_filter:
          NumFilters[Version]++;
          break;

        case isl_schedule_node_extension:
          NumExtension[Version]++;
          break;

        default:
          break;
        }

        // Returning true continues the traversal into this node's children.
        return isl_bool_true;
      },
      &Version);
}

bool IslScheduleOptimizer::runOnScop(Scop &S) {

// Skip SCoPs in case they're already optimised by PPCGCodeGeneration
Expand Down Expand Up @@ -1352,6 +1433,9 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) {
if (!Domain)
return false;

ScopsProcessed++;
walkScheduleTreeForStatistics(S.getScheduleTree(), 0);

isl::union_map Validity = give(D.getDependences(ValidityKinds));
isl::union_map Proximity = give(D.getDependences(ProximityKinds));

Expand Down Expand Up @@ -1432,11 +1516,15 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) {
auto Schedule = SC.compute_schedule();
isl_options_set_on_error(Ctx, OnErrorStatus);

walkScheduleTreeForStatistics(Schedule, 1);

// In case the scheduler is not able to optimize the code, we just do not
// touch the schedule.
if (!Schedule)
return false;

ScopsRescheduled++;

DEBUG({
auto *P = isl_printer_to_str(Ctx);
P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
Expand All @@ -1451,10 +1539,16 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) {
auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
const OptimizerAdditionalInfoTy OAI = {TTI, const_cast<Dependences *>(&D)};
auto NewSchedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
// Version 2 is the slot for the tree after the optimizations were applied.
// Version 1 already holds the numbers of the tree returned by the scheduler
// and must not be counted a second time.
walkScheduleTreeForStatistics(NewSchedule, 2);

if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewSchedule))
return false;

auto ScopStats = S.getStatistics();
ScopsOptimized++;
NumAffineLoopsOptimized += ScopStats.NumAffineLoops;
NumBoxedLoopsOptimized += ScopStats.NumBoxedLoops;

S.setScheduleTree(NewSchedule.release());
S.markAsOptimized();

Expand Down
79 changes: 55 additions & 24 deletions polly/lib/Transform/Simplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,44 @@ using namespace polly;

namespace {

/// Define VARNAME as an array of two llvm::Statistic counters, one for each
/// Simplify instance in the pass pipeline. The entries are named VARNAME0 and
/// VARNAME1 and share the description DESC, extended by " (first)" and
/// " (second)" respectively. The array is meant to be indexed with the
/// invocation id (CallNo) of the running Simplify instance.
#define TWO_STATISTICS(VARNAME, DESC) \
static llvm::Statistic VARNAME[2] = { \
{DEBUG_TYPE, #VARNAME "0", DESC " (first)", {0}, false}, \
{DEBUG_TYPE, #VARNAME "1", DESC " (second)", {0}, false}}

/// Maximum number of disjuncts we allow in removeOverwrites(). This keeps the
/// analysis of accesses in a statement from becoming too complex. Chosen to be
/// relatively small because all the common cases should access only a few
/// array elements per statement.
static int const SimplifyMaxDisjuncts = 4;

STATISTIC(ScopsProcessed, "Number of SCoPs processed");
STATISTIC(ScopsModified, "Number of SCoPs simplified");

STATISTIC(TotalOverwritesRemoved, "Number of removed overwritten writes");
STATISTIC(TotalWritesCoalesced, "Number of writes coalesced with another");
STATISTIC(TotalRedundantWritesRemoved,
"Number of writes of same value removed in any SCoP");
STATISTIC(TotalEmptyPartialAccessesRemoved,
"Number of empty partial accesses removed");
STATISTIC(TotalDeadAccessesRemoved, "Number of dead accesses removed");
STATISTIC(TotalDeadInstructionsRemoved,
"Number of unused instructions removed");
STATISTIC(TotalStmtsRemoved, "Number of statements removed in any SCoP");
// Per-invocation counters of the Simplify pass. Every update in the pass
// indexes these arrays with the CallNo of the running instance, so the first
// and the second Simplify run in the pipeline are reported separately.
TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed");
TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified");

// Totals over all SCoPs of what the individual cleanup steps removed or
// merged.
TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes");
TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another");
TWO_STATISTICS(TotalRedundantWritesRemoved,
"Number of writes of same value removed in any SCoP");
TWO_STATISTICS(TotalEmptyPartialAccessesRemoved,
"Number of empty partial accesses removed");
TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed");
TWO_STATISTICS(TotalDeadInstructionsRemoved,
"Number of unused instructions removed");
TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP");

// Write counts of the SCoP as it looks once a Simplify run is done with it.
// They are accumulated from the fields of the same name in the result of
// Scop::getStatistics() and indexed with the CallNo of the running instance.
//
// TWO_STATISTICS already appends " (first)" / " (second)" to each
// description, so the descriptions themselves must not name a specific run.
TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify");
TWO_STATISTICS(
    NumValueWritesInLoops,
    "Number of scalar value writes nested in affine loops after Simplify");
TWO_STATISTICS(NumPHIWrites, "Number of scalar phi writes after Simplify");
TWO_STATISTICS(
    NumPHIWritesInLoops,
    "Number of scalar phi writes nested in affine loops after Simplify");
TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify");
TWO_STATISTICS(
    NumSingletonWritesInLoops,
    "Number of singleton writes nested in affine loops after Simplify");

static bool isImplicitRead(MemoryAccess *MA) {
return MA->isRead() && MA->isOriginalScalarKind();
Expand Down Expand Up @@ -100,6 +119,10 @@ static isl::union_map underapproximatedAddMap(isl::union_map UMap,

class Simplify : public ScopPass {
private:
/// The invocation id (if there are multiple instances in the pass manager's
/// pipeline) to determine which statistics to update.
int CallNo;

/// The last/current SCoP that is/has been processed.
Scop *S;

Expand Down Expand Up @@ -176,7 +199,7 @@ class Simplify : public ScopPass {

Stmt.removeSingleMemoryAccess(MA);
OverwritesRemoved++;
TotalOverwritesRemoved++;
TotalOverwritesRemoved[CallNo]++;
}

// Unconditional writes overwrite other values.
Expand Down Expand Up @@ -315,7 +338,7 @@ class Simplify : public ScopPass {
// We removed MA, OtherMA takes its role.
MA = OtherMA;

TotalWritesCoalesced++;
TotalWritesCoalesced[CallNo]++;
WritesCoalesced++;

// Don't look for more candidates.
Expand Down Expand Up @@ -437,7 +460,7 @@ class Simplify : public ScopPass {
Stmt.removeSingleMemoryAccess(MA);

RedundantWritesRemoved++;
TotalRedundantWritesRemoved++;
TotalRedundantWritesRemoved[CallNo]++;
}
}
}
Expand Down Expand Up @@ -476,7 +499,7 @@ class Simplify : public ScopPass {
StmtsRemoved = NumStmtsBefore - S->getSize();
DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore
<< ") statements\n");
TotalStmtsRemoved += StmtsRemoved;
TotalStmtsRemoved[CallNo] += StmtsRemoved;
}

/// Remove accesses that have an empty domain.
Expand All @@ -501,7 +524,7 @@ class Simplify : public ScopPass {
for (MemoryAccess *MA : DeferredRemove) {
Stmt.removeSingleMemoryAccess(MA);
EmptyPartialAccessesRemoved++;
TotalEmptyPartialAccessesRemoved++;
TotalEmptyPartialAccessesRemoved[CallNo]++;
}
}
}
Expand Down Expand Up @@ -530,7 +553,7 @@ class Simplify : public ScopPass {
Stmt->removeSingleMemoryAccess(MA);

DeadAccessesRemoved++;
TotalDeadAccessesRemoved++;
TotalDeadAccessesRemoved[CallNo]++;
}

// Remove all non-reachable instructions.
Expand All @@ -548,7 +571,7 @@ class Simplify : public ScopPass {
DEBUG(dbgs() << "Removing "; Inst->print(dbgs());
dbgs() << " because it is not used\n");
DeadInstructionsRemoved++;
TotalDeadInstructionsRemoved++;
TotalDeadInstructionsRemoved[CallNo]++;
continue;
}

Expand Down Expand Up @@ -595,7 +618,7 @@ class Simplify : public ScopPass {

public:
static char ID;
explicit Simplify() : ScopPass(ID) {}
explicit Simplify(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {}

virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredTransitive<ScopInfoRegionPass>();
Expand All @@ -610,7 +633,7 @@ class Simplify : public ScopPass {

// Prepare processing of this SCoP.
this->S = &S;
ScopsProcessed++;
ScopsProcessed[CallNo]++;

DEBUG(dbgs() << "Removing partial writes that never happen...\n");
removeEmptyPartialAccesses();
Expand All @@ -632,10 +655,18 @@ class Simplify : public ScopPass {
removeUnnecessaryStmts();

if (isModified())
ScopsModified++;
ScopsModified[CallNo]++;
DEBUG(dbgs() << "\nFinal Scop:\n");
DEBUG(dbgs() << S);

auto ScopStats = S.getStatistics();
NumValueWrites[CallNo] += ScopStats.NumValueWrites;
NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops;
NumPHIWrites[CallNo] += ScopStats.NumPHIWrites;
NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops;
NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites;
NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops;

return false;
}

Expand Down Expand Up @@ -688,7 +719,7 @@ SmallVector<MemoryAccess *, 32> getAccessesInOrder(ScopStmt &Stmt) {
}
} // namespace polly

Pass *polly::createSimplifyPass() { return new Simplify(); }
/// Create a new instance of the Simplify pass.
///
/// @param CallNo The invocation id that is forwarded to the pass; it selects
///               which entry of the per-invocation statistics the instance
///               updates.
///
/// @return The newly allocated pass.
Pass *polly::createSimplifyPass(int CallNo) {
  Pass *SimplifyPass = new Simplify(CallNo);
  return SimplifyPass;
}

INITIALIZE_PASS_BEGIN(Simplify, "polly-simplify", "Polly - Simplify", false,
false)
Expand Down