Skip to content

Commit

Permalink
Specify the default values of the cache parameters
Browse files Browse the repository at this point in the history
If the parameters of the target cache (i.e., cache level sizes, cache level
associativities) are not specified or have wrong values, we use ones for
parameters of the macro-kernel and do not perform data-layout optimizations of
the matrix multiplication. In this patch we specify the default values of the
cache parameters to be able to apply the pattern matching optimizations even in
this case. Since there is no typical values of this parameters, we use the
parameters of Intel Core i7-3820 SandyBridge that also help to attain the
high-performance on IBM POWER System S822 and IBM Power 730 Express server.

Reviewed-by: Tobias Grosser <tobias@grosser.es>

Differential Revision: https://reviews.llvm.org/D28090

llvm-svn: 290518
  • Loading branch information
gareevroman committed Dec 25, 2016
1 parent 86602e8 commit 1c2927b
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 23 deletions.
54 changes: 35 additions & 19 deletions polly/lib/Transform/ScheduleOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,33 @@ static cl::opt<int> ThroughputVectorFma(
"instructions per clock cycle."),
cl::Hidden, cl::init(1), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::list<int>
CacheLevelAssociativity("polly-target-cache-level-associativity",
cl::desc("The associativity of each cache level."),
cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
cl::cat(PollyCategory));

static cl::list<int> CacheLevelSizes(
"polly-target-cache-level-sizes",
cl::desc("The size of each cache level specified in bytes."), cl::Hidden,
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
// This option, along with --polly-target-2nd-cache-level-associativity,
// --polly-target-1st-cache-level-size, and --polly-target-2st-cache-level-size
// represent the parameters of the target cache, which do not have typical
// values that can be used by default. However, to apply the pattern matching
// optimizations, we use the values of the parameters of Intel Core i7-3820
// SandyBridge in case the parameters are not specified. Such an approach helps
// also to attain the high-performance on IBM POWER System S822 and IBM Power
// 730 Express server.
static cl::opt<int> FirstCacheLevelAssociativity(
"polly-target-1st-cache-level-associativity",
cl::desc("The associativity of the first cache level."), cl::Hidden,
cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<int> SecondCacheLevelAssociativity(
"polly-target-2nd-cache-level-associativity",
cl::desc("The associativity of the second cache level."), cl::Hidden,
cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<int> FirstCacheLevelSize(
"polly-target-1st-cache-level-size",
cl::desc("The size of the first cache level specified in bytes."),
cl::Hidden, cl::init(32768), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<int> SecondCacheLevelSize(
"polly-target-2nd-cache-level-size",
cl::desc("The size of the second level specified in bytes."), cl::Hidden,
cl::init(262144), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<int> FirstLevelDefaultTileSize(
"polly-default-tile-size",
Expand Down Expand Up @@ -612,21 +629,20 @@ getMacroKernelParams(const MicroKernelParamsTy &MicroKernelParams) {
// degree of a cache level is greater than two. Otherwise, another algorithm
// for determination of the parameters should be used.
if (!(MicroKernelParams.Mr > 0 && MicroKernelParams.Nr > 0 &&
CacheLevelSizes.size() >= 2 && CacheLevelAssociativity.size() >= 2 &&
CacheLevelSizes[0] > 0 && CacheLevelSizes[1] > 0 &&
CacheLevelAssociativity[0] > 2 && CacheLevelAssociativity[1] > 2))
FirstCacheLevelSize > 0 && SecondCacheLevelSize > 0 &&
FirstCacheLevelAssociativity > 2 && SecondCacheLevelAssociativity > 2))
return {1, 1, 1};
// The quotient should be greater than zero.
if (PollyPatternMatchingNcQuotient <= 0)
return {1, 1, 1};
int Car = floor(
(CacheLevelAssociativity[0] - 1) /
(FirstCacheLevelAssociativity - 1) /
(1 + static_cast<double>(MicroKernelParams.Nr) / MicroKernelParams.Mr));
int Kc = (Car * CacheLevelSizes[0]) /
(MicroKernelParams.Mr * CacheLevelAssociativity[0] * 8);
double Cac = static_cast<double>(Kc * 8 * CacheLevelAssociativity[1]) /
CacheLevelSizes[1];
int Mc = floor((CacheLevelAssociativity[1] - 2) / Cac);
int Kc = (Car * FirstCacheLevelSize) /
(MicroKernelParams.Mr * FirstCacheLevelAssociativity * 8);
double Cac = static_cast<double>(Kc * 8 * SecondCacheLevelAssociativity) /
SecondCacheLevelSize;
int Mc = floor((SecondCacheLevelAssociativity - 2) / Cac);
int Nc = PollyPatternMatchingNcQuotient * MicroKernelParams.Nr;
return {Mc, Nc, Kc};
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -polly-target-cache-level-associativity=8,8 -polly-target-cache-level-sizes=32768,262144 -polly-optimized-scops < %s 2>&1 | FileCheck %s
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -polly-target-1st-cache-level-associativity=8 -polly-target-2nd-cache-level-associativity=8 -polly-target-1st-cache-level-size=32768 -polly-target-2nd-cache-level-size=262144 -polly-optimized-scops < %s 2>&1 | FileCheck %s
;
; /* C := alpha*A*B + beta*C */
; for (i = 0; i < _PB_NI; i++)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -polly-target-cache-level-associativity=8,8 -polly-target-cache-level-sizes=32768,262144 -polly-ast -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -polly-target-1st-cache-level-associativity=8 -polly-target-2nd-cache-level-associativity=8 -polly-target-1st-cache-level-size=32768 -polly-target-2nd-cache-level-size=262144 -polly-ast -analyze < %s | FileCheck %s
;
; /* C := alpha*A*B + beta*C */
; /* _PB_NK % Kc != 0 */
Expand Down
4 changes: 2 additions & 2 deletions polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -analyze -polly-ast < %s 2>&1 | FileCheck %s
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -analyze -polly-ast -polly-target-cache-level-associativity=8,8 -polly-target-cache-level-sizes=32768,262144 < %s 2>&1 | FileCheck %s --check-prefix=EXTRACTION-OF-MACRO-KERNEL
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -analyze -polly-ast -polly-target-1st-cache-level-size=0 < %s 2>&1 | FileCheck %s
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -analyze -polly-ast -polly-target-1st-cache-level-associativity=8 -polly-target-2nd-cache-level-associativity=8 -polly-target-1st-cache-level-size=32768 -polly-target-2nd-cache-level-size=262144 < %s 2>&1 | FileCheck %s --check-prefix=EXTRACTION-OF-MACRO-KERNEL
;
; /* C := alpha*A*B + beta*C */
; for (i = 0; i < _PB_NI; i++)
Expand Down

0 comments on commit 1c2927b

Please sign in to comment.