Skip to content

Commit

Permalink
Use SCEV information for the second level aliasing
Browse files Browse the repository at this point in the history
We introduce another level of alias metadata to distinguish the individual
non-aliasing accesses that have inter iteration alias-free base pointers
marked with "Inter iteration alias-free" mark nodes. To distinguish two
accesses, the comparison of raw pointers representing base pointers is used.

In the case of, for example, ublas's prod function that implements GEMM, and
DeLiCM, we can get accesses to the same location represented by different raw
pointers. Consequently, we create different alias sets that can prevent
accesses from, for example, being sunk or hoisted.

To avoid the issue, we compare the corresponding SCEV information instead
of the corresponding raw pointers.

Reviewed-by: Tobias Grosser <tobias@grosser.es>

Differential Revision: https://reviews.llvm.org/D35761

llvm-svn: 310380
  • Loading branch information
gareevroman committed Aug 8, 2017
1 parent 4aa1905 commit 1563f03
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 11 deletions.
6 changes: 3 additions & 3 deletions polly/include/polly/CodeGen/IRBuilder.h
Expand Up @@ -17,6 +17,7 @@

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/ValueMap.h"

Expand Down Expand Up @@ -115,11 +116,10 @@ class ScopAnnotator {
OtherAliasScopeListMap;

/// A map from the SCEVs of pointers to second level alias scopes.
llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
SecondLevelAliasScopeMap;
llvm::DenseMap<const llvm::SCEV *, llvm::MDNode *> SecondLevelAliasScopeMap;

/// A map from the SCEVs of pointers to second level alias scope list of other pointers.
llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
llvm::DenseMap<const llvm::SCEV *, llvm::MDNode *>
SecondLevelOtherAliasScopeListMap;

/// Inter iteration alias-free base pointers.
Expand Down
18 changes: 10 additions & 8 deletions polly/lib/CodeGen/IRBuilder.cpp
Expand Up @@ -140,29 +140,31 @@ static llvm::Value *getMemAccInstPointerOperand(Instruction *Inst) {

void ScopAnnotator::annotateSecondLevel(llvm::Instruction *Inst,
llvm::Value *BasePtr) {
auto *Ptr = getMemAccInstPointerOperand(Inst);
if (!Ptr)
auto *PtrSCEV = SE->getSCEV(getMemAccInstPointerOperand(Inst));
auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);

if (!PtrSCEV)
return;
auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(Ptr);
auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(PtrSCEV);
auto SecondLevelOtherAliasScopeList =
SecondLevelOtherAliasScopeListMap.lookup(Ptr);
SecondLevelOtherAliasScopeListMap.lookup(PtrSCEV);
if (!SecondLevelAliasScope) {
auto AliasScope = AliasScopeMap.lookup(BasePtr);
if (!AliasScope)
return;
LLVMContext &Ctx = SE->getContext();
SecondLevelAliasScope = getID(
Ctx, AliasScope, MDString::get(Ctx, "second level alias metadata"));
SecondLevelAliasScopeMap[Ptr] = SecondLevelAliasScope;
SecondLevelAliasScopeMap[PtrSCEV] = SecondLevelAliasScope;
Metadata *Args = {SecondLevelAliasScope};
auto SecondLevelBasePtrAliasScopeList =
SecondLevelAliasScopeMap.lookup(BasePtr);
SecondLevelAliasScopeMap[BasePtr] = MDNode::concatenate(
SecondLevelAliasScopeMap.lookup(BasePtrSCEV);
SecondLevelAliasScopeMap[BasePtrSCEV] = MDNode::concatenate(
SecondLevelBasePtrAliasScopeList, MDNode::get(Ctx, Args));
auto OtherAliasScopeList = OtherAliasScopeListMap.lookup(BasePtr);
SecondLevelOtherAliasScopeList = MDNode::concatenate(
OtherAliasScopeList, SecondLevelBasePtrAliasScopeList);
SecondLevelOtherAliasScopeListMap[Ptr] = SecondLevelOtherAliasScopeList;
SecondLevelOtherAliasScopeListMap[PtrSCEV] = SecondLevelOtherAliasScopeList;
}
Inst->setMetadata("alias.scope", SecondLevelAliasScope);
Inst->setMetadata("noalias", SecondLevelOtherAliasScopeList);
Expand Down
@@ -0,0 +1,55 @@
{
"arrays" : [
{
"name" : "MemRef_C1",
"sizes" : [ "*" ],
"type" : "double"
},
{
"name" : "MemRef_A",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_B",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_C",
"sizes" : [ "*", "1024" ],
"type" : "double"
}
],
"context" : "{ : }",
"name" : "%for.body---%for.end24",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C1[0] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> [i0, i1, i2] }"
}
]
}
@@ -0,0 +1,55 @@
{
"arrays" : [
{
"name" : "MemRef_C1",
"sizes" : [ "*" ],
"type" : "double"
},
{
"name" : "MemRef_A",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_B",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_C",
"sizes" : [ "*", "1024" ],
"type" : "double"
}
],
"context" : "{ : }",
"name" : "%for.body---%for.end24",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C1[0] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> [i0, i1, i2] }"
}
]
}
64 changes: 64 additions & 0 deletions polly/test/ScheduleOptimizer/pattern-matching-based-opts_14.ll
@@ -0,0 +1,64 @@
; RUN: opt %loadPolly -polly-import-jscop -polly-opt-isl \
; RUN: -polly-target-throughput-vector-fma=1 \
; RUN: -polly-target-latency-vector-fma=8 \
; RUN: -polly-target-1st-cache-level-associativity=8 \
; RUN: -polly-target-2nd-cache-level-associativity=8 \
; RUN: -polly-target-1st-cache-level-size=32768 \
; RUN: -polly-target-vector-register-bitwidth=256 \
; RUN: -polly-target-2nd-cache-level-size=262144 \
; RUN: -polly-import-jscop-postfix=transformed -polly-codegen -S < %s \
; RUN: | FileCheck %s
;
; Check that we do not create different alias sets for locations represented by
; different raw pointers.
;
; CHECK-NOT: !76 = distinct !{!76, !5, !"second level alias metadata"}
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; GEMM-like test kernel: a triple-nested loop, each level running 1024
; iterations, that updates C[i][j] with C[i][j] + (C1[0] + A[i][k] * B[k][j]).
; The i32 arguments %ni, %nj and %nk are not referenced in the body.
define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, [1024 x double]* %A, [1024 x double]* %B, [1024 x double]* %C, double* %C1) {
entry:
br label %entry.split

entry.split: ; preds = %entry
br label %for.body

; Outermost loop; %indvars.iv43 is the row index used for both A and C.
for.body: ; preds = %for.inc22, %entry.split
%indvars.iv43 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next44, %for.inc22 ]
br label %for.body3

; Middle loop; %indvars.iv40 is the column index used for both B and C.
for.body3: ; preds = %for.inc19, %for.body
%indvars.iv40 = phi i64 [ 0, %for.body ], [ %indvars.iv.next41, %for.inc19 ]
br label %for.body6

; Innermost loop; %indvars.iv indexes the column of A and the row of B.
for.body6: ; preds = %for.body6, %for.body3
%indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body6 ]
; Load through the plain double* argument %C1 (no GEP, i.e. element 0).
%tmp = load double, double* %C1, align 8
; A[iv43][iv]
%arrayidx9 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv43, i64 %indvars.iv
%tmp1 = load double, double* %arrayidx9, align 8
; B[iv][iv40]
%arrayidx13 = getelementptr inbounds [1024 x double], [1024 x double]* %B, i64 %indvars.iv, i64 %indvars.iv40
%tmp2 = load double, double* %arrayidx13, align 8
%mul = fmul double %tmp1, %tmp2
%add = fadd double %tmp, %mul
; C[iv43][iv40] is read, accumulated into, and written back via the same GEP.
%arrayidx17 = getelementptr inbounds [1024 x double], [1024 x double]* %C, i64 %indvars.iv43, i64 %indvars.iv40
%tmp3 = load double, double* %arrayidx17, align 8
%add18 = fadd double %tmp3, %add
store double %add18, double* %arrayidx17, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.body6, label %for.inc19

; Latch of the middle loop: advance %indvars.iv40 and exit after 1024 iterations.
for.inc19: ; preds = %for.body6
%indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1
%exitcond42 = icmp ne i64 %indvars.iv.next41, 1024
br i1 %exitcond42, label %for.body3, label %for.inc22

; Latch of the outermost loop: advance %indvars.iv43 and exit after 1024 iterations.
for.inc22: ; preds = %for.inc19
%indvars.iv.next44 = add nuw nsw i64 %indvars.iv43, 1
%exitcond45 = icmp ne i64 %indvars.iv.next44, 1024
br i1 %exitcond45, label %for.body, label %for.end24

for.end24: ; preds = %for.inc22
ret void
}

0 comments on commit 1563f03

Please sign in to comment.