Skip to content

Commit

Permalink
[LoopUnroll] Use llvm.experimental.noalias.scope.decl for duplicating…
Browse files Browse the repository at this point in the history
… noalias metadata as needed

This is a fix for https://bugs.llvm.org/show_bug.cgi?id=39282. Compared to D90104, this version is based on part of the full restrict patches (D68484) and uses the `@llvm.experimental.noalias.scope.decl` intrinsic to track the location where !noalias and !alias.scope scopes have been introduced. This allows us to only duplicate the scopes that are really needed.

Notes:
- it also includes changes and tests from D90104

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D92887
  • Loading branch information
dobbelaj-snps committed Jan 24, 2021
1 parent b3d7e76 commit 7746296
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 13 deletions.
6 changes: 6 additions & 0 deletions llvm/include/llvm/IR/Metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -1208,6 +1208,12 @@ class AliasScopeNode {
return nullptr;
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
/// Return the name attached to this scope node.
///
/// The name is the string held in operand 2, provided the node has more than
/// two operands and that operand is an MDString. In every other case an empty
/// StringRef is returned.
StringRef getName() const {
  if (Node->getNumOperands() <= 2)
    return StringRef();
  auto *NameMD = dyn_cast_or_null<MDString>(Node->getOperand(2));
  return NameMD ? NameMD->getString() : StringRef();
}
};

/// Typed iterator through MDNode operands.
Expand Down
34 changes: 34 additions & 0 deletions llvm/include/llvm/Transforms/Utils/Cloning.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,40 @@ void updateProfileCallee(
Function *Callee, int64_t entryDelta,
const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);

/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
/// basic blocks and extract their scope. These are candidates for duplication
/// when cloning.
///
/// \param BBs The basic blocks to scan; blocks and their instructions are
/// visited in order.
/// \param [out] NoAliasDeclScopes Receives one entry per declaration found:
/// the scope-list operand of the intrinsic, as a MetadataAsValue. Entries are
/// appended; the vector is not cleared first and duplicates are not filtered.
void identifyNoAliasScopesToClone(
ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<MetadataAsValue *> &NoAliasDeclScopes);

/// Duplicate the specified list of noalias decl scopes.
/// The 'Ext' string is added as an extension to the name: a scope named
/// "foo" is cloned as "foo:<Ext>", and a scope without a name is cloned with
/// the name "<Ext>".
/// Afterwards, the ClonedMVScopes contains a mapping of the original MV onto
/// the cloned version.
/// The ClonedScopes contains the mapping of the original scope MDNode onto the
/// cloned scope.
/// Be aware that the cloned scopes are still part of the original scope domain.
///
/// \param NoAliasDeclScopes The scope lists (wrapped as MetadataAsValue) whose
/// scopes must be duplicated.
/// \param [out] ClonedScopes Original scope MDNode -> cloned scope MDNode.
/// \param [out] ClonedMVScopes Original scope-list MV -> MV wrapping the list
/// of cloned scopes.
/// \param Ext Suffix used when naming the cloned scopes.
/// \param Context The LLVMContext in which the new metadata is created.
void cloneNoAliasScopes(
ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
DenseMap<MDNode *, MDNode *> &ClonedScopes,
DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
StringRef Ext, LLVMContext &Context);

/// Adapt the metadata for the specified instruction according to the
/// provided mapping. This is normally used after cloning an instruction, when
/// some noalias scopes needed to be cloned.
void adaptNoAliasScopes(
llvm::Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
const DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
LLVMContext &Context);

/// Clone the specified noalias decl scopes. Then adapt all instructions in the
/// NewBlocks basicblocks to the cloned versions.
/// 'Ext' will be added to the duplicate scope names.
///
/// This is a no-op when \p NoAliasDeclScopes is empty.
///
/// \param NoAliasDeclScopes The scope lists whose scopes must be duplicated.
/// \param NewBlocks The blocks whose instructions are rewritten to refer to
/// the duplicated scopes.
/// \param Context The LLVMContext in which the new metadata is created.
/// \param Ext Suffix used when naming the duplicated scopes.
void cloneAndAdaptNoAliasScopes(ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
ArrayRef<BasicBlock *> NewBlocks,
LLVMContext &Context, StringRef Ext);
} // end namespace llvm

#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
96 changes: 96 additions & 0 deletions llvm/lib/Transforms/Utils/CloneFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
Expand All @@ -36,6 +37,8 @@
#include <map>
using namespace llvm;

#define DEBUG_TYPE "clone-function"

/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
Expand Down Expand Up @@ -881,3 +884,96 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween(

return NewBB;
}

/// Create a fresh alias scope for every scope referenced by the given
/// declaration scope lists.
///
/// Each new scope is created in the domain of the scope it duplicates. Its
/// name is "<original name>:<Ext>", or just "<Ext>" when the original scope
/// has no name. ClonedScopes records original scope -> new scope, and
/// ClonedMVScopes records original list MV -> MV wrapping the new list.
/// Operands of a scope list that are not MDNodes are skipped.
void llvm::cloneNoAliasScopes(
    ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
    DenseMap<MDNode *, MDNode *> &ClonedScopes,
    DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
    StringRef Ext, LLVMContext &Context) {
  MDBuilder Builder(Context);

  for (MetadataAsValue *DeclMV : NoAliasDeclScopes) {
    const auto *OrigList = cast<MDNode>(DeclMV->getMetadata());
    SmallVector<Metadata *, 4> ClonedList;

    for (const MDOperand &Op : OrigList->operands()) {
      auto *OrigScope = dyn_cast<MDNode>(Op);
      if (!OrigScope)
        continue;

      AliasScopeNode ScopeInfo(OrigScope);

      // Derive the clone's name from the original one, if there is any.
      StringRef OrigName = ScopeInfo.getName();
      std::string ClonedName = OrigName.empty()
                                   ? std::string(Ext)
                                   : (Twine(OrigName) + ":" + Ext).str();

      // The clone stays in the domain of the scope it duplicates.
      MDNode *ClonedScope = Builder.createAnonymousAliasScope(
          const_cast<MDNode *>(ScopeInfo.getDomain()), ClonedName);
      ClonedScopes.insert(std::make_pair(OrigScope, ClonedScope));
      ClonedList.push_back(ClonedScope);
    }

    MDNode *ClonedListMD = MDNode::get(Context, ClonedList);
    ClonedMVScopes.insert(
        std::make_pair(DeclMV, MetadataAsValue::get(Context, ClonedListMD)));
  }
}

/// Redirect instruction \p I from original noalias scopes to their clones.
///
/// MetadataAsValue operands with an entry in ClonedMVScopes are swapped for
/// the mapped value. The !noalias and !alias.scope attachments are rebuilt
/// when at least one of their scopes has an entry in ClonedScopes.
void llvm::adaptNoAliasScopes(
    Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
    const DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
    LLVMContext &Context) {
  // A MetadataAsValue operand is replaced whenever a clone is known for it.
  for (Use &Operand : I->operands()) {
    auto *OldMV = dyn_cast<MetadataAsValue>(Operand);
    if (!OldMV)
      continue;
    if (MetadataAsValue *ClonedMV = ClonedMVScopes.lookup(OldMV))
      Operand.set(ClonedMV);
  }

  // Rebuild the scope list attached under KindID, but only touch the
  // instruction when at least one scope actually maps to a clone.
  auto RemapAttachment = [&](unsigned KindID) {
    const MDNode *OldList = I->getMetadata(KindID);
    if (!OldList)
      return;

    bool Changed = false;
    SmallVector<Metadata *, 8> Remapped;
    for (const MDOperand &Op : OldList->operands()) {
      auto *Scope = dyn_cast<MDNode>(Op);
      if (!Scope)
        continue; // Only MDNode operands are carried over.

      MDNode *Clone = ClonedScopes.lookup(Scope);
      if (Clone)
        Changed = true;
      Remapped.push_back(Clone ? Clone : Scope);
    }

    if (Changed)
      I->setMetadata(KindID, MDNode::get(Context, Remapped));
  };

  RemapAttachment(LLVMContext::MD_noalias);
  RemapAttachment(LLVMContext::MD_alias_scope);
}

/// Duplicate the given declaration scopes and rewrite every instruction in
/// \p NewBlocks so that it refers to the duplicates. \p Ext is used when
/// naming the duplicated scopes. Does nothing when there are no scopes.
void llvm::cloneAndAdaptNoAliasScopes(
    ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
    ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) {
  // Without any declaration scope there is nothing to clone or to adapt.
  if (NoAliasDeclScopes.empty())
    return;

  DenseMap<MDNode *, MDNode *> ScopeMap;
  DenseMap<MetadataAsValue *, MetadataAsValue *> MVMap;
  LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
                    << NoAliasDeclScopes.size() << " node(s)\n");

  // Step 1: create the duplicates and remember what maps onto what.
  cloneNoAliasScopes(NoAliasDeclScopes, ScopeMap, MVMap, Ext, Context);

  // Step 2: let every instruction of the new blocks use the duplicates.
  for (BasicBlock *BB : NewBlocks) {
    for (Instruction &Inst : *BB)
      adaptNoAliasScopes(&Inst, ScopeMap, MVMap, Context);
  }
}

/// Collect the scope-list operand of every noalias scope declaration found in
/// \p BBs and append it to \p NoAliasDeclScopes.
void llvm::identifyNoAliasScopesToClone(
    ArrayRef<BasicBlock *> BBs,
    SmallVectorImpl<MetadataAsValue *> &NoAliasDeclScopes) {
  for (BasicBlock *Block : BBs) {
    for (Instruction &Inst : *Block) {
      auto *ScopeDecl = dyn_cast<NoAliasScopeDeclInst>(&Inst);
      if (!ScopeDecl)
        continue;

      Value *ScopeArg =
          ScopeDecl->getOperand(Intrinsic::NoAliasScopeDeclScopeArg);
      NoAliasDeclScopes.push_back(cast<MetadataAsValue>(ScopeArg));
    }
  }
}
14 changes: 14 additions & 0 deletions llvm/lib/Transforms/Utils/LoopUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,11 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
<< DIL->getFilename() << " Line: " << DIL->getLine());
}

// Identify what noalias metadata is inside the loop: if it is inside the
// loop, the associated metadata must be cloned for each iteration.
SmallVector<MetadataAsValue *, 6> LoopLocalNoAliasDeclScopes;
identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);

for (unsigned It = 1; It != ULO.Count; ++It) {
SmallVector<BasicBlock *, 8> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
Expand Down Expand Up @@ -683,6 +688,15 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
AC->registerAssumption(II);
}
}

{
// Identify what other metadata depends on the cloned version. After
// cloning, replace the metadata with the corrected version for both
// memory instructions and noalias intrinsics.
std::string ext = (Twine("It") + Twine(It)).str();
cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
Header->getContext(), ext);
}
}

// Loop over the PHI nodes in the original block, setting incoming values.
Expand Down
103 changes: 103 additions & 0 deletions llvm/test/Transforms/LoopUnroll/noalias.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-unroll -unroll-count=4 < %s | FileCheck %s

; The noalias scope declaration sits inside the loop body. After unrolling by
; 4, each copy of the body is expected to declare and use its own scope list
; (!0, !3, !5 and !7 in the assertions below).
define void @test_inside(i32* %addr1, i32* %addr2) {
; CHECK-LABEL: @test_inside(
; CHECK-NEXT: start:
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0)
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0
; CHECK-NEXT: store i32 [[X]], i32* [[ADDR2:%.*]], align 4, !noalias !0
; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !3)
; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !3
; CHECK-NEXT: store i32 [[X_1]], i32* [[ADDR2I_1]], align 4, !noalias !3
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !5)
; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !5
; CHECK-NEXT: store i32 [[X_2]], i32* [[ADDR2]], align 4, !noalias !5
; CHECK-NEXT: [[ADDR1I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
; CHECK-NEXT: [[ADDR2I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !7)
; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR1I_3]], align 4, !alias.scope !7
; CHECK-NEXT: store i32 [[X_3]], i32* [[ADDR2I_3]], align 4, !noalias !7
; CHECK-NEXT: ret void
;
start:
br label %body

body:
%i = phi i32 [ 0, %start ], [ %i2, %body ]
%j = and i32 %i, 1
%addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j
%addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j

call void @llvm.experimental.noalias.scope.decl(metadata !2)
%x = load i32, i32* %addr1i, !alias.scope !2
store i32 %x, i32* %addr2i, !noalias !2

%i2 = add i32 %i, 1
%cmp = icmp slt i32 %i2, 4
br i1 %cmp, label %body, label %end

end:
ret void
}

; The noalias scope declaration sits in the entry block, outside the loop.
; After unrolling by 4, no scope is expected to be duplicated: every load and
; store keeps referring to the same scope list (!0 in the assertions below).
define void @test_outside(i32* %addr1, i32* %addr2) {
; CHECK-LABEL: @test_outside(
; CHECK-NEXT: start:
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0)
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0
; CHECK-NEXT: store i32 [[X]], i32* [[ADDR2:%.*]], align 4, !noalias !0
; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !0
; CHECK-NEXT: store i32 [[X_1]], i32* [[ADDR2I_1]], align 4, !noalias !0
; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !0
; CHECK-NEXT: store i32 [[X_2]], i32* [[ADDR2]], align 4, !noalias !0
; CHECK-NEXT: [[ADDR1I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
; CHECK-NEXT: [[ADDR2I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR1I_3]], align 4, !alias.scope !0
; CHECK-NEXT: store i32 [[X_3]], i32* [[ADDR2I_3]], align 4, !noalias !0
; CHECK-NEXT: ret void
;
start:
call void @llvm.experimental.noalias.scope.decl(metadata !2)
br label %body

body:
%i = phi i32 [ 0, %start ], [ %i2, %body ]
%j = and i32 %i, 1
%addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j
%addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j

%x = load i32, i32* %addr1i, !alias.scope !2
store i32 %x, i32* %addr2i, !noalias !2

%i2 = add i32 %i, 1
%cmp = icmp slt i32 %i2, 4
br i1 %cmp, label %body, label %end

end:
ret void
}

declare void @llvm.experimental.noalias.scope.decl(metadata)

!0 = distinct !{!0}
!1 = distinct !{!1, !0}
!2 = !{!1}

; CHECK: !0 = !{!1}
; CHECK: !1 = distinct !{!1, !2}
; CHECK: !2 = distinct !{!2}
; CHECK: !3 = !{!4}
; CHECK: !4 = distinct !{!4, !2, !"It1"}
; CHECK: !5 = !{!6}
; CHECK: !6 = distinct !{!6, !2, !"It2"}
; CHECK: !7 = !{!8}
; CHECK: !8 = distinct !{!8, !2, !"It3"}
29 changes: 16 additions & 13 deletions llvm/test/Transforms/PhaseOrdering/pr39282.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,27 @@ define void @copy(i32* noalias %to, i32* noalias %from) {

; Consider that %addr1 = %addr2 + 1, in which case %addr2i and %addr1i are
; noalias within one iteration, but may alias across iterations.
; TODO: This is a miscompile.
define void @pr39282(i32* %addr1, i32* %addr2) {
; CHECK-LABEL: @pr39282(
; CHECK-NEXT: start:
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0:metadata !.*]])
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3:metadata !.*]])
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: [[X_I:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2:%.*]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i64 1
; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2:%.*]], i64 1
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]])
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]])
; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]])
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]])
; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]])
; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]])
; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i64 1
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !7, !noalias !5
; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !5, !noalias !7
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: [[X_I_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !11, !noalias !9
; CHECK-NEXT: store i32 [[X_I_2]], i32* [[ADDR2]], align 4, !alias.scope !9, !noalias !11
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: [[X_I_3:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !15, !noalias !13
; CHECK-NEXT: store i32 [[X_I_3]], i32* [[ADDR2I_1]], align 4, !alias.scope !13, !noalias !15
; CHECK-NEXT: ret void
;
start:
Expand Down

0 comments on commit 7746296

Please sign in to comment.