Skip to content

Commit

Permalink
Implement strip.invariant.group
Browse files Browse the repository at this point in the history
Summary:
This patch introduce new intrinsic -
strip.invariant.group that was described in the
RFC: Devirtualization v2

Reviewers: rsmith, hfinkel, nlopes, sanjoy, amharc, kuhar

Subscribers: arsenm, nhaehnle, JDevlieghere, hiraditya, xbolva00, llvm-commits

Differential Revision: https://reviews.llvm.org/D47103

Co-authored-by: Krzysztof Pszeniczny <krzysztof.pszeniczny@gmail.com>
llvm-svn: 336073
  • Loading branch information
prazek and amharc committed Jul 2, 2018
1 parent 5305414 commit 5b3db45
Show file tree
Hide file tree
Showing 21 changed files with 296 additions and 48 deletions.
48 changes: 43 additions & 5 deletions llvm/docs/LangRef.rst
Expand Up @@ -13350,16 +13350,17 @@ Overview:
"""""""""

The '``llvm.launder.invariant.group``' intrinsic can be used when an invariant
established by invariant.group metadata no longer holds, to obtain a new pointer
value that does not carry the invariant information. It is an experimental
intrinsic, which means that its semantics might change in the future.
established by ``invariant.group`` metadata no longer holds, to obtain a new
pointer value that carries fresh invariant group information. It is an
experimental intrinsic, which means that its semantics might change in the
future.


Arguments:
""""""""""

The ``llvm.launder.invariant.group`` takes only one argument, which is
the pointer to the memory for which the ``invariant.group`` no longer holds.
The ``llvm.launder.invariant.group`` takes only one argument, which is a pointer
to the memory.

Semantics:
""""""""""
Expand All @@ -13368,6 +13369,43 @@ Returns another pointer that aliases its argument but which is considered differ
for the purposes of ``load``/``store`` ``invariant.group`` metadata.
It does not read any accessible memory and the execution can be speculated.

'``llvm.strip.invariant.group``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""
This is an overloaded intrinsic. The memory object can belong to any address
space. The returned pointer must belong to the same address space as the
argument.

::

declare i8* @llvm.strip.invariant.group.p0i8(i8* <ptr>)

Overview:
"""""""""

The '``llvm.strip.invariant.group``' intrinsic can be used when an invariant
established by ``invariant.group`` metadata no longer holds, to obtain a new pointer
value that does not carry the invariant information. It is an experimental
intrinsic, which means that its semantics might change in the future.


Arguments:
""""""""""

The ``llvm.strip.invariant.group`` takes only one argument, which is a pointer
to the memory.

Semantics:
""""""""""

Returns another pointer that aliases its argument but which has no associated
``invariant.group`` metadata.
It does not read any memory and can be speculated.



.. _constrainedfp:

Constrained Floating-Point Intrinsics
Expand Down
29 changes: 29 additions & 0 deletions llvm/include/llvm/IR/IRBuilder.h
Expand Up @@ -2022,6 +2022,7 @@ class IRBuilder : public IRBuilderBase, public Inserter {
Value *CreateLaunderInvariantGroup(Value *Ptr) {
assert(isa<PointerType>(Ptr->getType()) &&
"launder.invariant.group only applies to pointers.");
// FIXME: we could potentially avoid casts to/from i8*.
auto *PtrType = Ptr->getType();
auto *Int8PtrTy = getInt8PtrTy(PtrType->getPointerAddressSpace());
if (PtrType != Int8PtrTy)
Expand All @@ -2042,6 +2043,34 @@ class IRBuilder : public IRBuilderBase, public Inserter {
return Fn;
}

/// Create a call to the strip.invariant.group intrinsic.
///
/// \p Ptr must have pointer type. When that type is not i8* in the same
/// address space, the pointer is bitcast to i8* before the call and the
/// intrinsic's result is bitcast back to the original pointer type, so the
/// returned value always has the type of \p Ptr.
Value *CreateStripInvariantGroup(Value *Ptr) {
  assert(isa<PointerType>(Ptr->getType()) &&
         "strip.invariant.group only applies to pointers.");

  // FIXME: we could potentially avoid casts to/from i8*.
  auto *OrigTy = Ptr->getType();
  auto *Int8PtrTy = getInt8PtrTy(OrigTy->getPointerAddressSpace());
  const bool NeedsCast = OrigTy != Int8PtrTy;
  Value *Arg = NeedsCast ? CreateBitCast(Ptr, Int8PtrTy) : Ptr;

  Module *M = BB->getParent()->getParent();
  Function *StripDecl = Intrinsic::getDeclaration(
      M, Intrinsic::strip_invariant_group, {Int8PtrTy});

  assert(StripDecl->getReturnType() == Int8PtrTy &&
         StripDecl->getFunctionType()->getParamType(0) == Int8PtrTy &&
         "StripInvariantGroup should take and return the same type");

  CallInst *Stripped = CreateCall(StripDecl, {Arg});

  return NeedsCast ? CreateBitCast(Stripped, OrigTy) : Stripped;
}

/// Return a vector value that contains \arg V broadcasted to \p
/// NumElts elements.
Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") {
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Expand Up @@ -728,6 +728,11 @@ def int_launder_invariant_group : Intrinsic<[llvm_anyptr_ty],
[LLVMMatchType<0>],
[IntrInaccessibleMemOnly, IntrSpeculatable]>;


// llvm.strip.invariant.group: returns a pointer that aliases its argument
// but carries no invariant.group information. Unlike launder (which is
// IntrInaccessibleMemOnly), strip reads no memory at all (IntrNoMem), and
// its execution can be speculated (IntrSpeculatable).
def int_strip_invariant_group : Intrinsic<[llvm_anyptr_ty],
[LLVMMatchType<0>],
[IntrSpeculatable, IntrNoMem]>;

//===------------------------ Stackmap Intrinsics -------------------------===//
//
def int_experimental_stackmap : Intrinsic<[],
Expand Down
16 changes: 9 additions & 7 deletions llvm/lib/Analysis/BasicAliasAnalysis.cpp
Expand Up @@ -431,13 +431,15 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
if (!GEPOp) {
if (auto CS = ImmutableCallSite(V)) {
// Note: getArgumentAliasingToReturnedPointer keeps it in sync with
// CaptureTracking, which is needed for correctness. This is because
// some intrinsics like launder.invariant.group returns pointers that
// are aliasing it's argument, which is known to CaptureTracking.
// If AliasAnalysis does not use the same information, it could assume
// that pointer returned from launder does not alias it's argument
// because launder could not return it if the pointer was not captured.
// CaptureTracking knows about special capturing properties of certain
// intrinsics, such as launder.invariant.group, that cannot be expressed
// with attributes — e.g. returning a pointer that aliases the argument.
// Other analyses may assume that a nocapture pointer cannot be returned
// from such an intrinsic (since the function would have to be marked
// with the 'returned' attribute), so it is crucial to use this function,
// which is kept in sync with CaptureTracking. Not using it may cause
// subtle miscompilations where two aliasing pointers are assumed not to
// alias.
if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
V = RP;
continue;
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Analysis/ConstantFolding.cpp
Expand Up @@ -1393,6 +1393,7 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::fmuladd:
case Intrinsic::copysign:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::round:
case Intrinsic::masked_load:
case Intrinsic::sadd_with_overflow:
Expand Down Expand Up @@ -1596,14 +1597,16 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::bswap ||
IntrinsicID == Intrinsic::bitreverse ||
IntrinsicID == Intrinsic::launder_invariant_group)
IntrinsicID == Intrinsic::launder_invariant_group ||
IntrinsicID == Intrinsic::strip_invariant_group)
return Operands[0];
}

if (isa<ConstantPointerNull>(Operands[0]) &&
Operands[0]->getType()->getPointerAddressSpace() == 0) {
// launder(null) == null iff in addrspace 0
if (IntrinsicID == Intrinsic::launder_invariant_group)
// launder(null) == null == strip(null) iff in addrspace 0
if (IntrinsicID == Intrinsic::launder_invariant_group ||
IntrinsicID == Intrinsic::strip_invariant_group)
return Operands[0];
return nullptr;
}
Expand Down
21 changes: 12 additions & 9 deletions llvm/lib/Analysis/ValueTracking.cpp
Expand Up @@ -3404,8 +3404,9 @@ const Value *llvm::getArgumentAliasingToReturnedPointer(ImmutableCallSite CS) {
}

bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
ImmutableCallSite CS) {
return CS.getIntrinsicID() == Intrinsic::launder_invariant_group;
ImmutableCallSite CS) {
return CS.getIntrinsicID() == Intrinsic::launder_invariant_group ||
CS.getIntrinsicID() == Intrinsic::strip_invariant_group;
}

/// \p PN defines a loop-variant pointer to an object. Check if the
Expand Down Expand Up @@ -3454,13 +3455,15 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
return V;
} else {
if (auto CS = CallSite(V)) {
// Note: getArgumentAliasingToReturnedPointer keeps it in sync with
// CaptureTracking, which is needed for correctness. This is because
// some intrinsics like launder.invariant.group returns pointers that
// are aliasing it's argument, which is known to CaptureTracking.
// If AliasAnalysis does not use the same information, it could assume
// that pointer returned from launder does not alias it's argument
// because launder could not return it if the pointer was not captured.
// CaptureTracking knows about special capturing properties of certain
// intrinsics, such as launder.invariant.group, that cannot be expressed
// with attributes — e.g. returning a pointer that aliases the argument.
// Other analyses may assume that a nocapture pointer cannot be returned
// from such an intrinsic (since the function would have to be marked
// with the 'returned' attribute), so it is crucial to use this function,
// which is kept in sync with CaptureTracking. Not using it may cause
// subtle miscompilations where two aliasing pointers are assumed not to
// alias.
if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
V = RP;
continue;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Expand Up @@ -1702,6 +1702,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
return true;
}
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
II->replaceAllUsesWith(II->getArgOperand(0));
II->eraseFromParent();
return true;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
Expand Up @@ -1437,6 +1437,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
unsigned ResultReg = getRegForValue(II->getArgOperand(0));
if (!ResultReg)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Expand Up @@ -5768,6 +5768,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return nullptr;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/IR/Value.cpp
Expand Up @@ -521,7 +521,8 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
// but it can't be marked with the 'returned' attribute; that's why it needs
// a special case.
if (StripKind == PSK_ZeroIndicesAndAliasesAndInvariantGroups &&
CS.getIntrinsicID() == Intrinsic::launder_invariant_group) {
(CS.getIntrinsicID() == Intrinsic::launder_invariant_group ||
CS.getIntrinsicID() == Intrinsic::strip_invariant_group)) {
V = CS.getArgOperand(0);
continue;
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
Expand Up @@ -457,6 +457,7 @@ static bool isCallPromotable(CallInst *CI) {
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::objectsize:
return true;
default:
Expand Down Expand Up @@ -882,6 +883,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
Intr->eraseFromParent();
// FIXME: I think the invariant marker should still theoretically apply,
// but the intrinsics need to be changed to accept pointers with any
Expand Down
21 changes: 19 additions & 2 deletions llvm/test/Analysis/ValueTracking/invariant.group.ll
@@ -1,7 +1,7 @@
; RUN: opt -S -instsimplify -instcombine < %s | FileCheck %s

; CHECK-LABEL: define void @checkNonnull()
define void @checkNonnull() {
; CHECK-LABEL: define void @checkNonnullLaunder()
define void @checkNonnullLaunder() {
; CHECK: %p = call i8* @llvm.launder.invariant.group.p0i8(i8* nonnull %0)
; CHECK: %p2 = call i8* @llvm.launder.invariant.group.p0i8(i8* nonnull %p)
; CHECK: call void @use(i8* nonnull %p2)
Expand All @@ -15,5 +15,22 @@ entry:
ret void
}

; Test that -instsimplify/-instcombine (see the RUN line) infer 'nonnull'
; through llvm.strip.invariant.group: the alloca is known non-null, so the
; argument of each strip call and the final use get the nonnull attribute.
; CHECK-LABEL: define void @checkNonnullStrip()
define void @checkNonnullStrip() {
; CHECK: %p = call i8* @llvm.strip.invariant.group.p0i8(i8* nonnull %0)
; CHECK: %p2 = call i8* @llvm.strip.invariant.group.p0i8(i8* nonnull %p)
; CHECK: call void @use(i8* nonnull %p2)
entry:
%0 = alloca i8, align 8

%p = call i8* @llvm.strip.invariant.group.p0i8(i8* %0)
%p2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %p)
call void @use(i8* %p2)

ret void
}

declare i8* @llvm.launder.invariant.group.p0i8(i8*)
declare i8* @llvm.strip.invariant.group.p0i8(i8*)

declare void @use(i8*)
10 changes: 9 additions & 1 deletion llvm/test/CodeGen/Generic/intrinsics.ll
Expand Up @@ -41,11 +41,19 @@ define double @test_cos(float %F) {

declare i8* @llvm.launder.invariant.group(i8*)

define i8* @barrier(i8* %p) {
; Check that code generation accepts llvm.launder.invariant.group; the
; intrinsic is dropped during selection and its pointer operand is forwarded.
define i8* @launder(i8* %p) {
%q = call i8* @llvm.launder.invariant.group(i8* %p)
ret i8* %q
}

declare i8* @llvm.strip.invariant.group(i8*)

; Check that code generation accepts llvm.strip.invariant.group; the
; intrinsic is dropped during selection and its pointer operand is forwarded.
define i8* @strip(i8* %p) {
%q = call i8* @llvm.strip.invariant.group(i8* %p)
ret i8* %q
}


; sideeffect

declare void @llvm.sideeffect()
Expand Down
Expand Up @@ -77,8 +77,14 @@ define i8 @unoptimizable2() {
; Although launder/strip return a pointer that aliases the argument, the
; optimizer must not fold an icmp between the original and the returned
; pointer: both comparison results must still reach @useBool.
define void @dontProveEquality(i8* %a) {
%b = call i8* @llvm.launder.invariant.group.p0i8(i8* %a)
%r = icmp eq i8* %b, %a
;CHECK: call void @useBool(i1 %r)
; CHECK: call void @useBool(i1 %r)
call void @useBool(i1 %r)

%b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
%r2 = icmp eq i8* %b2, %a
; CHECK: call void @useBool(i1 %r2)
call void @useBool(i1 %r2)

ret void
}

Expand All @@ -90,5 +96,9 @@ declare void @clobber(i8*)
; CHECK-NEXT: declare i8* @llvm.launder.invariant.group.p0i8(i8*)
declare i8* @llvm.launder.invariant.group.p0i8(i8*)

!0 = !{}
; CHECK: Function Attrs: nounwind readnone speculatable{{$}}
; CHECK-NEXT: declare i8* @llvm.strip.invariant.group.p0i8(i8*)
declare i8* @llvm.strip.invariant.group.p0i8(i8*)


!0 = !{}
24 changes: 20 additions & 4 deletions llvm/test/Transforms/CodeGenPrepare/invariant.group.ll
Expand Up @@ -7,8 +7,8 @@ define void @foo() {
enter:
; CHECK-NOT: !invariant.group
; CHECK-NOT: @llvm.launder.invariant.group.p0i8(
; CHECK: %val = load i8, i8* @tmp, !tbaa
%val = load i8, i8* @tmp, !invariant.group !0, !tbaa !{!1, !1, i64 0}
; CHECK: %val = load i8, i8* @tmp{{$}}
%val = load i8, i8* @tmp, !invariant.group !0
%ptr = call i8* @llvm.launder.invariant.group.p0i8(i8* @tmp)

; CHECK: store i8 42, i8* @tmp{{$}}
Expand All @@ -18,7 +18,23 @@ enter:
}
; CHECK-LABEL: }

declare i8* @llvm.launder.invariant.group.p0i8(i8*)
; CodeGenPrepare lowers llvm.strip.invariant.group by replacing all its uses
; with the pointer argument and erasing the call; the !invariant.group
; metadata on the load and store must be dropped as well.
; CHECK-LABEL: define void @foo2() {
define void @foo2() {
enter:
; CHECK-NOT: !invariant.group
; CHECK-NOT: @llvm.strip.invariant.group.p0i8(
; CHECK: %val = load i8, i8* @tmp{{$}}
%val = load i8, i8* @tmp, !invariant.group !0
%ptr = call i8* @llvm.strip.invariant.group.p0i8(i8* @tmp)

; CHECK: store i8 42, i8* @tmp{{$}}
store i8 42, i8* %ptr, !invariant.group !0

ret void
}
; CHECK-LABEL: }


declare i8* @llvm.launder.invariant.group.p0i8(i8*)
declare i8* @llvm.strip.invariant.group.p0i8(i8*)
!0 = !{}
!1 = !{!"x", !0}

0 comments on commit 5b3db45

Please sign in to comment.