diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp
index 80989c2c667157..c179fd383de2a8 100644
--- a/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -511,6 +511,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
     Handles.front().I = Handles.begin();
     bool KnowNothing = false;
 
+    // Intrinsics, like any other synchronizing function, can make effects
+    // of other threads visible. Without nosync we know nothing really.
+    // Similarly, if `nocallback` is missing the function, or intrinsic,
+    // can call into the module arbitrarily. If both are set the function
+    // has an effect but will not interact with accesses of internal
+    // globals inside the module. We are conservative here for optnone
+    // functions, might not be necessary.
+    auto MaySyncOrCallIntoModule = [](const Function &F) {
+      return !F.isDeclaration() || !F.hasNoSync() ||
+             !F.hasFnAttribute(Attribute::NoCallback);
+    };
+
     // Collect the mod/ref properties due to called functions.  We only compute
     // one mod-ref set.
     for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
@@ -525,7 +537,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
           // Can't do better than that!
         } else if (F->onlyReadsMemory()) {
           FI.addModRefInfo(ModRefInfo::Ref);
-          if (!F->isIntrinsic() && !F->onlyAccessesArgMemory())
+          if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F))
             // This function might call back into the module and read a global -
             // consider every global as possibly being read by this function.
             FI.setMayReadAnyGlobal();
@@ -533,7 +545,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
           FI.addModRefInfo(ModRefInfo::ModRef);
           if (!F->onlyAccessesArgMemory())
             FI.setMayReadAnyGlobal();
-          if (!F->isIntrinsic()) {
+          if (MaySyncOrCallIntoModule(*F)) {
             KnowNothing = true;
             break;
           }
diff --git a/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll b/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll
new file mode 100644
index 00000000000000..7cbaaa13dcb975
--- /dev/null
+++ b/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll
@@ -0,0 +1,38 @@
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+;
+; Functions w/o `nosync` attribute may communicate via memory and must be
+; treated conservatively. Taken from https://reviews.llvm.org/D115302.
+
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+@s = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
+
+; CHECK-LABEL: @bar_sync
+; CHECK: store
+; CHECK: tail call void @llvm.nvvm.bar.sync(i32 0)
+; CHECK: load
+define dso_local i32 @bar_sync(i32 %0) local_unnamed_addr {
+  store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  tail call void @llvm.nvvm.bar.sync(i32 0)
+  %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  ret i32 %2
+}
+
+declare void @llvm.nvvm.bar.sync(i32) #0
+
+; CHECK-LABEL: @barrier0
+; CHECK: store
+; CHECK: tail call void @llvm.nvvm.barrier0()
+; CHECK: load
+define dso_local i32 @barrier0(i32 %0) local_unnamed_addr {
+  store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  tail call void @llvm.nvvm.barrier0()
+  %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  ret i32 %2
+}
+
+declare void @llvm.nvvm.barrier0() #0
+
+attributes #0 = { convergent nounwind }
diff --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
index 61bf473770c3a4..3e2030f5a3a6f6 100644
--- a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
@@ -1,4 +1,4 @@
-; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-LABEL: @main()
 define dso_local i32 @main() {
 entry:
-  %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+  %tmp0 = call i8* @llvm.stacksave() #1
   %tmp6 = load i8, i8* @deallocCalled, align 1
   %tobool = icmp ne i8 %tmp6, 0
   br i1 %tobool, label %if.else, label %if.end
@@ -18,10 +18,10 @@ if.else:                                          ; preds = %entry
   unreachable
 
 ; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
 ; CHECK-NOT: load i8, i8* @deallocCalled
 if.end:                                           ; preds = %entry
-  call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+  call void @llvm.stackrestore(i8* %tmp0)
   %tmp7 = load i8, i8* @deallocCalled, align 1
   %tobool3 = icmp ne i8 %tmp7, 0
   br i1 %tobool3, label %if.end6, label %if.else5
@@ -35,10 +35,10 @@
 if.end6:                                          ; preds = %if.end
   ret i32 0
 }
 
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
 declare dso_local void @__assert_fail() #0
 
-attributes #0 = { noreturn nounwind }
-attributes #1 = { nounwind }
+attributes #0 = { noreturn nosync nounwind }
+attributes #1 = { nosync nounwind }
diff --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
index f9b8d3b954104a..60239a990f2da5 100644
--- a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -14,7 +14,7 @@ entry:
 ; CHECK-LABEL: @main()
 define dso_local i32 @main() {
 entry:
-  %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+  %tmp0 = call i8* @llvm.stacksave() #1
   %tmp6 = load i8, i8* @deallocCalled, align 1
   %tobool = icmp ne i8 %tmp6, 0
   br i1 %tobool, label %if.else, label %if.end
@@ -24,10 +24,10 @@ if.else:                                          ; preds = %entry
   unreachable
 
 ; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
 ; CHECK-NOT: load i8, i8* @deallocCalled
 if.end:                                           ; preds = %entry
-  call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+  call void @llvm.stackrestore(i8* %tmp0)
   %tmp7 = load i8, i8* @deallocCalled, align 1
   %tobool3 = icmp ne i8 %tmp7, 0
   br i1 %tobool3, label %if.end6, label %if.else5
@@ -41,10 +41,10 @@
 if.end6:                                          ; preds = %if.end
   ret i32 0
 }
 
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
 declare dso_local void @__assert_fail() #0
 
-attributes #0 = { noreturn nounwind }
-attributes #1 = { nounwind }
+attributes #0 = { noreturn nosync nounwind }
+attributes #1 = { nosync nounwind }
diff --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
index 6135332546cd13..f3fb3989442142 100644
--- a/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
@@ -18,7 +18,7 @@ entry:
 ; CHECK-LABEL: @main()
 define dso_local i32 @main() {
 entry:
-  %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+  %tmp0 = call i8* @llvm.stacksave() #1
   %tmp6 = load i8, i8* @deallocCalled, align 1
   %tobool = icmp ne i8 %tmp6, 0
   br i1 %tobool, label %if.else, label %if.end
@@ -28,9 +28,9 @@ if.else:                                          ; preds = %entry
   unreachable
 
 ; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
 if.end:                                           ; preds = %entry
-  call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+  call void @llvm.stackrestore(i8* %tmp0)
   %tmp7 = load i8, i8* @deallocCalled, align 1
   %tobool3 = icmp ne i8 %tmp7, 0
   br i1 %tobool3, label %if.end6, label %if.else5
@@ -44,8 +44,8 @@ if.end6:                                          ; preds = %if.end
   ret i32 0
 }
 
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
 declare dso_local void @__assert_fail() #0
 
 attributes #0 = { noreturn nounwind }
diff --git a/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll b/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll
new file mode 100644
index 00000000000000..e0a23d7405734b
--- /dev/null
+++ b/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
+; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+
+; Make sure we do not hoist the load before the intrinsic, unknown function, or
+; optnone function except if we know the unknown function is nosync and nocallback.
+
+@G1 = internal global i32 undef
+@G2 = internal global i32 undef
+@G3 = internal global i32 undef
+@G4 = internal global i32 undef
+
+define void @test_barrier(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_barrier
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT:    br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
+; CHECK:       init:
+; CHECK-NEXT:    store i32 0, ptr @G1, align 4
+; CHECK-NEXT:    br label [[CHECK]]
+; CHECK:       check:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr @G1, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %init, label %check
+init:
+  store i32 0, ptr @G1
+  br label %check
+check:
+  call void @llvm.amdgcn.s.barrier()
+  %v = load i32, ptr @G1
+  %cmp = icmp eq i32 %v, 0
+  call void @llvm.assume(i1 %cmp)
+  ret void
+}
+
+define void @test_unknown(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_unknown
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT:    br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
+; CHECK:       init:
+; CHECK-NEXT:    store i32 0, ptr @G2, align 4
+; CHECK-NEXT:    br label [[CHECK]]
+; CHECK:       check:
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr @G2, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %init, label %check
+init:
+  store i32 0, ptr @G2
+  br label %check
+check:
+  call void @unknown()
+  %v = load i32, ptr @G2
+  %cmp = icmp eq i32 %v, 0
+  call void @llvm.assume(i1 %cmp)
+  ret void
+}
+
+define void @test_optnone(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_optnone
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT:    br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
+; CHECK:       init:
+; CHECK-NEXT:    store i32 0, ptr @G3, align 4
+; CHECK-NEXT:    br label [[CHECK]]
+; CHECK:       check:
+; CHECK-NEXT:    call void @optnone()
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr @G3, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %init, label %check
+init:
+  store i32 0, ptr @G3
+  br label %check
+check:
+  call void @optnone()
+  %v = load i32, ptr @G3
+  %cmp = icmp eq i32 %v, 0
+  call void @llvm.assume(i1 %cmp)
+  ret void
+}
+
+define void @optnone() optnone nosync nocallback noinline {
+; CHECK: Function Attrs: nocallback noinline nosync optnone
+; CHECK-LABEL: define {{[^@]+}}@optnone
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+; Here hoisting is legal and we use it to verify it will happen.
+define void @test_unknown_annotated(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_unknown_annotated
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT:    br i1 [[C]], label [[INIT:%.*]], label [[DOTCHECK_CRIT_EDGE:%.*]]
+; CHECK:       .check_crit_edge:
+; CHECK-NEXT:    [[V_PRE:%.*]] = load i32, ptr @G4, align 4
+; CHECK-NEXT:    br label [[CHECK:%.*]]
+; CHECK:       init:
+; CHECK-NEXT:    store i32 0, ptr @G4, align 4
+; CHECK-NEXT:    br label [[CHECK]]
+; CHECK:       check:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V_PRE]], [[DOTCHECK_CRIT_EDGE]] ], [ 0, [[INIT]] ]
+; CHECK-NEXT:    call void @unknown_nosync_nocallback()
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %init, label %check
+init:
+  store i32 0, ptr @G4
+  br label %check
+check:
+  call void @unknown_nosync_nocallback()
+  %v = load i32, ptr @G4
+  %cmp = icmp eq i32 %v, 0
+  call void @llvm.assume(i1 %cmp)
+  ret void
+}
+
+declare void @unknown()
+declare void @unknown_nosync_nocallback() nosync nocallback
+declare void @llvm.amdgcn.s.barrier()
+declare void @llvm.assume(i1 noundef)
+