-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GlobalOpt: Handle threadlocal.address intrinsic #88454
Conversation
3cb0601
to
ce16ee0
Compare
@llvm/pr-subscribers-llvm-transforms Author: Matthias Braun (MatzeB) ChangesThis changes This fixes issue #73314 and improves issue #87437 Full diff: https://github.com/llvm/llvm-project/pull/88454.diff 7 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
index 60c91fc30174de..c001e587313c56 100644
--- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -24,9 +24,9 @@ class Value;
///
bool isSafeToDestroyConstant(const Constant *C);
-/// As we analyze each global, keep track of some information about it. If we
-/// find out that the address of the global is taken, none of this info will be
-/// accurate.
+/// As we analyze each global or thread-local variable, keep track of some
+/// information about it. If we find out that the address of the global is
+/// taken, none of this info will be accurate.
struct GlobalStatus {
/// True if the global's address is used in a comparison.
bool IsCompared = false;
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index da714c9a75701b..fbb83e787f6327 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -306,6 +306,10 @@ static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(PtrOp)) {
+ if (II->getIntrinsicID() == Intrinsic::threadlocal_address)
+ PtrOp = II->getArgOperand(0);
+ }
if (PtrOp == GV) {
if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
LI->replaceAllUsesWith(Value);
@@ -318,6 +322,9 @@ static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
if (getUnderlyingObject(MI->getRawDest()) == GV)
EraseFromParent(MI);
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::threadlocal_address)
+ append_range(WorkList, II->users());
}
}
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index c5aded3c45f4c7..ba6dba8db954b7 100644
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -172,9 +172,20 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
return true;
GS.StoredType = GlobalStatus::Stored;
} else if (const auto *CB = dyn_cast<CallBase>(I)) {
- if (!CB->isCallee(&U))
- return true;
- GS.IsLoaded = true;
+ bool KnownIntrinsic = false;
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::threadlocal_address &&
+ II->getArgOperand(0) == V) {
+ if (analyzeGlobalAux(I, GS, VisitedUsers))
+ return true;
+ KnownIntrinsic = true;
+ }
+ }
+ if (!KnownIntrinsic) {
+ if (!CB->isCallee(&U))
+ return true;
+ GS.IsLoaded = true;
+ }
} else {
return true; // Any other non-load instruction might take address!
}
diff --git a/llvm/test/Transforms/GlobalOpt/basictest.ll b/llvm/test/Transforms/GlobalOpt/basictest.ll
index 6d7fcdd96dfda7..72d38a1e88457e 100644
--- a/llvm/test/Transforms/GlobalOpt/basictest.ll
+++ b/llvm/test/Transforms/GlobalOpt/basictest.ll
@@ -1,9 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=globalopt -S | FileCheck %s
-; CHECK-NOT: global
@X = internal global i32 4 ; <ptr> [#uses=1]
define i32 @foo() {
- %V = load i32, ptr @X ; <i32> [#uses=1]
- ret i32 %V
+; CHECK-LABEL: define i32 @foo() local_unnamed_addr {
+; CHECK-NEXT: ret i32 4
+;
+ %V = load i32, ptr @X ; <i32> [#uses=1]
+ ret i32 %V
+}
+
+@X_tls = internal thread_local global i32 13
+
+define i32 @bar() {
+; CHECK-LABEL: define i32 @bar() local_unnamed_addr {
+; CHECK-NEXT: ret i32 13
+;
+ %p = call ptr @llvm.threadlocal.address(ptr @X_tls)
+ %v = load i32, ptr %p
+ ret i32 %v
}
diff --git a/llvm/test/Transforms/GlobalOpt/constantfold-initializers.ll b/llvm/test/Transforms/GlobalOpt/constantfold-initializers.ll
index ca844f63937ca3..f82942e73d92f7 100644
--- a/llvm/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/llvm/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -72,11 +72,12 @@ entry:
}
@threadlocalptr = global ptr null, align 4
-; CHECK: @threadlocalptr = global ptr null, align 4
+; CHECK: @threadlocalptr = local_unnamed_addr global ptr null, align 4
@threadlocalvar = external thread_local global i32
define internal void @test5() {
entry:
- store ptr @threadlocalvar, ptr @threadlocalptr, align 4
+ %p = call ptr @llvm.threadlocal.address(ptr @threadlocalvar)
+ store ptr %p, ptr @threadlocalptr, align 4
ret void
}
diff --git a/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll b/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
index 7b845070bbd0b3..2b7ceb4169f350 100644
--- a/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
+++ b/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
@@ -39,12 +39,14 @@ define i32 @dom_arg(i32 %a) {
define ptr @dom_thread_local_global() {
; CHECK-LABEL: @dom_thread_local_global(
-; CHECK-NEXT: store ptr @tl, ptr @g3, align 8
+; CHECK-NEXT: [[P:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @tl)
+; CHECK-NEXT: store ptr [[P]], ptr @g3, align 8
; CHECK-NEXT: call void @b()
; CHECK-NEXT: [[R:%.*]] = load ptr, ptr @g3, align 8
; CHECK-NEXT: ret ptr [[R]]
;
- store ptr @tl, ptr @g3
+ %p = call ptr @llvm.threadlocal.address(ptr @tl)
+ store ptr %p, ptr @g3
call void @b()
%r = load ptr, ptr @g3
ret ptr %r
diff --git a/llvm/test/Transforms/GlobalOpt/tls.ll b/llvm/test/Transforms/GlobalOpt/tls.ll
index 6ba003ff30b2ea..2cc2ea4e366e34 100644
--- a/llvm/test/Transforms/GlobalOpt/tls.ll
+++ b/llvm/test/Transforms/GlobalOpt/tls.ll
@@ -15,14 +15,16 @@ declare void @start_thread(ptr)
define i32 @f() {
entry:
; Set @ip to point to x[1] for thread 1.
- store ptr getelementptr inbounds ([100 x i32], ptr @x, i64 0, i64 1), ptr @ip, align 8
+ %p = call ptr @llvm.threadlocal.address(ptr @x)
+ %addr = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 1
+ store ptr %addr, ptr @ip, align 8
; Run g on a new thread.
tail call void @start_thread(ptr @g) nounwind
tail call void @wait() nounwind
; Reset x[1] for thread 1.
- store i32 0, ptr getelementptr inbounds ([100 x i32], ptr @x, i64 0, i64 1), align 4
+ store i32 0, ptr %addr, align 4
; Read the value of @ip, which now points at x[1] for thread 2.
%0 = load ptr, ptr @ip, align 8
@@ -39,10 +41,12 @@ entry:
define internal void @g() nounwind uwtable {
entry:
; Set @ip to point to x[1] for thread 2.
- store ptr getelementptr inbounds ([100 x i32], ptr @x, i64 0, i64 1), ptr @ip, align 8
+ %p = call ptr @llvm.threadlocal.address(ptr @x)
+ %addr = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 1
+ store ptr %addr, ptr @ip, align 8
; Store 50 in x[1] for thread 2.
- store i32 50, ptr getelementptr inbounds ([100 x i32], ptr @x, i64 0, i64 1), align 4
+ store i32 50, ptr %addr, align 4
tail call void @signal() nounwind
ret void
|
For the record: I believe there is no additional problems with coroutines (functions changing thread-id) with this optimization given |
ping. One more patch in my series about about |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
(Ignoring unrelated "TextDiagnosticPrinter.h(23): fatal error C1060: compiler is out of heap space" errors from windows build) |
This changes
GlobalOpt
to skip/look-throughthreadlocal.address
intrinsic where apropriate.This fixes issue #73314