From 4f9b8397725c2f57fa671e70f23ac48e7d47fe36 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Wed, 16 Feb 2022 16:11:29 -0800 Subject: [PATCH] [WebAssembly] Make EH/SjLj vars unconditionally thread local This makes three thread local variables (`__THREW__`, `__threwValue`, and `__wasm_lpad_context`) unconditionally thread local. If the target doesn't support TLS, they will be downgraded to normal variables in `stripThreadLocals`. This makes the object not linkable with other objects using shared memory, which is what we intend here; these variables should be thread local when used with shared memory. This is what we initially tried in D88262. But D88323 changed this: It only created these variables when threads were supported, because `__THREW__` and `__threwValue` were always generated even if Emscripten EH/SjLj was not used, making all objects built without threads not linkable with shared memory, which was too restrictive. But sometimes this is not safe. If we build an object using variables such as `__THREW__` without threads, it can be linked to other objects using shared memory, because the original object's `__THREW__` was not created thread local to begin with. So this CL basically reverts D88323 with some additional improvements: - This checks each of the functions and global variables created within `LowerEmscriptenEHSjLj` pass and removes it if it's not used at the end of the pass. So only modules using those variables will be affected. - Moves `CoalesceFeaturesAndStripAtomics` and `AtomicExpand` passes after all other IR pasess that can create thread local variables. It is not sufficient to move them to the end of `addIRPasses`, because `__wasm_lpad_context` is created in `WasmEHPrepare`, which runs inside `addPassesToHandleExceptions`, which runs before `addISelPrepare`. So we override `addISelPrepare` and move atomic/TLS stripping and expanding passes there. This also removes merges `TLS` and `NO-TLS` FileCheck lines into one `CHECK` line, because in the bitcode level we always create them as thread local. Also some function declarations are deleted `CHECK` lines because they are unused. Reviewed By: tlively, sbc100 Differential Revision: https://reviews.llvm.org/D120013 --- llvm/lib/CodeGen/WasmEHPrepare.cpp | 16 ++----- .../WebAssemblyLowerEmscriptenEHSjLj.cpp | 47 ++++++++----------- .../WebAssembly/WebAssemblyTargetMachine.cpp | 17 ++++--- .../WebAssembly/lower-em-exceptions.ll | 14 +++--- .../test/CodeGen/WebAssembly/lower-em-sjlj.ll | 12 ++--- .../CodeGen/WebAssembly/lower-wasm-sjlj.ll | 15 +++--- .../test/CodeGen/WebAssembly/wasmehprepare.ll | 7 ++- 7 files changed, 56 insertions(+), 72 deletions(-) diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp index 6b7df758e4579..e196931248466 100644 --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -213,19 +213,13 @@ bool WasmEHPrepare::prepareEHPads(Function &F) { assert(F.hasPersonalityFn() && "Personality function not found"); // __wasm_lpad_context global variable. - // If the target supports TLS, make this thread-local. We can't just - // unconditionally make it thread-local and depend on - // CoalesceFeaturesAndStripAtomics to downgrade it, because stripping TLS has - // the side effect of disallowing the object from being linked into a - // shared-memory module, which we don't want to be responsible for. + // This variable should be thread local. If the target does not support TLS, + // we depend on CoalesceFeaturesAndStripAtomics to downgrade it to + // non-thread-local ones, in which case we don't allow this object to be + // linked with other objects using shared memory. LPadContextGV = cast( M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy)); - Attribute FSAttr = F.getFnAttribute("target-features"); - if (FSAttr.isValid()) { - StringRef FS = FSAttr.getValueAsString(); - if (FS.contains("+atomics") && FS.contains("+bulk-memory")) - LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); - } + LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0, "lpad_index_gep"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 38415507b23ca..c165542019532 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -406,8 +406,9 @@ static bool canThrow(const Value *V) { return true; } -// Get a global variable with the given name. If it doesn't exist declare it, -// which will generate an import and assume that it will exist at link time. +// Get a thread-local global variable with the given name. If it doesn't exist +// declare it, which will generate an import and assume that it will exist at +// link time. static GlobalVariable *getGlobalVariable(Module &M, Type *Ty, WebAssemblyTargetMachine &TM, const char *Name) { @@ -415,16 +416,11 @@ static GlobalVariable *getGlobalVariable(Module &M, Type *Ty, if (!GV) report_fatal_error(Twine("unable to create global: ") + Name); - // If the target supports TLS, make this variable thread-local. We can't just - // unconditionally make it thread-local and depend on - // CoalesceFeaturesAndStripAtomics to downgrade it, because stripping TLS has - // the side effect of disallowing the object from being linked into a - // shared-memory module, which we don't want to be responsible for. - auto *Subtarget = TM.getSubtargetImpl(); - auto TLS = Subtarget->hasAtomics() && Subtarget->hasBulkMemory() - ? GlobalValue::GeneralDynamicTLSModel - : GlobalValue::NotThreadLocal; - GV->setThreadLocalMode(TLS); + // Variables created by this function are thread local. If the target does not + // support TLS, we depend on CoalesceFeaturesAndStripAtomics to downgrade it + // to non-thread-local ones, in which case we don't allow this object to be + // linked with other objects using shared memory. + GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); return GV; } @@ -1064,22 +1060,16 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { nullifySetjmp(F); } - if (!Changed) { - // Delete unused global variables and functions - if (ResumeF) - ResumeF->eraseFromParent(); - if (EHTypeIDF) - EHTypeIDF->eraseFromParent(); - if (EmLongjmpF) - EmLongjmpF->eraseFromParent(); - if (SaveSetjmpF) - SaveSetjmpF->eraseFromParent(); - if (TestSetjmpF) - TestSetjmpF->eraseFromParent(); - return false; - } + // Delete unused global variables and functions + for (auto *V : {ThrewGV, ThrewValueGV}) + if (V && V->use_empty()) + V->eraseFromParent(); + for (auto *V : {GetTempRet0F, SetTempRet0F, ResumeF, EHTypeIDF, EmLongjmpF, + SaveSetjmpF, TestSetjmpF, WasmLongjmpF, CatchF}) + if (V && V->use_empty()) + V->eraseFromParent(); - return true; + return Changed; } bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { @@ -1829,7 +1819,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj( if (auto *CPI = dyn_cast(FromPad)) { UnwindDest = CPI->getCatchSwitch()->getUnwindDest(); break; - } else if (auto *CPI = dyn_cast(FromPad)) { + } + if (auto *CPI = dyn_cast(FromPad)) { // getCleanupRetUnwindDest() can return nullptr when // 1. This cleanuppad's matching cleanupret uwninds to caller // 2. There is no matching cleanupret because it ends with diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 482837178f3dd..950df71e9efc9 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -320,6 +320,7 @@ class WebAssemblyPassConfig final : public TargetPassConfig { FunctionPass *createTargetRegisterAllocator(bool) override; void addIRPasses() override; + void addISelPrepare() override; bool addInstSelector() override; void addPostRegAlloc() override; bool addGCPasses() override { return false; } @@ -407,12 +408,6 @@ static void basicCheckForEHAndSjLj(TargetMachine *TM) { //===----------------------------------------------------------------------===// void WebAssemblyPassConfig::addIRPasses() { - // Lower atomics and TLS if necessary - addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine())); - - // This is a no-op if atomics are not used in the module - addPass(createAtomicExpandPass()); - // Add signatures to prototype-less function declarations addPass(createWebAssemblyAddMissingPrototypes()); @@ -455,6 +450,16 @@ void WebAssemblyPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); } +void WebAssemblyPassConfig::addISelPrepare() { + // Lower atomics and TLS if necessary + addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine())); + + // This is a no-op if atomics are not used in the module + addPass(createAtomicExpandPass()); + + TargetPassConfig::addISelPrepare(); +} + bool WebAssemblyPassConfig::addInstSelector() { (void)TargetPassConfig::addInstSelector(); addPass( diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll b/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll index 95d7c9f2fb535..b72984b7b59f6 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll @@ -1,16 +1,15 @@ -; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-cxx-exceptions -S | FileCheck %s --check-prefixes=CHECK,NO-TLS -DPTR=i32 -; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-cxx-exceptions -S --mattr=+atomics,+bulk-memory | FileCheck %s --check-prefixes=CHECK,TLS -DPTR=i32 -; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-cxx-exceptions --mtriple=wasm64-unknown-unknown -data-layout="e-m:e-p:64:64-i64:64-n32:64-S128" -S | FileCheck %s --check-prefixes=CHECK -DPTR=i64 +; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-cxx-exceptions -S | FileCheck %s -DPTR=i32 +; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-cxx-exceptions -S --mattr=+atomics,+bulk-memory | FileCheck %s -DPTR=i32 +; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-cxx-exceptions --mtriple=wasm64-unknown-unknown -data-layout="e-m:e-p:64:64-i64:64-n32:64-S128" -S | FileCheck %s -DPTR=i64 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" @_ZTIi = external constant i8* @_ZTIc = external constant i8* -; NO-TLS-DAG: __THREW__ = external global [[PTR]] -; NO-TLS-DAG: __threwValue = external global i32 -; TLS-DAG: __THREW__ = external thread_local global [[PTR]] -; TLS-DAG: __threwValue = external thread_local global i32 +; CHECK: @__THREW__ = external thread_local global [[PTR]] +; __threwValue is only used in Emscripten SjLj, so it shouldn't be generated. +; CHECK-NOT: @__threwValue = ; Test invoke instruction with clauses (try-catch block) define void @clause() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { @@ -172,7 +171,6 @@ declare void @__cxa_call_unexpected(i8*) ; JS glue functions and invoke wrappers declaration ; CHECK-DAG: declare i32 @getTempRet0() -; CHECK-DAG: declare void @setTempRet0(i32) ; CHECK-DAG: declare void @__resumeException(i8*) ; CHECK-DAG: declare void @__invoke_void_i32(void (i32)*, i32) ; CHECK-DAG: declare i8* @__cxa_find_matching_catch_4(i8*, i8*) diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll index 19253d1cf1175..3873ce9b5dc28 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll @@ -1,6 +1,6 @@ -; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-sjlj -S | FileCheck %s --check-prefixes=CHECK,NO-TLS -DPTR=i32 -; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-sjlj -S --mattr=+atomics,+bulk-memory | FileCheck %s --check-prefixes=CHECK,TLS -DPTR=i32 -; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-sjlj --mtriple=wasm64-unknown-unknown -data-layout="e-m:e-p:64:64-i64:64-n32:64-S128" -S | FileCheck %s --check-prefixes=CHECK -DPTR=i64 +; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-sjlj -S | FileCheck %s -DPTR=i32 +; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-sjlj -S --mattr=+atomics,+bulk-memory | FileCheck %s -DPTR=i32 +; RUN: opt < %s -wasm-lower-em-ehsjlj -enable-emscripten-sjlj --mtriple=wasm64-unknown-unknown -data-layout="e-m:e-p:64:64-i64:64-n32:64-S128" -S | FileCheck %s -DPTR=i64 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" @@ -8,11 +8,9 @@ target triple = "wasm32-unknown-unknown" %struct.__jmp_buf_tag = type { [6 x i32], i32, [32 x i32] } @global_var = global i32 0, align 4 -; NO-TLS-DAG: __THREW__ = external global [[PTR]] -; NO-TLS-DAG: __threwValue = external global [[PTR]] -; TLS-DAG: __THREW__ = external thread_local global [[PTR]] -; TLS-DAG: __threwValue = external thread_local global [[PTR]] @global_longjmp_ptr = global void (%struct.__jmp_buf_tag*, i32)* @longjmp, align 4 +; CHECK-DAG: @__THREW__ = external thread_local global [[PTR]] +; CHECK-DAG: @__threwValue = external thread_local global i32 ; CHECK-DAG: @global_longjmp_ptr = global void (%struct.__jmp_buf_tag*, i32)* bitcast (void ([[PTR]], i32)* @emscripten_longjmp to void (%struct.__jmp_buf_tag*, i32)*) ; Test a simple setjmp - longjmp sequence diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll index ebe22c7e6cb18..3c18a3ceaf3b6 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll @@ -1,16 +1,16 @@ -; RUN: opt < %s -wasm-lower-em-ehsjlj -wasm-enable-sjlj -S | FileCheck %s --check-prefixes=CHECK,NO-TLS -DPTR=i32 -; RUN: opt < %s -wasm-lower-em-ehsjlj -wasm-enable-sjlj -S --mattr=+atomics,+bulk-memory | FileCheck %s --check-prefixes=CHECK,TLS -DPTR=i32 -; RUN: opt < %s -wasm-lower-em-ehsjlj -wasm-enable-sjlj --mtriple=wasm64-unknown-unknown -data-layout="e-m:e-p:64:64-i64:64-n32:64-S128" -S | FileCheck %s --check-prefixes CHECK -DPTR=i64 +; RUN: opt < %s -wasm-lower-em-ehsjlj -wasm-enable-sjlj -S | FileCheck %s -DPTR=i32 +; RUN: opt < %s -wasm-lower-em-ehsjlj -wasm-enable-sjlj -S --mattr=+atomics,+bulk-memory | FileCheck %s -DPTR=i32 +; RUN: opt < %s -wasm-lower-em-ehsjlj -wasm-enable-sjlj --mtriple=wasm64-unknown-unknown -data-layout="e-m:e-p:64:64-i64:64-n32:64-S128" -S | FileCheck %s -DPTR=i64 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" %struct.__jmp_buf_tag = type { [6 x i32], i32, [32 x i32] } -; NO-TLS-DAG: __THREW__ = external global [[PTR]] -; NO-TLS-DAG: __threwValue = external global [[PTR]] -; TLS-DAG: __THREW__ = external thread_local global [[PTR]] -; TLS-DAG: __threwValue = external thread_local global [[PTR]] +; These variables are only used in Emscripten EH/SjLj, so they shouldn't be +; generated. +; CHECK-NOT: @__THREW__ = +; CHECK-NOT: @__threwValue = @global_longjmp_ptr = global void (%struct.__jmp_buf_tag*, i32)* @longjmp, align 4 ; CHECK-DAG: @global_longjmp_ptr = global void (%struct.__jmp_buf_tag*, i32)* bitcast (void (i8*, i32)* @__wasm_longjmp to void (%struct.__jmp_buf_tag*, i32)*) @@ -157,7 +157,6 @@ declare void @free(i8*) ; JS glue function declarations ; CHECK-DAG: declare i32 @getTempRet0() -; CHECK-DAG: declare void @setTempRet0(i32) ; CHECK-DAG: declare i32* @saveSetjmp(%struct.__jmp_buf_tag*, i32, i32*, i32) ; CHECK-DAG: declare i32 @testSetjmp([[PTR]], i32*, i32) ; CHECK-DAG: declare void @__wasm_longjmp(i8*, i32) diff --git a/llvm/test/CodeGen/WebAssembly/wasmehprepare.ll b/llvm/test/CodeGen/WebAssembly/wasmehprepare.ll index 081a9776fa9aa..3fff510e3cb45 100644 --- a/llvm/test/CodeGen/WebAssembly/wasmehprepare.ll +++ b/llvm/test/CodeGen/WebAssembly/wasmehprepare.ll @@ -1,11 +1,10 @@ -; RUN: opt < %s -winehprepare -demote-catchswitch-only -wasmehprepare -S | FileCheck %s --check-prefixes=CHECK,NO-TLS -; RUN: opt < %s -winehprepare -demote-catchswitch-only -wasmehprepare -S --mattr=+atomics,+bulk-memory | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: opt < %s -winehprepare -demote-catchswitch-only -wasmehprepare -S | FileCheck %s +; RUN: opt < %s -winehprepare -demote-catchswitch-only -wasmehprepare -S --mattr=+atomics,+bulk-memory | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; NO-TLS: @__wasm_lpad_context = external global { i32, i8*, i32 } -; TLS: @__wasm_lpad_context = external thread_local global { i32, i8*, i32 } +; CHECK: @__wasm_lpad_context = external thread_local global { i32, i8*, i32 } @_ZTIi = external constant i8* %struct.Temp = type { i8 }