-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WebAssembly] Implement an alternative translation for -wasm-enable-sjlj #84137
Conversation
@llvm/pr-subscribers-backend-webassembly @llvm/pr-subscribers-clang Author: YAMAMOTO Takashi (yamt) ChangesInstead of maintaining per-function-invocation malloc()'ed tables to track which functions each label belongs to, store the equivalent info in jump buffers (jmp_buf) themselves. Also, use a less emscripten-looking ABI symbols:
Enabled with:
(-experimental-wasm-enable-alt-sjlj is the new option this change introduces.) While I want to use this for WASI, it should work for emscripten as well. An example runtime and a few tests: Discussion: Full diff: https://github.com/llvm/llvm-project/pull/84137.diff 5 Files Affected:
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index b8c2573d6265fb..2e7c8e6e8d13f7 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -386,6 +386,20 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs,
// Backend needs '-exception-model=wasm' to use Wasm EH instructions
CC1Args.push_back("-exception-model=wasm");
}
+
+ if (Opt.starts_with("-experimental-wasm-enable-alt-sjlj")) {
+ // '-mllvm -experimental-wasm-enable-alt-sjlj' should be used with
+ // '-mllvm -wasm-enable-sjlj'
+ bool HasWasmEnableSjlj = false;
+ for (const Arg *A : DriverArgs.filtered(options::OPT_mllvm)) {
+ if (StringRef(A->getValue(0)) == "-wasm-enable-sjlj")
+ HasWasmEnableSjlj = true;
+ }
+ if (!HasWasmEnableSjlj)
+ getDriver().Diag(diag::err_drv_argument_only_allowed_with)
+ << "-mllvm -experimental-wasm-enable-alt-sjlj"
+ << "-mllvm -wasm-enable-sjlj";
+ }
}
}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index e8f58a19d25e3b..7f15742367be09 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -54,6 +54,9 @@ cl::opt<bool>
// setjmp/longjmp handling using wasm EH instrutions
cl::opt<bool> WebAssembly::WasmEnableSjLj(
"wasm-enable-sjlj", cl::desc("WebAssembly setjmp/longjmp handling"));
+cl::opt<bool> WebAssembly::WasmEnableAltSjLj(
+ "experimental-wasm-enable-alt-sjlj",
+ cl::desc("Use experimental alternate ABI for --wasm-enable-sjlj"));
static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/,
const Triple &TT,
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 15aeaaeb8c4a4e..d23de9d407d894 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -44,6 +44,7 @@ extern cl::opt<bool> WasmEnableEmEH; // asm.js-style EH
extern cl::opt<bool> WasmEnableEmSjLj; // asm.js-style SjLJ
extern cl::opt<bool> WasmEnableEH; // EH using Wasm EH instructions
extern cl::opt<bool> WasmEnableSjLj; // SjLj using Wasm EH instructions
+extern cl::opt<bool> WasmEnableAltSjLj; // Alt ABI for WasmEnableSjLj
enum OperandType {
/// Basic block label in a branch construct.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 77e6640d5a8224..fc76757011f5d8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -300,6 +300,7 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
bool EnableEmEH; // Enable Emscripten exception handling
bool EnableEmSjLj; // Enable Emscripten setjmp/longjmp handling
bool EnableWasmSjLj; // Enable Wasm setjmp/longjmp handling
+ bool EnableWasmAltSjLj; // Alt ABI for EnableWasmSjLj
bool DoSjLj; // Whether we actually perform setjmp/longjmp handling
GlobalVariable *ThrewGV = nullptr; // __THREW__ (Emscripten)
@@ -368,7 +369,8 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
WebAssemblyLowerEmscriptenEHSjLj()
: ModulePass(ID), EnableEmEH(WebAssembly::WasmEnableEmEH),
EnableEmSjLj(WebAssembly::WasmEnableEmSjLj),
- EnableWasmSjLj(WebAssembly::WasmEnableSjLj) {
+ EnableWasmSjLj(WebAssembly::WasmEnableSjLj),
+ EnableWasmAltSjLj(WebAssembly::WasmEnableAltSjLj) {
assert(!(EnableEmSjLj && EnableWasmSjLj) &&
"Two SjLj modes cannot be turned on at the same time");
assert(!(EnableEmEH && EnableWasmSjLj) &&
@@ -619,6 +621,7 @@ static bool canLongjmp(const Value *Callee) {
// There are functions in Emscripten's JS glue code or compiler-rt
if (CalleeName == "__resumeException" || CalleeName == "llvm_eh_typeid_for" ||
CalleeName == "saveSetjmp" || CalleeName == "testSetjmp" ||
+ CalleeName == "__wasm_sjlj_setjmp" || CalleeName == "__wasm_sjlj_test" ||
CalleeName == "getTempRet0" || CalleeName == "setTempRet0")
return false;
@@ -999,7 +1002,11 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
// Register __wasm_longjmp function, which calls __builtin_wasm_longjmp.
FunctionType *FTy = FunctionType::get(
IRB.getVoidTy(), {Int8PtrTy, IRB.getInt32Ty()}, false);
- WasmLongjmpF = getEmscriptenFunction(FTy, "__wasm_longjmp", &M);
+ if (EnableWasmAltSjLj) {
+ WasmLongjmpF = getEmscriptenFunction(FTy, "__wasm_sjlj_longjmp", &M);
+ } else {
+ WasmLongjmpF = getEmscriptenFunction(FTy, "__wasm_longjmp", &M);
+ }
WasmLongjmpF->addFnAttr(Attribute::NoReturn);
}
@@ -1007,17 +1014,30 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
Type *Int8PtrTy = IRB.getPtrTy();
Type *Int32PtrTy = IRB.getPtrTy();
Type *Int32Ty = IRB.getInt32Ty();
- // Register saveSetjmp function
- FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
- FunctionType *FTy = FunctionType::get(
- Int32PtrTy,
- {SetjmpFTy->getParamType(0), Int32Ty, Int32PtrTy, Int32Ty}, false);
- SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M);
// Register testSetjmp function
- FTy = FunctionType::get(Int32Ty,
- {getAddrIntType(&M), Int32PtrTy, Int32Ty}, false);
- TestSetjmpF = getEmscriptenFunction(FTy, "testSetjmp", &M);
+ if (EnableWasmAltSjLj) {
+ // Register saveSetjmp function
+ FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
+ FunctionType *FTy = FunctionType::get(
+ IRB.getVoidTy(), {SetjmpFTy->getParamType(0), Int32Ty, Int32PtrTy},
+ false);
+ SaveSetjmpF = getEmscriptenFunction(FTy, "__wasm_sjlj_setjmp", &M);
+
+ FTy = FunctionType::get(Int32Ty, {Int32PtrTy, Int32PtrTy}, false);
+ TestSetjmpF = getEmscriptenFunction(FTy, "__wasm_sjlj_test", &M);
+ } else {
+ // Register saveSetjmp function
+ FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
+ FunctionType *FTy = FunctionType::get(
+ Int32PtrTy,
+ {SetjmpFTy->getParamType(0), Int32Ty, Int32PtrTy, Int32Ty}, false);
+ SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M);
+
+ FTy = FunctionType::get(
+ Int32Ty, {getAddrIntType(&M), Int32PtrTy, Int32Ty}, false);
+ TestSetjmpF = getEmscriptenFunction(FTy, "testSetjmp", &M);
+ }
// wasm.catch() will be lowered down to wasm 'catch' instruction in
// instruction selection.
@@ -1291,19 +1311,29 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Type *IntPtrTy = getAddrIntType(&M);
Constant *size = ConstantInt::get(IntPtrTy, 40);
IRB.SetInsertPoint(SetjmpTableSize);
- auto *SetjmpTable = IRB.CreateMalloc(IntPtrTy, IRB.getInt32Ty(), size,
- nullptr, nullptr, "setjmpTable");
- SetjmpTable->setDebugLoc(FirstDL);
- // CallInst::CreateMalloc may return a bitcast instruction if the result types
- // mismatch. We need to set the debug loc for the original call too.
- auto *MallocCall = SetjmpTable->stripPointerCasts();
- if (auto *MallocCallI = dyn_cast<Instruction>(MallocCall)) {
- MallocCallI->setDebugLoc(FirstDL);
+ Instruction *SetjmpTable;
+ if (EnableWasmAltSjLj) {
+ // This alloca'ed pointer is used by the runtime to identify function
+ // inovactions. It's just for pointer comparisons. It will never
+ // be dereferenced.
+ SetjmpTable = IRB.CreateAlloca(IRB.getInt32Ty());
+ SetjmpTable->setDebugLoc(FirstDL);
+ SetjmpTableInsts.push_back(SetjmpTable);
+ } else {
+ SetjmpTable = IRB.CreateMalloc(IntPtrTy, IRB.getInt32Ty(), size, nullptr,
+ nullptr, "setjmpTable");
+ SetjmpTable->setDebugLoc(FirstDL);
+ // CallInst::CreateMalloc may return a bitcast instruction if the result
+ // types mismatch. We need to set the debug loc for the original call too.
+ auto *MallocCall = SetjmpTable->stripPointerCasts();
+ if (auto *MallocCallI = dyn_cast<Instruction>(MallocCall)) {
+ MallocCallI->setDebugLoc(FirstDL);
+ }
+ // setjmpTable[0] = 0;
+ IRB.CreateStore(IRB.getInt32(0), SetjmpTable);
+ SetjmpTableInsts.push_back(SetjmpTable);
+ SetjmpTableSizeInsts.push_back(SetjmpTableSize);
}
- // setjmpTable[0] = 0;
- IRB.CreateStore(IRB.getInt32(0), SetjmpTable);
- SetjmpTableInsts.push_back(SetjmpTable);
- SetjmpTableSizeInsts.push_back(SetjmpTableSize);
// Setjmp transformation
SmallVector<PHINode *, 4> SetjmpRetPHIs;
@@ -1349,14 +1379,20 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// Our index in the function is our place in the array + 1 to avoid index
// 0, because index 0 means the longjmp is not ours to handle.
IRB.SetInsertPoint(CI);
- Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()),
- SetjmpTable, SetjmpTableSize};
- Instruction *NewSetjmpTable =
- IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
- Instruction *NewSetjmpTableSize =
- IRB.CreateCall(GetTempRet0F, std::nullopt, "setjmpTableSize");
- SetjmpTableInsts.push_back(NewSetjmpTable);
- SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
+ if (EnableWasmAltSjLj) {
+ Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()),
+ SetjmpTable};
+ IRB.CreateCall(SaveSetjmpF, Args);
+ } else {
+ Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()),
+ SetjmpTable, SetjmpTableSize};
+ Instruction *NewSetjmpTable =
+ IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
+ Instruction *NewSetjmpTableSize =
+ IRB.CreateCall(GetTempRet0F, std::nullopt, "setjmpTableSize");
+ SetjmpTableInsts.push_back(NewSetjmpTable);
+ SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
+ }
ToErase.push_back(CI);
}
@@ -1372,38 +1408,40 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
for (Instruction *I : ToErase)
I->eraseFromParent();
- // Free setjmpTable buffer before each return instruction + function-exiting
- // call
- SmallVector<Instruction *, 16> ExitingInsts;
- for (BasicBlock &BB : F) {
- Instruction *TI = BB.getTerminator();
- if (isa<ReturnInst>(TI))
- ExitingInsts.push_back(TI);
- // Any 'call' instruction with 'noreturn' attribute exits the function at
- // this point. If this throws but unwinds to another EH pad within this
- // function instead of exiting, this would have been an 'invoke', which
- // happens if we use Wasm EH or Wasm SjLJ.
- for (auto &I : BB) {
- if (auto *CI = dyn_cast<CallInst>(&I)) {
- bool IsNoReturn = CI->hasFnAttr(Attribute::NoReturn);
- if (Function *CalleeF = CI->getCalledFunction())
- IsNoReturn |= CalleeF->hasFnAttribute(Attribute::NoReturn);
- if (IsNoReturn)
- ExitingInsts.push_back(&I);
+ if (!EnableWasmAltSjLj) {
+ // Free setjmpTable buffer before each return instruction + function-exiting
+ // call
+ SmallVector<Instruction *, 16> ExitingInsts;
+ for (BasicBlock &BB : F) {
+ Instruction *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI))
+ ExitingInsts.push_back(TI);
+ // Any 'call' instruction with 'noreturn' attribute exits the function at
+ // this point. If this throws but unwinds to another EH pad within this
+ // function instead of exiting, this would have been an 'invoke', which
+ // happens if we use Wasm EH or Wasm SjLJ.
+ for (auto &I : BB) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ bool IsNoReturn = CI->hasFnAttr(Attribute::NoReturn);
+ if (Function *CalleeF = CI->getCalledFunction())
+ IsNoReturn |= CalleeF->hasFnAttribute(Attribute::NoReturn);
+ if (IsNoReturn)
+ ExitingInsts.push_back(&I);
+ }
}
}
- }
- for (auto *I : ExitingInsts) {
- DebugLoc DL = getOrCreateDebugLoc(I, F.getSubprogram());
- // If this existing instruction is a call within a catchpad, we should add
- // it as "funclet" to the operand bundle of 'free' call
- SmallVector<OperandBundleDef, 1> Bundles;
- if (auto *CB = dyn_cast<CallBase>(I))
- if (auto Bundle = CB->getOperandBundle(LLVMContext::OB_funclet))
- Bundles.push_back(OperandBundleDef(*Bundle));
- IRB.SetInsertPoint(I);
- auto *Free = IRB.CreateFree(SetjmpTable, Bundles);
- Free->setDebugLoc(DL);
+ for (auto *I : ExitingInsts) {
+ DebugLoc DL = getOrCreateDebugLoc(I, F.getSubprogram());
+ // If this existing instruction is a call within a catchpad, we should add
+ // it as "funclet" to the operand bundle of 'free' call
+ SmallVector<OperandBundleDef, 1> Bundles;
+ if (auto *CB = dyn_cast<CallBase>(I))
+ if (auto Bundle = CB->getOperandBundle(LLVMContext::OB_funclet))
+ Bundles.push_back(OperandBundleDef(*Bundle));
+ IRB.SetInsertPoint(I);
+ auto *Free = IRB.CreateFree(SetjmpTable, Bundles);
+ Free->setDebugLoc(DL);
+ }
}
// Every call to saveSetjmp can change setjmpTable and setjmpTableSize
@@ -1738,10 +1776,16 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", &F);
BasicBlock *EndBB = BasicBlock::Create(C, "if.end", &F);
Value *EnvP = IRB.CreateBitCast(Env, getAddrPtrType(&M), "env.p");
- Value *SetjmpID = IRB.CreateLoad(getAddrIntType(&M), EnvP, "setjmp.id");
- Value *Label =
- IRB.CreateCall(TestSetjmpF, {SetjmpID, SetjmpTable, SetjmpTableSize},
- OperandBundleDef("funclet", CatchPad), "label");
+ Value *Label;
+ if (EnableWasmAltSjLj) {
+ Label = IRB.CreateCall(TestSetjmpF, {EnvP, SetjmpTable},
+ OperandBundleDef("funclet", CatchPad), "label");
+ } else {
+ Value *SetjmpID = IRB.CreateLoad(getAddrIntType(&M), EnvP, "setjmp.id");
+ Label =
+ IRB.CreateCall(TestSetjmpF, {SetjmpID, SetjmpTable, SetjmpTableSize},
+ OperandBundleDef("funclet", CatchPad), "label");
+ }
Value *Cmp = IRB.CreateICmpEQ(Label, IRB.getInt32(0));
IRB.CreateCondBr(Cmp, ThenBB, EndBB);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 70685b2e3bb2de..6db019034028bc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -370,6 +370,7 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
return nullptr; // No reg alloc
}
+using WebAssembly::WasmEnableAltSjLj;
using WebAssembly::WasmEnableEH;
using WebAssembly::WasmEnableEmEH;
using WebAssembly::WasmEnableEmSjLj;
@@ -405,6 +406,9 @@ static void basicCheckForEHAndSjLj(TargetMachine *TM) {
report_fatal_error(
"-exception-model=wasm only allowed with at least one of "
"-wasm-enable-eh or -wasm-enable-sjlj");
+ if (!WasmEnableSjLj && WasmEnableAltSjLj)
+ report_fatal_error("-experimental-wasm-enable-alt-sjlj only allowed with "
+ "-wasm-enable-sjlj");
// You can't enable two modes of EH at the same time
if (WasmEnableEmEH && WasmEnableEH)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good to me! @sbc100 Do you want to coordinate landing this with Emscripten?
Can I take a look before landing this? I'm OOO this afternoon but will be back tomorrow. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for working on this!
I'm excited to see this land and become less emscripten-specific.
I'm hoping that once we get the emscripten side landed we can remove the new option very soon too.
llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In terms of getting this landed and tested, I wonder which path we should take:
- Land this now, without tests, then update emscripten then come back and flip the default, at which point the existing tests will get updated.
- Duplicate/update the the existing tests to tests both modes, then delete those changes once we flip the default.
Personally I think I'd be happy with (1) since this is a behind and experimental flag.
What do others think? @aheejin ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice work! Sorry for the late reply. I thought we needed the table because we need to distinguish two different calls from the same function, for example, if foo
calls setjmp
and also recursively calls foo
again, so the same callsite can have two different setjmpId
s (e.g., https://github.com/emscripten-core/emscripten/blob/main/test/core/test_longjmp3.c). But this fixes that problem by adding function_invocation_id
, which is simpler and nicer.
I also ran all Emscripten SjLj tests with this implementation (+ your rt.c
) and all of them seemed to pass. 🎉
I left some inline comments.
- Given that we don't need
setjmpTableSize
anymore andsetjmpTable
doesn't change, we don't need the whole block here from line 1463 ~ line 1503 doing SSA updates anymore:
llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
Lines 1409 to 1449 in 578e66a
// Every call to saveSetjmp can change setjmpTable and setjmpTableSize // (when buffer reallocation occurs) // entry: // setjmpTableSize = 4; // setjmpTable = (int *) malloc(40); // setjmpTable[0] = 0; // ... // somebb: // setjmpTable = saveSetjmp(env, label, setjmpTable, setjmpTableSize); // setjmpTableSize = getTempRet0(); // So we need to make sure the SSA for these variables is valid so that every // saveSetjmp and testSetjmp calls have the correct arguments. SSAUpdater SetjmpTableSSA; SSAUpdater SetjmpTableSizeSSA; SetjmpTableSSA.Initialize(PointerType::get(C, 0), "setjmpTable"); SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize"); for (Instruction *I : SetjmpTableInsts) SetjmpTableSSA.AddAvailableValue(I->getParent(), I); for (Instruction *I : SetjmpTableSizeInsts) SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I); for (auto &U : make_early_inc_range(SetjmpTable->uses())) if (auto *I = dyn_cast<Instruction>(U.getUser())) if (I->getParent() != Entry) SetjmpTableSSA.RewriteUse(U); for (auto &U : make_early_inc_range(SetjmpTableSize->uses())) if (auto *I = dyn_cast<Instruction>(U.getUser())) if (I->getParent() != Entry) SetjmpTableSizeSSA.RewriteUse(U); // Finally, our modifications to the cfg can break dominance of SSA variables. // For example, in this code, // if (x()) { .. setjmp() .. } // if (y()) { .. longjmp() .. } // We must split the longjmp block, and it can jump into the block splitted // from setjmp one. But that means that when we split the setjmp block, it's // first part no longer dominates its second part - there is a theoretically // possible control flow path where x() is false, then y() is true and we // reach the second part of the setjmp block, without ever reaching the first // part. So, we rebuild SSA form here. rebuildSSA(F);
So you can wrap this part with (!EnableWasmAltSjLj)
.
@@ -386,6 +386,20 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, | |||
// Backend needs '-exception-model=wasm' to use Wasm EH instructions | |||
CC1Args.push_back("-exception-model=wasm"); | |||
} | |||
|
|||
if (Opt.starts_with("-experimental-wasm-enable-alt-sjlj")) { | |||
// '-mllvm -experimental-wasm-enable-alt-sjlj' should be used with |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// '-mllvm -experimental-wasm-enable-alt-sjlj' should be used with | |
// '-mllvm -experimental-wasm-enable-alt-sjlj' should be used with |
cl::opt<bool> WebAssembly::WasmEnableAltSjLj( | ||
"experimental-wasm-enable-alt-sjlj", | ||
cl::desc("Use experimental alternate ABI for --wasm-enable-sjlj")); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cl::opt<bool> WebAssembly::WasmEnableAltSjLj( | |
"experimental-wasm-enable-alt-sjlj", | |
cl::desc("Use experimental alternate ABI for --wasm-enable-sjlj")); | |
cl::opt<bool> WebAssembly::WasmEnableExperimentalNewSjLj( | |
"wasm-enable-experimental-new-sjlj", | |
cl::desc("Use experimental new ABI for -wasm-enable-sjlj")); |
I've been trying to make all Wasm related option variable names start with Wasm
. Also, if we are to replace the current impl with this, how about just 'new' than 'alt', which sounds like we are planning to keep both?
If we are to change this, we have to fix other places that includes either of WasmEnableAltSjLj
and experimental-wasm-enable-alt-sjlj
as well.
@@ -300,6 +315,7 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { | |||
bool EnableEmEH; // Enable Emscripten exception handling | |||
bool EnableEmSjLj; // Enable Emscripten setjmp/longjmp handling | |||
bool EnableWasmSjLj; // Enable Wasm setjmp/longjmp handling | |||
bool EnableWasmAltSjLj; // Alt ABI for EnableWasmSjLj |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bool EnableWasmAltSjLj; // Alt ABI for EnableWasmSjLj | |
bool EnableWasmNewSjLj; // New ABI for EnableWasmSjLj |
Also running clang-format around this line will likely to change the comment indentation.
// be dereferenced. | ||
SetjmpTable = IRB.CreateAlloca(IRB.getInt32Ty()); | ||
SetjmpTable->setDebugLoc(FirstDL); | ||
SetjmpTableInsts.push_back(SetjmpTable); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SetjmpTableInsts.push_back(SetjmpTable); |
SetjmpTableInsts
(and SetjmpTableSizeInsts
) is for the SSA updates at the end of the transformation, because in the current version setjmpTable
and setjmpTableTableSize
are assigned every time we call saveSetjmp
. Now that we don't do this I don't think we need to insert it here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we need to insert this to make handleLongjmpableCallsForWasmSjLj happy.
we can simplify it if/when we can retire the current logic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think #84137 (comment) answers it too
SetjmpTable = IRB.CreateAlloca(IRB.getInt32Ty()); | ||
SetjmpTable->setDebugLoc(FirstDL); | ||
SetjmpTableInsts.push_back(SetjmpTable); | ||
} else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
Lines 1278 to 1293 in 578e66a
// This instruction effectively means %setjmpTableSize = 4. | |
// We create this as an instruction intentionally, and we don't want to fold | |
// this instruction to a constant 4, because this value will be used in | |
// SSAUpdater.AddAvailableValue(...) later. | |
BasicBlock *Entry = &F.getEntryBlock(); | |
DebugLoc FirstDL = getOrCreateDebugLoc(&*Entry->begin(), F.getSubprogram()); | |
SplitBlock(Entry, &*Entry->getFirstInsertionPt()); | |
BinaryOperator *SetjmpTableSize = | |
BinaryOperator::Create(Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), | |
"setjmpTableSize", Entry->getTerminator()); | |
SetjmpTableSize->setDebugLoc(FirstDL); | |
// setjmpTable = (int *) malloc(40); | |
Type *IntPtrTy = getAddrIntType(&M); | |
Constant *size = ConstantInt::get(IntPtrTy, 40); | |
IRB.SetInsertPoint(SetjmpTableSize); |
This can be moved into this
else
part too. Also we don't need to create SetjmpTableSize
, in which case we should do something like IRB.setInsertPoint(Entry.getTerminator()
instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok
auto *MallocCall = SetjmpTable->stripPointerCasts(); | ||
if (auto *MallocCallI = dyn_cast<Instruction>(MallocCall)) { | ||
MallocCallI->setDebugLoc(FirstDL); | ||
Instruction *SetjmpTable; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What we create with this option is not really a 'table' anymore... Can we rename it? I don't think we have to share the variable with the old mechanism, no? (I think using SetjmpTable
variable with two different meanings is more confusing.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
at least in some places (eg handleLongjmpableCallsForWasmSjLj
) it was easier to share a variable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now we don't need to support the old and new options simultaneously we don't need to share this. Also Emscripten SjLj can use your functions too, so it doesn't need it either.
@@ -999,25 +1017,43 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { | |||
// Register __wasm_longjmp function, which calls __builtin_wasm_longjmp. | |||
FunctionType *FTy = FunctionType::get( | |||
IRB.getVoidTy(), {Int8PtrTy, IRB.getInt32Ty()}, false); | |||
WasmLongjmpF = getEmscriptenFunction(FTy, "__wasm_longjmp", &M); | |||
if (EnableWasmAltSjLj) { | |||
WasmLongjmpF = getEmscriptenFunction(FTy, "__wasm_sjlj_longjmp", &M); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this the final name? It's unclear what the difference between __wasm_longjmp
and __wasm_sjlj_longjmp
to people who are not familiar with the history... If the goal is to eventually to replace the current library, I'd want this to be new __wasm_longjmp
. But given that we have to maintain both at this point, how about __wasm_longjmp_new
or something? (I'm not too opinionated on this specific name, so please feel free to suggest otherwise.. but what I'm saying is, mainly, in the final version, I'd like to have only __wasm_setjmp
and __wasm_longjmp
, with your version. I think _sjlj_
is a redundancy.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i wanted to have a common prefix for this set of api. (__wasm_sjlj_
)
what's your suggestion for testSetjmp
?
saveSetjmp -> __wasm_setjmp?
testSetjmp -> ???
getTempRet0 -> (removed)
__wasm_longjmp -> keep it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we discussed this in the emscripten PR.
if (EnableWasmAltSjLj) { | ||
Label = IRB.CreateCall(TestSetjmpF, {EnvP, SetjmpTable}, | ||
OperandBundleDef("funclet", CatchPad), "label"); | ||
} else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can move
llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
Lines 1685 to 1688 in 6c76506
// Arbitrarily use the ones defined in the beginning of the function. | |
// SSAUpdater will later update them to the correct values. | |
Instruction *SetjmpTable = *SetjmpTableInsts.begin(); | |
Instruction *SetjmpTableSize = *SetjmpTableSizeInsts.begin(); |
else
, given that SetjmpTableInsts
and SetjmpTableSizeInsts
are empty in this new mode.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SetjmpTableInsts
still has a single item, which is used to pass function_invocation_id.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reason I added all setjmp instructions in SetjmpTableInsts
was to do the SSA update. Now we have a single setjmpTable
, we don't need to do it and we don't need SetjmpTableInsts
. (Sorry I missed this comment before)
Come to think of it, should we even introduce this experimental option? Adding
|
So you think it should be possible to have both old and new versions supported in emscripten at the same time? If that works that would be great. Do you want to send and emscripten PR? And link to it here? If we can confirm that all tests pass then I agree it would be great to do this in a single LLVM change.
|
No, 1 just adds new library functions to emscripten (with different names than the current ones) and at this point they will not be actually used. The reason we add them there first is otherwise 2 (replacing the current scheme with the new one) wouldn't pass in the emscripten CI. As long as we use different library function names I think it can be fine. (I actually want to reuse the name @yamt, do you want to submit a PR to emscripten adding new library functions? I can do that too, but given that this is your code, if you want to do it it'd be good. About where to add them... to make the transition smooth, https://github.com/emscripten-core/emscripten/blob/main/system/lib/compiler-rt/emscripten_setjmp.c looks like a good place to start given that our current functions are here, but we can move them later to elsewhere. WDYT? @sbc100 |
i implemented this way ( |
Hopefully, this implementation can replace the current `-sSUPPORT_LONGJMP=wasm` implementation sooner or later. system/lib/compiler-rt/wasm_setjmp.c is basically a copy from: https://github.com/yamt/garbage/blob/wasm-sjlj-alt2/wasm/longjmp/rt.c. the copyright comment was copy-and-pasted from: system/lib/compiler-rt/emscripten_exception_builtins.c The corresponding LLVM change: llvm/llvm-project#84137 Discussion: https://docs.google.com/document/d/1ZvTPT36K5jjiedF8MCXbEmYjULJjI723aOAks1IdLLg/edit
Hopefully, this implementation can replace the current runtime for `-sSUPPORT_LONGJMP=wasm` sooner or later. system/lib/compiler-rt/wasm_setjmp.c is basically a copy from: https://github.com/yamt/garbage/blob/wasm-sjlj-alt2/wasm/longjmp/rt.c. the copyright comment was copy-and-pasted from: system/lib/compiler-rt/emscripten_exception_builtins.c The corresponding LLVM change: llvm/llvm-project#84137 Discussion: https://docs.google.com/document/d/1ZvTPT36K5jjiedF8MCXbEmYjULJjI723aOAks1IdLLg/edit
|
i get errors like the following if i simply put the SSA update things in
|
i think i found the cause. rebuildSSA is still necessary to propagate "val". |
Had a chance to chat with @sbc100 offline and we agreed we can probably do this:
The downside of this approach is when you add library functions in emscripten we can't add tests with them. But given that we will follow up with the LLVM side PRs I think it should be fine. Also this removes the need to duplicate tests in LLVM (or omit tests in LLVM when this lands). |
Oh, I'm sorry, I was incorrect. We should include this part, because SSA rewriting is necessary for other variables (other than llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp Lines 1439 to 1449 in 578e66a
What we can exclude is this part: llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp Lines 1409 to 1437 in 578e66a
I'm sorry for the incorrect comment and unnecessary debugging.. 😢 |
✅ With the latest revision this PR passed the C/C++ code formatter. |
the new runtime code is basically a copy from: https://github.com/yamt/garbage/blob/wasm-sjlj-alt2/wasm/longjmp/rt.c. The corresponding LLVM change: llvm/llvm-project#84137 Discussion: https://docs.google.com/document/d/1ZvTPT36K5jjiedF8MCXbEmYjULJjI723aOAks1IdLLg/edit
the new runtime code is basically a copy from: https://github.com/yamt/garbage/blob/wasm-sjlj-alt2/wasm/longjmp/rt.c. The corresponding LLVM change: llvm/llvm-project#84137 Discussion: https://docs.google.com/document/d/1ZvTPT36K5jjiedF8MCXbEmYjULJjI723aOAks1IdLLg/edit
SaveSetjmpF -> WasmSetjmpF TestSetjmpF -> WasmSetjmpTestF
ok. i just rebased + force pushed. i usually avoid merge commits in a PR because i personally feel it's rather complicated to review. |
…LVM (#21502) the new runtime code is basically a copy from: https://github.com/yamt/garbage/blob/wasm-sjlj-alt2/wasm/longjmp/rt.c. The corresponding LLVM change: llvm/llvm-project#84137 Discussion: https://docs.google.com/document/d/1ZvTPT36K5jjiedF8MCXbEmYjULJjI723aOAks1IdLLg/edit
OK I'll land this. Thank you for working on this! |
After llvm/llvm-project#84137 and emscripten-core#21502, these methods are no longer used.
After llvm/llvm-project#84137 and #21502, these methods are no longer used.
* Add libsetjmp.a/so Add setjmp/longjump support based on Wasm EH proposal. It's provided as a separate library (libsetjmp) from libc so that runtimes w/o EH support can still load libc.so. To use this setjmp/longjmp implementation, an application should be compiled with `-mllvm -wasm-enable-sjlj` and linked with `-lsetjmp`. (You need an LLVM with the change mentioned below.) Also, you need a runtime with EH support to run such an application. If you want to use the latest EH instructions, you can use `binaryen --translate-eh-old-to-new` on your application. Note: You don't need to translate libsetjmp.a/so to the new EH. While LLVM currently produces bytecode for an old version of the EH proposal, luckily for us, the bytecode used in this library (ie. the tag definition and the "throw" instruction) is compatible with the latest version of the proposal. The runtime logic is basically copy-and-paste from: https://github.com/yamt/garbage/tree/wasm-sjlj-alt2/wasm/longjmp The corresponding LLVM change: llvm/llvm-project#84137 (Note: you need this change to build setjmp/longjmp using code. otoh, you don't need this to build libsetjmp.) A similar change for emscripten: emscripten-core/emscripten#21502 An older version of this PR, which doesn't require LLVM changes: #467 Discussion: https://docs.google.com/document/d/1ZvTPT36K5jjiedF8MCXbEmYjULJjI723aOAks1IdLLg/edit An example to use the latest EH instructions: ``` clang -mllvm -wasm-enable-sjlj -o your_app.wasm your_app.c -lsetjmp wasm-opt --translate-eh-old-to-new -o your_app.wasm your_app.wasm toywasm --wasi your_app.wasm ``` Note: use toywasm built with `-DTOYWASM_ENABLE_WASM_EXCEPTION_HANDLING=ON`. An example to use the older EH instructions, which LLVM currently produces: ``` clang -mllvm -wasm-enable-sjlj -o your_app.wasm your_app.c -lsetjmp iwasm your_app.wasm ``` Note: use wasm-micro-runtime built with `-DWAMR_BUILD_EXCE_HANDLING=1`. Note: as of writing this, only the classic interpreter supports EH. * Make libsetjmp build optional * CI: Disable libsetjmp for old LLVM * libc-top-half/musl/include/setjmp.h: fix a rebase botch
Instead of maintaining per-function-invocation malloc()'ed tables to track which functions each label belongs to, store the equivalent info in jump buffers (jmp_buf) themselves.
Also, use a less emscripten-looking ABI symbols:
While I want to use this for WASI, it should work for emscripten as well.
An example runtime and a few tests:
https://github.com/yamt/garbage/tree/wasm-sjlj-alt2/wasm/longjmp
wasi-libc version of the runtime:
WebAssembly/wasi-libc#483
emscripten version of the runtime:
emscripten-core/emscripten#21502
Discussion:
https://docs.google.com/document/d/1ZvTPT36K5jjiedF8MCXbEmYjULJjI723aOAks1IdLLg/edit
todo:
remove the option (-mllvm -experimental-wasm-enable-alt-sjlj) by making it unconditionally truedonerename functions to __wasm_setjmp, __wasm_setjmp_test, __wasm_longjmpdoneupdate testsdoneupdate commentsdoneupdate emscripten PRdone