diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 567e85a02dc61..b4c724422c14a 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -278,7 +278,11 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co llvm::Function *AwaitSuspendIntrinsic = CGF.CGM.getIntrinsic(AwaitSuspendIID); - const auto AwaitSuspendCanThrow = StmtCanThrow(S.getSuspendExpr()); + // SuspendHandle might throw since it also resumes the returned handle. + const bool AwaitSuspendCanThrow = + SuspendReturnType == + CoroutineSuspendExpr::SuspendReturnType::SuspendHandle || + StmtCanThrow(S.getSuspendExpr()); llvm::CallBase *SuspendRet = nullptr; // FIXME: add call attributes? @@ -307,10 +311,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co break; } case CoroutineSuspendExpr::SuspendReturnType::SuspendHandle: { - assert(SuspendRet->getType()->isPointerTy()); - - auto ResumeIntrinsic = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_resume); - Builder.CreateCall(ResumeIntrinsic, SuspendRet); + assert(SuspendRet->getType()->isVoidTy()); break; } } diff --git a/clang/test/CodeGenCoroutines/coro-await.cpp b/clang/test/CodeGenCoroutines/coro-await.cpp index 65bfb09946881..c7a09e8b8bc7c 100644 --- a/clang/test/CodeGenCoroutines/coro-await.cpp +++ b/clang/test/CodeGenCoroutines/coro-await.cpp @@ -370,8 +370,8 @@ extern "C" void TestTailcall() { // --------------------------- // Call coro.await.suspend // --------------------------- - // CHECK-NEXT: %[[RESUMED:.+]] = call ptr @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @TestTailcall.__await_suspend_wrapper__await) - // CHECK-NEXT: call void @llvm.coro.resume(ptr %[[RESUMED]]) + // Note: The call must not be nounwind since the resumed function could throw. 
+ // CHECK-NEXT: call void @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @TestTailcall.__await_suspend_wrapper__await){{$}} // CHECK-NEXT: %[[OUTCOME:.+]] = call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false) // CHECK-NEXT: switch i8 %[[OUTCOME]], label %[[RET_BB:.+]] [ // CHECK-NEXT: i8 0, label %[[READY_BB]] diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp deleted file mode 100644 index da30e12c63cff..0000000000000 --- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O0 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s -// RUN: %clang -std=c++20 -O0 -emit-llvm -c %s -o %t -Xclang -disable-llvm-passes && %clang -c %t - -#include "Inputs/coroutine.h" - -struct detached_task { - struct promise_type { - detached_task get_return_object() noexcept { - return detached_task{std::coroutine_handle::from_promise(*this)}; - } - - void return_void() noexcept {} - - struct final_awaiter { - bool await_ready() noexcept { return false; } - std::coroutine_handle<> await_suspend(std::coroutine_handle h) noexcept { - h.destroy(); - return {}; - } - void await_resume() noexcept {} - }; - - void unhandled_exception() noexcept {} - - final_awaiter final_suspend() noexcept { return {}; } - - std::suspend_always initial_suspend() noexcept { return {}; } - }; - - ~detached_task() { - if (coro_) { - coro_.destroy(); - coro_ = {}; - } - } - - void start() && { - auto tmp = coro_; - coro_ = {}; - tmp.resume(); - } - - std::coroutine_handle coro_; -}; - -detached_task foo() { - co_return; -} - -// check that the lifetime of the coroutine handle used to obtain the address is contained within single basic block, and hence does not live across suspension points. 
-// CHECK-LABEL: final.suspend: -// CHECK: %{{.+}} = call token @llvm.coro.save(ptr null) -// CHECK: %[[HDL_TRANSFER:.+]] = call ptr @llvm.coro.await.suspend.handle -// CHECK: call void @llvm.coro.resume(ptr %[[HDL_TRANSFER]]) diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp index ca6cf74115a3b..f36f89926505f 100644 --- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp +++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp @@ -89,8 +89,7 @@ Task bar() { // CHECK: br i1 %{{.+}}, label %[[CASE1_AWAIT_READY:.+]], label %[[CASE1_AWAIT_SUSPEND:.+]] // CHECK: [[CASE1_AWAIT_SUSPEND]]: // CHECK-NEXT: %{{.+}} = call token @llvm.coro.save(ptr null) -// CHECK-NEXT: %[[HANDLE1_PTR:.+]] = call ptr @llvm.coro.await.suspend.handle -// CHECK-NEXT: call void @llvm.coro.resume(ptr %[[HANDLE1_PTR]]) +// CHECK-NEXT: call void @llvm.coro.await.suspend.handle // CHECK-NEXT: %{{.+}} = call i8 @llvm.coro.suspend // CHECK-NEXT: switch i8 %{{.+}}, label %coro.ret [ // CHECK-NEXT: i8 0, label %[[CASE1_AWAIT_READY]] @@ -104,8 +103,7 @@ Task bar() { // CHECK: br i1 %{{.+}}, label %[[CASE2_AWAIT_READY:.+]], label %[[CASE2_AWAIT_SUSPEND:.+]] // CHECK: [[CASE2_AWAIT_SUSPEND]]: // CHECK-NEXT: %{{.+}} = call token @llvm.coro.save(ptr null) -// CHECK-NEXT: %[[HANDLE2_PTR:.+]] = call ptr @llvm.coro.await.suspend.handle -// CHECK-NEXT: call void @llvm.coro.resume(ptr %[[HANDLE2_PTR]]) +// CHECK-NEXT: call void @llvm.coro.await.suspend.handle // CHECK-NEXT: %{{.+}} = call i8 @llvm.coro.suspend // CHECK-NEXT: switch i8 %{{.+}}, label %coro.ret [ // CHECK-NEXT: i8 0, label %[[CASE2_AWAIT_READY]] diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst index 83369d93c309a..36092325e536f 100644 --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -1922,7 +1922,7 @@ Example: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :: - declare ptr @llvm.coro.await.suspend.handle( + declare void 
@llvm.coro.await.suspend.handle( ptr <awaiter>, ptr <handle>, ptr <await_suspend_function>) @@ -1967,7 +1967,9 @@ The intrinsic must be used between corresponding `coro.save`_ and `await_suspend_function` call during `CoroSplit`_ pass. `await_suspend_function` must return a pointer to a valid -coroutine frame, which is immediately resumed +coroutine frame. The intrinsic will be lowered to a tail call resuming the +returned coroutine frame. It will be marked `musttail` on targets that support +that. Instructions following the intrinsic will become unreachable. Example: """""""" @@ -1977,11 +1979,10 @@ Example: ; before lowering await.suspend: %save = call token @llvm.coro.save(ptr %hdl) - %next = call ptr @llvm.coro.await.suspend.handle( - ptr %awaiter, - ptr %hdl, - ptr @await_suspend_function) - call void @llvm.coro.resume(%next) + call void @llvm.coro.await.suspend.handle( + ptr %awaiter, + ptr %hdl, + ptr @await_suspend_function) %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) ... @@ -1992,8 +1993,8 @@ Example: %next = call ptr @await_suspend_function( ptr %awaiter, ptr %hdl) - call void @llvm.coro.resume(%next) - %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + musttail call void @llvm.coro.resume(ptr %next) + ret void ... 
; wrapper function example diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index f1c7d950f9275..78f0dbec863e9 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1717,7 +1717,7 @@ def int_coro_await_suspend_bool : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], [Throws]>; -def int_coro_await_suspend_handle : Intrinsic<[llvm_ptr_ty], +def int_coro_await_suspend_handle : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], [Throws]>; diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 84fd88806154e..5716fd0ea4ab9 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -47,7 +47,7 @@ struct LowererBase { ConstantPointerNull *const NullPtr; LowererBase(Module &M); - Value *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt); + CallInst *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt); }; enum class ABI { @@ -85,6 +85,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape { SmallVector CoroSuspends; SmallVector SwiftErrorOps; SmallVector CoroAwaitSuspends; + SmallVector SymmetricTransfers; // Field indexes for special fields in the switch lowering. struct SwitchFieldIndex { diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 4eb6e75d09fa5..450ea82343715 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -113,21 +113,24 @@ class CoroCloner { /// ABIs. AnyCoroSuspendInst *ActiveSuspend = nullptr; + TargetTransformInfo &TTI; + public: /// Create a cloner for a switch lowering. 
CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, - Kind FKind) + Kind FKind, TargetTransformInfo &TTI) : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), FKind(FKind), - Builder(OrigF.getContext()) { + Builder(OrigF.getContext()), TTI(TTI) { assert(Shape.ABI == coro::ABI::Switch); } /// Create a cloner for a continuation lowering. CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, - Function *NewF, AnyCoroSuspendInst *ActiveSuspend) + Function *NewF, AnyCoroSuspendInst *ActiveSuspend, + TargetTransformInfo &TTI) : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape), FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation), - Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) { + Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend), TTI(TTI) { assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async); assert(NewF && "need existing function for continuation"); @@ -171,7 +174,8 @@ class CoroCloner { // Lower the intrinisc in CoroEarly phase if coroutine frame doesn't escape // and it is known that other transformations, for example, sanitizers // won't lead to incorrect code. -static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) { +static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB, + coro::Shape &Shape) { auto Wrapper = CB->getWrapperFunction(); auto Awaiter = CB->getAwaiter(); auto FramePtr = CB->getFrame(); @@ -206,6 +210,31 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) { llvm_unreachable("Unexpected coro_await_suspend invocation method"); } + if (CB->getCalledFunction()->getIntrinsicID() == + Intrinsic::coro_await_suspend_handle) { + // Follow the lowered await_suspend call above with a lowered resume call + // to the returned coroutine. + if (auto *Invoke = dyn_cast<InvokeInst>(CB)) { + // If the await_suspend call is an invoke, we continue in the next block. 
+ Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt()); + } + + coro::LowererBase LB(*Wrapper->getParent()); + auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex, + &*Builder.GetInsertPoint()); + + LLVMContext &Ctx = Builder.getContext(); + FunctionType *ResumeTy = FunctionType::get( + Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false); + auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall}); + + // We can't insert the 'ret' instruction and adjust the cc until the + // function has been split, so remember this for later. + Shape.SymmetricTransfers.push_back(ResumeCall); + + NewCall = ResumeCall; + } + CB->replaceAllUsesWith(NewCall); CB->eraseFromParent(); } @@ -213,7 +242,7 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) { static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) { IRBuilder<> Builder(F.getContext()); for (auto *AWS : Shape.CoroAwaitSuspends) - lowerAwaitSuspend(Builder, AWS); + lowerAwaitSuspend(Builder, AWS, Shape); } static void maybeFreeRetconStorage(IRBuilder<> &Builder, @@ -1056,6 +1085,25 @@ void CoroCloner::create() { // Set up the new entry block. replaceEntryBlock(); + // Turn symmetric transfers into musttail calls. + for (CallInst *ResumeCall : Shape.SymmetricTransfers) { + ResumeCall = cast<CallInst>(VMap[ResumeCall]); + ResumeCall->setCallingConv(NewF->getCallingConv()); + if (TTI.supportsTailCallFor(ResumeCall)) { + // FIXME: Could we support symmetric transfer effectively without + // musttail? + ResumeCall->setTailCallKind(CallInst::TCK_MustTail); + } + + // Put a 'ret void' after the call, and split any remaining instructions to + // an unreachable block. 
+ BasicBlock *BB = ResumeCall->getParent(); + BB->splitBasicBlock(ResumeCall->getNextNode()); + Builder.SetInsertPoint(BB->getTerminator()); + Builder.CreateRetVoid(); + BB->getTerminator()->eraseFromParent(); + } + Builder.SetInsertPoint(&NewF->getEntryBlock().front()); NewFramePtr = deriveNewFramePointer(); @@ -1186,130 +1234,6 @@ scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock, } } -// Replace a sequence of branches leading to a ret, with a clone of a ret -// instruction. Suspend instruction represented by a switch, track the PHI -// values and select the correct case successor when possible. -static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) { - // There is nothing to simplify. - if (isa(InitialInst)) - return false; - - DenseMap ResolvedValues; - assert(InitialInst->getModule()); - const DataLayout &DL = InitialInst->getModule()->getDataLayout(); - - auto TryResolveConstant = [&ResolvedValues](Value *V) { - auto It = ResolvedValues.find(V); - if (It != ResolvedValues.end()) - V = It->second; - return dyn_cast(V); - }; - - Instruction *I = InitialInst; - while (true) { - if (isa(I)) { - assert(!cast(I)->getReturnValue()); - ReplaceInstWithInst(InitialInst, I->clone()); - return true; - } - - if (auto *BR = dyn_cast(I)) { - unsigned SuccIndex = 0; - if (BR->isConditional()) { - // Handle the case the condition of the conditional branch is constant. - // e.g., - // - // br i1 false, label %cleanup, label %CoroEnd - // - // It is possible during the transformation. We could continue the - // simplifying in this case. - ConstantInt *Cond = TryResolveConstant(BR->getCondition()); - if (!Cond) - return false; - - SuccIndex = Cond->isOne() ? 
0 : 1; - } - - BasicBlock *Succ = BR->getSuccessor(SuccIndex); - scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues); - I = Succ->getFirstNonPHIOrDbgOrLifetime(); - continue; - } - - if (auto *Cmp = dyn_cast(I)) { - // If the case number of suspended switch instruction is reduced to - // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator. - // Try to constant fold it. - ConstantInt *Cond0 = TryResolveConstant(Cmp->getOperand(0)); - ConstantInt *Cond1 = TryResolveConstant(Cmp->getOperand(1)); - if (Cond0 && Cond1) { - ConstantInt *Result = - dyn_cast_or_null(ConstantFoldCompareInstOperands( - Cmp->getPredicate(), Cond0, Cond1, DL)); - if (Result) { - ResolvedValues[Cmp] = Result; - I = I->getNextNode(); - continue; - } - } - } - - if (auto *SI = dyn_cast(I)) { - ConstantInt *Cond = TryResolveConstant(SI->getCondition()); - if (!Cond) - return false; - - BasicBlock *Succ = SI->findCaseValue(Cond)->getCaseSuccessor(); - scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues); - I = Succ->getFirstNonPHIOrDbgOrLifetime(); - continue; - } - - if (I->isDebugOrPseudoInst() || I->isLifetimeStartOrEnd() || - wouldInstructionBeTriviallyDead(I)) { - // We can skip instructions without side effects. If their values are - // needed, we'll notice later, e.g. when hitting a conditional branch. - I = I->getNextNode(); - continue; - } - - break; - } - - return false; -} - -// Check whether CI obeys the rules of musttail attribute. -static bool shouldBeMustTail(const CallInst &CI, const Function &F) { - if (CI.isInlineAsm()) - return false; - - // Match prototypes and calling conventions of resume function. 
- FunctionType *CalleeTy = CI.getFunctionType(); - if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1)) - return false; - - Type *CalleeParmTy = CalleeTy->getParamType(0); - if (!CalleeParmTy->isPointerTy() || - (CalleeParmTy->getPointerAddressSpace() != 0)) - return false; - - if (CI.getCallingConv() != F.getCallingConv()) - return false; - - // CI should not has any ABI-impacting function attributes. - static const Attribute::AttrKind ABIAttrs[] = { - Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca, - Attribute::Preallocated, Attribute::InReg, Attribute::Returned, - Attribute::SwiftSelf, Attribute::SwiftError}; - AttributeList Attrs = CI.getAttributes(); - for (auto AK : ABIAttrs) - if (Attrs.hasParamAttr(0, AK)) - return false; - - return true; -} - // Coroutine has no suspend points. Remove heap allocation for the coroutine // frame if possible. static void handleNoSuspendCoroutine(coro::Shape &Shape) { @@ -1523,24 +1447,16 @@ struct SwitchCoroutineSplitter { createResumeEntryBlock(F, Shape); auto *ResumeClone = - createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume); + createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI); auto *DestroyClone = - createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind); + createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI); auto *CleanupClone = - createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup); + createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI); postSplitCleanup(*ResumeClone); postSplitCleanup(*DestroyClone); postSplitCleanup(*CleanupClone); - // Adding musttail call to support symmetric transfer. - // Skip targets which don't support tail call. - // - // FIXME: Could we support symmetric transfer effectively without musttail - // call? - if (TTI.supportsTailCalls()) - addMustTailToCoroResumes(*ResumeClone, TTI); - // Store addresses resume/destroy/cleanup functions in the coroutine frame. 
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); @@ -1560,8 +1476,9 @@ struct SwitchCoroutineSplitter { // new entry block and replacing coro.suspend an appropriate value to force // resume or cleanup pass for every suspend point. static Function *createClone(Function &F, const Twine &Suffix, - coro::Shape &Shape, CoroCloner::Kind FKind) { - CoroCloner Cloner(F, Suffix, Shape, FKind); + coro::Shape &Shape, CoroCloner::Kind FKind, + TargetTransformInfo &TTI) { + CoroCloner Cloner(F, Suffix, Shape, FKind, TTI); Cloner.create(); return Cloner.getFunction(); } @@ -1662,34 +1579,6 @@ struct SwitchCoroutineSplitter { Shape.SwitchLowering.ResumeEntryBlock = NewEntry; } - // Add musttail to any resume instructions that is immediately followed by a - // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call - // for symmetrical coroutine control transfer (C++ Coroutines TS extension). - // This transformation is done only in the resume part of the coroutine that - // has identical signature and calling convention as the coro.resume call. - static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) { - bool Changed = false; - - // Collect potential resume instructions. - SmallVector Resumes; - for (auto &I : instructions(F)) - if (auto *Call = dyn_cast(&I)) - if (shouldBeMustTail(*Call, F)) - Resumes.push_back(Call); - - // Set musttail on those that are followed by a ret instruction. - for (CallInst *Call : Resumes) - // Skip targets which don't support tail call on the specific case. - if (TTI.supportsTailCallFor(Call) && - simplifyTerminatorLeadingToRet(Call->getNextNode())) { - Call->setTailCallKind(CallInst::TCK_MustTail); - Changed = true; - } - - if (Changed) - removeUnreachableBlocks(F); - } - // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. 
static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, Function *DestroyFn, Function *CleanupFn) { @@ -1893,12 +1782,13 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape, auto *Suspend = Shape.CoroSuspends[Idx]; auto *Clone = Clones[Idx]; - CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create(); + CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create(); } } static void splitRetconCoroutine(Function &F, coro::Shape &Shape, - SmallVectorImpl &Clones) { + SmallVectorImpl &Clones, + TargetTransformInfo &TTI) { assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); assert(Clones.empty()); @@ -2021,7 +1911,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape, auto Suspend = Shape.CoroSuspends[i]; auto Clone = Clones[i]; - CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create(); + CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create(); } } @@ -2073,7 +1963,7 @@ splitCoroutine(Function &F, SmallVectorImpl &Clones, break; case coro::ABI::Retcon: case coro::ABI::RetconOnce: - splitRetconCoroutine(F, Shape, Clones); + splitRetconCoroutine(F, Shape, Clones, TTI); break; } } diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index a1c78d6a44ef4..1a92bc1636257 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -47,8 +47,8 @@ coro::LowererBase::LowererBase(Module &M) // // call ptr @llvm.coro.subfn.addr(ptr %Arg, i8 %index) -Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index, - Instruction *InsertPt) { +CallInst *coro::LowererBase::makeSubFnCall(Value *Arg, int Index, + Instruction *InsertPt) { auto *IndexVal = ConstantInt::get(Type::getInt8Ty(Context), Index); auto *Fn = Intrinsic::getDeclaration(&TheModule, Intrinsic::coro_subfn_addr); diff --git a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll 
b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll index fbc4a2c006f84..fd3b7bd815300 100644 --- a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll +++ b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll @@ -58,14 +58,13 @@ suspend.cond: ; CHECK-NEXT: to label %[[STEP2_CONT:[^ ]+]] unwind label %[[PAD]] step2: %save2 = call token @llvm.coro.save(ptr null) - %resume.handle = invoke ptr @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle) + invoke void @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle) to label %step2.continue unwind label %pad ; CHECK: [[STEP2_CONT]]: ; CHECK-NEXT: %[[NEXT_RESUME:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[NEXT_HDL]], i8 0) ; CHECK-NEXT: musttail call {{.*}} void %[[NEXT_RESUME]](ptr %[[NEXT_HDL]]) step2.continue: - call void @llvm.coro.resume(ptr %resume.handle) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %ret [ i8 0, label %step3 @@ -112,7 +111,7 @@ declare i1 @llvm.coro.alloc(token) declare ptr @llvm.coro.begin(token, ptr) declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr) declare i1 @llvm.coro.await.suspend.bool(ptr, ptr, ptr) -declare ptr @llvm.coro.await.suspend.handle(ptr, ptr, ptr) +declare void @llvm.coro.await.suspend.handle(ptr, ptr, ptr) declare i1 @llvm.coro.end(ptr, i1, token) declare ptr @__cxa_begin_catch(ptr) diff --git a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll index 0f574c4acc26e..8d019e6954628 100644 --- a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll +++ b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll @@ -49,8 +49,7 @@ suspend.cond: ; CHECK-NEXT: musttail call {{.*}} void %[[CONT]](ptr %[[NEXT_HDL]]) step2: %save2 = call token @llvm.coro.save(ptr null) - %resume.handle = call ptr @llvm.coro.await.suspend.handle(ptr 
%awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle) - call void @llvm.coro.resume(ptr %resume.handle) + call void @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %ret [ i8 0, label %step3 @@ -89,7 +88,7 @@ declare i1 @llvm.coro.alloc(token) declare ptr @llvm.coro.begin(token, ptr) declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr) declare i1 @llvm.coro.await.suspend.bool(ptr, ptr, ptr) -declare ptr @llvm.coro.await.suspend.handle(ptr, ptr, ptr) +declare void @llvm.coro.await.suspend.handle(ptr, ptr, ptr) declare i1 @llvm.coro.end(ptr, i1, token) declare noalias ptr @malloc(i32) diff --git a/llvm/test/Transforms/Coroutines/coro-preserve-final.ll b/llvm/test/Transforms/Coroutines/coro-preserve-final.ll deleted file mode 100644 index 16eeb84e7915a..0000000000000 --- a/llvm/test/Transforms/Coroutines/coro-preserve-final.ll +++ /dev/null @@ -1,131 +0,0 @@ -; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s - -%"struct.std::__n4861::noop_coroutine_promise" = type { i8 } -%struct.Promise = type { %"struct.std::__n4861::coroutine_handle" } -%"struct.std::__n4861::coroutine_handle" = type { ptr } - -define dso_local ptr @_Z5Outerv() #1 { -entry: - %__promise = alloca %struct.Promise, align 8 - %0 = call token @llvm.coro.id(i32 16, ptr nonnull %__promise, ptr nonnull @_Z5Outerv, ptr null) - %1 = call i1 @llvm.coro.alloc(token %0) - br i1 %1, label %coro.alloc, label %init.suspend - -coro.alloc: ; preds = %entry - %2 = tail call i64 @llvm.coro.size.i64() - %call = call noalias noundef nonnull ptr @_Znwm(i64 noundef %2) #12 - br label %init.suspend - -init.suspend: ; preds = %entry, %coro.alloc - %3 = phi ptr [ null, %entry ], [ %call, %coro.alloc ] - %4 = call ptr @llvm.coro.begin(token %0, ptr %3) #13 - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %__promise) #3 - store ptr null, ptr 
%__promise, align 8 - %5 = call token @llvm.coro.save(ptr null) - %6 = call i8 @llvm.coro.suspend(token %5, i1 false) - switch i8 %6, label %coro.ret [ - i8 0, label %await.suspend - i8 1, label %cleanup62 - ] - -await.suspend: ; preds = %init.suspend - %7 = call token @llvm.coro.save(ptr null) - %8 = call ptr @llvm.coro.subfn.addr(ptr %4, i8 0) - call fastcc void %8(ptr %4) #3 - %9 = call i8 @llvm.coro.suspend(token %7, i1 false) - switch i8 %9, label %coro.ret [ - i8 0, label %await2.suspend - i8 1, label %cleanup62 - ] - -await2.suspend: ; preds = %await.suspend - %call27 = call ptr @_Z5Innerv() #3 - %10 = call token @llvm.coro.save(ptr null) - %11 = getelementptr inbounds i8, ptr %__promise, i64 -16 - store ptr %11, ptr %call27, align 8 - %12 = getelementptr inbounds i8, ptr %call27, i64 -16 - %13 = call ptr @llvm.coro.subfn.addr(ptr nonnull %12, i8 0) - call fastcc void %13(ptr nonnull %12) #3 - %14 = call i8 @llvm.coro.suspend(token %10, i1 false) - switch i8 %14, label %coro.ret [ - i8 0, label %final.suspend - i8 1, label %cleanup62 - ] - -final.suspend: ; preds = %await2.suspend - %15 = call ptr @llvm.coro.subfn.addr(ptr nonnull %12, i8 1) - call fastcc void %15(ptr nonnull %12) #3 - %16 = call token @llvm.coro.save(ptr null) - %retval.sroa.0.0.copyload.i = load ptr, ptr %__promise, align 8 - %17 = call ptr @llvm.coro.subfn.addr(ptr %retval.sroa.0.0.copyload.i, i8 0) - call fastcc void %17(ptr %retval.sroa.0.0.copyload.i) #3 - %18 = call i8 @llvm.coro.suspend(token %16, i1 true) #13 - switch i8 %18, label %coro.ret [ - i8 0, label %final.ready - i8 1, label %cleanup62 - ] - -final.ready: ; preds = %final.suspend - call void @_Z5_exiti(i32 noundef 1) #14 - unreachable - -cleanup62: ; preds = %await2.suspend, %await.suspend, %init.suspend, %final.suspend - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %__promise) #3 - %19 = call ptr @llvm.coro.free(token %0, ptr %4) - %.not = icmp eq ptr %19, null - br i1 %.not, label %coro.ret, label %coro.free - 
-coro.free: ; preds = %cleanup62 - call void @_ZdlPv(ptr noundef nonnull %19) #3 - br label %coro.ret - -coro.ret: ; preds = %coro.free, %cleanup62, %final.suspend, %await2.suspend, %await.suspend, %init.suspend - %20 = call i1 @llvm.coro.end(ptr null, i1 false, token none) #13 - ret ptr %__promise -} - -declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #2 -declare i1 @llvm.coro.alloc(token) #3 -declare dso_local noundef nonnull ptr @_Znwm(i64 noundef) local_unnamed_addr #4 -declare i64 @llvm.coro.size.i64() #5 -declare ptr @llvm.coro.begin(token, ptr writeonly) #3 -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6 -declare token @llvm.coro.save(ptr) #7 -declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6 -declare i8 @llvm.coro.suspend(token, i1) #3 -declare dso_local ptr @_Z5Innerv() local_unnamed_addr #8 -declare dso_local void @_ZdlPv(ptr noundef) local_unnamed_addr #9 -declare ptr @llvm.coro.free(token, ptr nocapture readonly) #2 -declare i1 @llvm.coro.end(ptr, i1, token) #3 -declare dso_local void @_Z5_exiti(i32 noundef) local_unnamed_addr #10 -declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #11 - -attributes #0 = { mustprogress nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nounwind presplitcoroutine uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #2 = { argmemonly nofree nounwind readonly } -attributes #3 = { nounwind } -attributes #4 = { nobuiltin allocsize(0) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" 
"tune-cpu"="generic" } -attributes #5 = { nofree nosync nounwind readnone } -attributes #6 = { argmemonly mustprogress nocallback nofree nosync nounwind willreturn } -attributes #7 = { nomerge nounwind } -attributes #8 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #9 = { nobuiltin nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #10 = { noreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #11 = { argmemonly nounwind readonly } -attributes #12 = { nounwind allocsize(0) } -attributes #13 = { noduplicate } -attributes #14 = { noreturn nounwind } - -; CHECK: define{{.*}}@_Z5Outerv.resume( -; CHECK: entry.resume: -; CHECK: switch i2 %index -; CHECK-NEXT: i2 0, label %await2.suspend -; CHECK-NEXT: i2 1, label %final.suspend -; -; CHECK: await2.suspend: -; CHECK: musttail call -; CHECK-NEXT: ret void -; -; CHECK: final.suspend: -; CHECK: musttail call -; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll index ddd293eed2409..e2ed205f2c2f4 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll @@ -90,10 +90,7 @@ define ptr @f(i32 %0) presplitcoroutine align 32 { %25 = getelementptr inbounds { ptr, ptr }, ptr %5, i64 0, i32 1 store ptr %24, ptr %25, align 8 %26 = call token @llvm.coro.save(ptr null) - %27 = call ptr @await_transform_await_suspend(ptr noundef nonnull align 
8 dereferenceable(16) %5, ptr %14) - %28 = call ptr @llvm.coro.subfn.addr(ptr %27, i8 0) - %29 = ptrtoint ptr %28 to i64 - call fastcc void %28(ptr %27) #9 + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_transform_await_suspend) %30 = call i8 @llvm.coro.suspend(token %26, i1 false) switch i8 %30, label %60 [ i8 0, label %31 @@ -123,9 +120,7 @@ define ptr @f(i32 %0) presplitcoroutine align 32 { br i1 %42, label %43, label %46 43: ; preds = %36 - %44 = call ptr @llvm.coro.subfn.addr(ptr nonnull %14, i8 1) - %45 = ptrtoint ptr %44 to i64 - call fastcc void %44(ptr nonnull %14) #9 + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_transform_await_suspend) br label %47 46: ; preds = %36 diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll index 825e44471db27..70f29f4a9a4dc 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll @@ -1,7 +1,6 @@ -; Tests that coro-split will convert coro.resume followed by a suspend to a -; musttail call. -; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s -; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s +; Tests that coro-split will convert coro.await.suspend.handle to a musttail call. 
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s define void @f() #0 { entry: @@ -20,8 +19,7 @@ entry: ] await.ready: %save2 = call token @llvm.coro.save(ptr null) - %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) - call fastcc void %addr2(ptr null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %exit [ @@ -40,10 +38,8 @@ exit: ; Verify that in the resume part resume call is marked with musttail. ; CHECK-LABEL: @f.resume( -; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) -; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null) -; PGO: call void @llvm.instrprof -; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null) +; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr +; CHECK-NEXT: musttail call fastcc void %[[addr2]] ; CHECK-NEXT: ret void declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 @@ -57,6 +53,7 @@ declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 declare i1 @llvm.coro.end(ptr, i1, token) #2 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 declare ptr @malloc(i64) +declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl) attributes #0 = { presplitcoroutine } attributes #1 = { argmemonly nounwind readonly } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll index d0d11fc4495e4..3edb8728d8550 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll @@ -1,7 +1,6 @@ -; Tests that coro-split will convert coro.resume followed by a suspend to a -; musttail call. 
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s -; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s +; Tests that coro-split will convert coro.await.suspend.handle to a musttail call. +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s define void @f() #0 { entry: @@ -28,17 +27,14 @@ await.suspend: ] await.resume1: %hdl = call ptr @g() - %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0) - call fastcc void %addr2(ptr %hdl) + call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl, ptr @await_suspend_function) br label %final.suspend await.resume2: %hdl2 = call ptr @h() - %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0) - call fastcc void %addr3(ptr %hdl2) + call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl2, ptr @await_suspend_function) br label %final.suspend await.resume3: - %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) - call fastcc void %addr4(ptr null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) br label %final.suspend final.suspend: %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) @@ -63,18 +59,18 @@ unreach: ; Verify that in the resume part resume call is marked with musttail. 
; CHECK-LABEL: @f.resume( ; CHECK: %[[hdl:.+]] = call ptr @g() -; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0) -; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) -; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) +; CHECK-NEXT: call ptr @await_suspend_function +; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr +; CHECK-NEXT: musttail call fastcc void %[[addr2]] ; CHECK-NEXT: ret void ; CHECK: %[[hdl2:.+]] = call ptr @h() -; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0) -; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) -; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) +; CHECK-NEXT: call ptr @await_suspend_function +; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr +; CHECK-NEXT: musttail call fastcc void %[[addr3]] ; CHECK-NEXT: ret void -; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) -; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null) -; PGO: musttail call fastcc void %[[addr4]](ptr null) +; CHECK: call ptr @await_suspend_function +; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr +; CHECK-NEXT: musttail call fastcc void %[[addr4]] ; CHECK-NEXT: ret void @@ -93,6 +89,7 @@ declare ptr @malloc(i64) declare i8 @switch_result() declare ptr @g() declare ptr @h() +declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl) attributes #0 = { presplitcoroutine } attributes #1 = { argmemonly nounwind readonly } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll index 3e91b79c10f73..a55b3d16e2ded 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll @@ -1,4 +1,4 @@ -; Tests that we would convert coro.resume to a musttail call if the target is +; Tests that coro-split will convert coro.await.suspend.handle to a musttail call if the target is ; 
Wasm64 or Wasm32 with tail-call support. ; REQUIRES: webassembly-registered-target @@ -25,8 +25,7 @@ entry: ] await.ready: %save2 = call token @llvm.coro.save(ptr null) - %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) - call fastcc void %addr2(ptr null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %exit [ @@ -51,6 +50,7 @@ declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 declare i1 @llvm.coro.end(ptr, i1, token) #2 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 declare ptr @malloc(i64) +declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl) attributes #0 = { presplitcoroutine "target-features"="+tail-call" } attributes #1 = { argmemonly nounwind readonly } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll index 2f27f79480ab1..ca1611e19b9f9 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll @@ -1,5 +1,4 @@ -; Tests that coro-split will convert coro.resume followed by a suspend to a -; musttail call. +; Tests that coro-split will convert coro.await.suspend.handle to a musttail call. 
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s @@ -8,11 +7,6 @@ entry: ret void; } -define void @fakeresume2(ptr align 8) { -entry: - ret void; -} - define void @g() #0 { entry: %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) @@ -29,7 +23,7 @@ entry: ] await.ready: %save2 = call token @llvm.coro.save(ptr null) - call fastcc void @fakeresume2(ptr align 8 null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %exit [ @@ -47,7 +41,9 @@ exit: ; Verify that in the resume part resume call is marked with musttail. ; CHECK-LABEL: @g.resume( -; CHECK: musttail call fastcc void @fakeresume2(ptr align 8 null) +; CHECK: call ptr @await_suspend_function +; CHECK-NEXT: call ptr @llvm.coro.subfn.addr +; CHECK-NEXT: musttail call fastcc void ; CHECK-NEXT: ret void declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 @@ -61,6 +57,7 @@ declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 declare i1 @llvm.coro.end(ptr, i1, token) #2 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 declare ptr @malloc(i64) +declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl) attributes #0 = { presplitcoroutine } attributes #1 = { argmemonly nounwind readonly } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll index 4778e3dcaf995..84cdac17beebb 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll @@ -1,7 +1,6 @@ -; Tests that coro-split will convert coro.resume followed by a suspend to a -; musttail call. 
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s -; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s +; Tests that coro-split will convert coro.await.suspend.handle to a musttail call. +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s define void @f() #0 { entry: @@ -26,17 +25,14 @@ await.suspend: ] await.resume1: %hdl = call ptr @g() - %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0) - call fastcc void %addr2(ptr %hdl) + call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl, ptr @await_suspend_function) br label %final.suspend await.resume2: %hdl2 = call ptr @h() - %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0) - call fastcc void %addr3(ptr %hdl2) + call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl2, ptr @await_suspend_function) br label %final.suspend await.resume3: - %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) - call fastcc void %addr4(ptr null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) br label %final.suspend final.suspend: %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) @@ -59,22 +55,21 @@ unreach: ; Verify that in the resume part resume call is marked with musttail. 
; CHECK-LABEL: @f.resume( ; CHECK: %[[hdl:.+]] = call ptr @g() -; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0) -; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) -; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) +; CHECK-NEXT: call ptr @await_suspend_function +; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr +; CHECK-NEXT: musttail call fastcc void %[[addr2]] ; CHECK-NEXT: ret void ; CHECK: %[[hdl2:.+]] = call ptr @h() -; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0) -; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) -; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) +; CHECK-NEXT: call ptr @await_suspend_function +; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr +; CHECK-NEXT: musttail call fastcc void %[[addr3]] ; CHECK-NEXT: ret void -; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) -; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null) -; PGO: musttail call fastcc void %[[addr4]](ptr null) +; CHECK: call ptr @await_suspend_function +; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr +; CHECK-NEXT: musttail call fastcc void %[[addr4]] ; CHECK-NEXT: ret void - declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 declare i1 @llvm.coro.alloc(token) #2 declare i64 @llvm.coro.size.i64() #3 @@ -89,6 +84,7 @@ declare ptr @malloc(i64) declare i8 @switch_result() declare ptr @g() declare ptr @h() +declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl) attributes #0 = { presplitcoroutine } attributes #1 = { argmemonly nounwind readonly } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll index 00ee422ce5863..b647bd2e4a207 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll @@ -1,5 +1,4 @@ -; Tests that coro-split 
will convert a call before coro.suspend to a musttail call -; while the user of the coro.suspend is a icmpinst. +; Tests that coro-split will convert coro.await.suspend.handle to a musttail call. ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s @@ -24,7 +23,7 @@ entry: await.ready: %save2 = call token @llvm.coro.save(ptr null) - call fastcc void @fakeresume1(ptr align 8 null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true) %switch = icmp ult i8 %suspend, 2 br i1 %switch, label %cleanup, label %coro.end @@ -44,7 +43,7 @@ coro.end: } ; CHECK-LABEL: @f.resume( -; CHECK: musttail call fastcc void @fakeresume1( +; CHECK: musttail call fastcc void ; CHECK-NEXT: ret void declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 @@ -59,6 +58,7 @@ declare i1 @llvm.coro.end(ptr, i1, token) #2 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 declare ptr @malloc(i64) declare void @delete(ptr nonnull) #2 +declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl) attributes #0 = { presplitcoroutine } attributes #1 = { argmemonly nounwind readonly } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll index 9afc79abbe88c..7c1a13fd83cec 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll @@ -1,5 +1,4 @@ -; Tests that sinked lifetime markers wouldn't provent optimization -; to convert a resuming call to a musttail call. +; Tests that coro-split will convert coro.await.suspend.handle to a musttail call. 
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s @@ -22,7 +21,7 @@ entry: ] await.suspend: %save2 = call token @llvm.coro.save(ptr null) - call fastcc void @fakeresume1(ptr align 8 null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %exit [ i8 0, label %await.ready @@ -39,7 +38,7 @@ exit: ; Verify that in the resume part resume call is marked with musttail. ; CHECK-LABEL: @g.resume( -; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK: musttail call fastcc void ; CHECK-NEXT: ret void declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 @@ -56,6 +55,7 @@ declare ptr @malloc(i64) declare void @consume(ptr) declare void @llvm.lifetime.start.p0(i64, ptr nocapture) declare void @llvm.lifetime.end.p0(i64, ptr nocapture) +declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl) attributes #0 = { presplitcoroutine } attributes #1 = { argmemonly nounwind readonly } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll index d9dba92ec4eb7..e05169a729168 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll @@ -1,5 +1,5 @@ ; Tests that sinked lifetime markers wouldn't provent optimization -; to convert a resuming call to a musttail call. +; to convert a coro.await.suspend.handle call to a musttail call. ; The difference between this and coro-split-musttail5.ll is that there is ; an extra bitcast instruction in the path, which makes it harder to ; optimize. 
@@ -25,7 +25,7 @@ entry: ] await.suspend: %save2 = call token @llvm.coro.save(ptr null) - call fastcc void @fakeresume1(ptr align 8 null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %exit [ i8 0, label %await.ready @@ -42,7 +42,7 @@ exit: ; Verify that in the resume part resume call is marked with musttail. ; CHECK-LABEL: @g.resume( -; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK: musttail call fastcc void ; CHECK-NEXT: ret void ; It has a cleanup bb. @@ -63,7 +63,7 @@ entry: ] await.suspend: %save2 = call token @llvm.coro.save(ptr null) - call fastcc void @fakeresume1(ptr align 8 null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %exit [ i8 0, label %await.ready @@ -90,7 +90,7 @@ exit: ; Verify that in the resume part resume call is marked with musttail. 
; CHECK-LABEL: @f.resume( -; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK: musttail call fastcc void ; CHECK-NEXT: ret void declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 @@ -108,6 +108,7 @@ declare void @delete(ptr nonnull) #2 declare void @consume(ptr) declare void @llvm.lifetime.start.p0(i64, ptr nocapture) declare void @llvm.lifetime.end.p0(i64, ptr nocapture) +declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl) attributes #0 = { presplitcoroutine } attributes #1 = { argmemonly nounwind readonly } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll index d0d5005587bda..8ceb0dda94f6a 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll @@ -1,13 +1,11 @@ ; Tests that sinked lifetime markers wouldn't provent optimization -; to convert a resuming call to a musttail call. +; to convert a coro.await.suspend.handle call to a musttail call. ; The difference between this and coro-split-musttail5.ll and coro-split-musttail6.ll ; is that this contains dead instruction generated during the transformation, ; which makes the optimization harder. ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s -declare void @fakeresume1(ptr align 8) - define i64 @g() #0 { entry: %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) @@ -25,7 +23,7 @@ entry: ] await.suspend: %save2 = call token @llvm.coro.save(ptr null) - call fastcc void @fakeresume1(ptr align 8 null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) ; These (non-trivially) dead instructions are in the way. 
@@ -48,7 +46,9 @@ exit: ; Verify that in the resume part resume call is marked with musttail. ; CHECK-LABEL: @g.resume( -; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK: %[[FRAME:[0-9]+]] = call ptr @await_suspend_function(ptr null, ptr null) +; CHECK: %[[RESUMEADDR:[0-9]+]] = call ptr @llvm.coro.subfn.addr(ptr %[[FRAME]], i8 0) +; CHECK: musttail call fastcc void %[[RESUMEADDR]](ptr %[[FRAME]]) ; CHECK-NEXT: ret void ; It has a cleanup bb. @@ -69,7 +69,7 @@ entry: ] await.suspend: %save2 = call token @llvm.coro.save(ptr null) - call fastcc void @fakeresume1(ptr align 8 null) + call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function) %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend2, label %exit [ i8 0, label %await.ready @@ -96,7 +96,9 @@ exit: ; Verify that in the resume part resume call is marked with musttail. ; CHECK-LABEL: @f.resume( -; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK: %[[FRAME:[0-9]+]] = call ptr @await_suspend_function(ptr null, ptr null) +; CHECK: %[[RESUMEADDR:[0-9]+]] = call ptr @llvm.coro.subfn.addr(ptr %[[FRAME]], i8 0) +; CHECK: musttail call fastcc void %[[RESUMEADDR]](ptr %[[FRAME]]) ; CHECK-NEXT: ret void declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 @@ -114,6 +116,7 @@ declare void @delete(ptr nonnull) #2 declare void @consume(ptr) declare void @llvm.lifetime.start.p0(i64, ptr nocapture) declare void @llvm.lifetime.end.p0(i64, ptr nocapture) +declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl) attributes #0 = { presplitcoroutine } attributes #1 = { argmemonly nounwind readonly }