Skip to content

Commit

Permalink
[C++20] [Coroutines] Implement return value optimization for get_retu…
Browse files Browse the repository at this point in the history
…rn_object

This patch tries to implement RVO for coroutine's return object got from
get_return_object.
From [dcl.fct.def.coroutine]/p7 we could know that the return value of
get_return_object is either a reference or a prvalue. So it makes sense
to do copy elision for the return value. The return object should be
constructed directly into the storage where they would otherwise be
copied/moved to.

Test Plan: folly, check-all

Reviewed By: junparser

Differential revision: https://reviews.llvm.org/D117087
  • Loading branch information
ChuanqiXu9 committed Feb 16, 2022
1 parent 2aed07e commit d30ca5e
Show file tree
Hide file tree
Showing 16 changed files with 81 additions and 225 deletions.
8 changes: 5 additions & 3 deletions clang/include/clang/AST/StmtCXX.h
Expand Up @@ -327,7 +327,6 @@ class CoroutineBodyStmt final
Allocate, ///< Coroutine frame memory allocation.
Deallocate, ///< Coroutine frame memory deallocation.
ReturnValue, ///< Return value for thunk function: p.get_return_object().
ResultDecl, ///< Declaration holding the result of get_return_object.
ReturnStmt, ///< Return statement for the thunk function.
ReturnStmtOnAllocFailure, ///< Return statement if allocation failed.
FirstParamMove ///< First offset for move construction of parameter copies.
Expand All @@ -354,7 +353,6 @@ class CoroutineBodyStmt final
Expr *Allocate = nullptr;
Expr *Deallocate = nullptr;
Expr *ReturnValue = nullptr;
Stmt *ResultDecl = nullptr;
Stmt *ReturnStmt = nullptr;
Stmt *ReturnStmtOnAllocFailure = nullptr;
ArrayRef<Stmt *> ParamMoves;
Expand Down Expand Up @@ -409,7 +407,11 @@ class CoroutineBodyStmt final
Expr *getReturnValueInit() const {
return cast<Expr>(getStoredStmts()[SubStmt::ReturnValue]);
}
Stmt *getResultDecl() const { return getStoredStmts()[SubStmt::ResultDecl]; }
Expr *getReturnValue() const {
assert(getReturnStmt());
auto *RS = cast<clang::ReturnStmt>(getReturnStmt());
return RS->getRetValue();
}
Stmt *getReturnStmt() const { return getStoredStmts()[SubStmt::ReturnStmt]; }
Stmt *getReturnStmtOnAllocFailure() const {
return getStoredStmts()[SubStmt::ReturnStmtOnAllocFailure];
Expand Down
1 change: 0 additions & 1 deletion clang/lib/AST/StmtCXX.cpp
Expand Up @@ -118,7 +118,6 @@ CoroutineBodyStmt::CoroutineBodyStmt(CoroutineBodyStmt::CtorArgs const &Args)
SubStmts[CoroutineBodyStmt::Allocate] = Args.Allocate;
SubStmts[CoroutineBodyStmt::Deallocate] = Args.Deallocate;
SubStmts[CoroutineBodyStmt::ReturnValue] = Args.ReturnValue;
SubStmts[CoroutineBodyStmt::ResultDecl] = Args.ResultDecl;
SubStmts[CoroutineBodyStmt::ReturnStmt] = Args.ReturnStmt;
SubStmts[CoroutineBodyStmt::ReturnStmtOnAllocFailure] =
Args.ReturnStmtOnAllocFailure;
Expand Down
98 changes: 22 additions & 76 deletions clang/lib/CodeGen/CGCoroutine.cpp
Expand Up @@ -465,72 +465,6 @@ struct CallCoroDelete final : public EHScopeStack::Cleanup {
};
}

namespace {
struct GetReturnObjectManager {
CodeGenFunction &CGF;
CGBuilderTy &Builder;
const CoroutineBodyStmt &S;

Address GroActiveFlag;
CodeGenFunction::AutoVarEmission GroEmission;

GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S)
: CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()),
GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {}

// The gro variable has to outlive coroutine frame and coroutine promise, but,
// it can only be initialized after coroutine promise was created, thus, we
// split its emission in two parts. EmitGroAlloca emits an alloca and sets up
// cleanups. Later when coroutine promise is available we initialize the gro
// and sets the flag that the cleanup is now active.

void EmitGroAlloca() {
auto *GroDeclStmt = dyn_cast<DeclStmt>(S.getResultDecl());
if (!GroDeclStmt) {
// If get_return_object returns void, no need to do an alloca.
return;
}

auto *GroVarDecl = cast<VarDecl>(GroDeclStmt->getSingleDecl());

// Set GRO flag that it is not initialized yet
GroActiveFlag =
CGF.CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), "gro.active");
Builder.CreateStore(Builder.getFalse(), GroActiveFlag);

GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl);

// Remember the top of EHStack before emitting the cleanup.
auto old_top = CGF.EHStack.stable_begin();
CGF.EmitAutoVarCleanups(GroEmission);
auto top = CGF.EHStack.stable_begin();

// Make the cleanup conditional on gro.active
for (auto b = CGF.EHStack.find(top), e = CGF.EHStack.find(old_top);
b != e; b++) {
if (auto *Cleanup = dyn_cast<EHCleanupScope>(&*b)) {
assert(!Cleanup->hasActiveFlag() && "cleanup already has active flag?");
Cleanup->setActiveFlag(GroActiveFlag);
Cleanup->setTestFlagInEHCleanup();
Cleanup->setTestFlagInNormalCleanup();
}
}
}

void EmitGroInit() {
if (!GroActiveFlag.isValid()) {
// No Gro variable was allocated. Simply emit the call to
// get_return_object.
CGF.EmitStmt(S.getResultDecl());
return;
}

CGF.EmitAutoVarInit(GroEmission);
Builder.CreateStore(Builder.getTrue(), GroActiveFlag);
}
};
}

static void emitBodyAndFallthrough(CodeGenFunction &CGF,
const CoroutineBodyStmt &S, Stmt *Body) {
CGF.EmitStmt(Body);
Expand Down Expand Up @@ -597,13 +531,6 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi});
CurCoro.Data->CoroBegin = CoroBegin;

// We need to emit `get_­return_­object` first. According to:
// [dcl.fct.def.coroutine]p7
// The call to get_­return_­object is sequenced before the call to
// initial_­suspend and is invoked at most once.
GetReturnObjectManager GroManager(*this, S);
GroManager.EmitGroAlloca();

CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB);
{
CGDebugInfo *DI = getDebugInfo();
Expand Down Expand Up @@ -641,8 +568,23 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
// promise local variable was not emitted yet.
CoroId->setArgOperand(1, PromiseAddrVoidPtr);

// Now we have the promise, initialize the GRO
GroManager.EmitGroInit();
// ReturnValue should be valid as long as the coroutine's return type
// is not void. The assertion could help us to reduce the check later.
assert(ReturnValue.isValid() == (bool)S.getReturnStmt());
// Now we have the promise, initialize the GRO.
// We need to emit `get_return_object` first. According to:
// [dcl.fct.def.coroutine]p7
// The call to get_return_­object is sequenced before the call to
// initial_suspend and is invoked at most once.
//
// So we couldn't emit return value when we emit return statment,
// otherwise the call to get_return_object wouldn't be in front
// of initial_suspend.
if (ReturnValue.isValid()) {
EmitAnyExprToMem(S.getReturnValue(), ReturnValue,
S.getReturnValue()->getType().getQualifiers(),
/*IsInit*/ true);
}

EHStack.pushCleanup<CallCoroEnd>(EHCleanup);

Expand Down Expand Up @@ -705,8 +647,12 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()});

if (Stmt *Ret = S.getReturnStmt())
if (Stmt *Ret = S.getReturnStmt()) {
// Since we already emitted the return value above, so we shouldn't
// emit it again here.
cast<ReturnStmt>(Ret)->setRetValue(nullptr);
EmitStmt(Ret);
}

// LLVM require the frontend to add the function attribute. See
// Coroutines.rst.
Expand Down
43 changes: 1 addition & 42 deletions clang/lib/Sema/SemaCoroutine.cpp
Expand Up @@ -1577,7 +1577,6 @@ bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() {
if (Res.isInvalid())
return false;

this->ResultDecl = Res.get();
return true;
}

Expand All @@ -1590,51 +1589,11 @@ bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() {
return false;
}

auto *GroDecl = VarDecl::Create(
S.Context, &FD, FD.getLocation(), FD.getLocation(),
&S.PP.getIdentifierTable().get("__coro_gro"), GroType,
S.Context.getTrivialTypeSourceInfo(GroType, Loc), SC_None);
GroDecl->setImplicit();

S.CheckVariableDeclarationType(GroDecl);
if (GroDecl->isInvalidDecl())
return false;

InitializedEntity Entity = InitializedEntity::InitializeVariable(GroDecl);
ExprResult Res =
S.PerformCopyInitialization(Entity, SourceLocation(), ReturnValue);
if (Res.isInvalid())
return false;

Res = S.ActOnFinishFullExpr(Res.get(), /*DiscardedValue*/ false);
if (Res.isInvalid())
return false;

S.AddInitializerToDecl(GroDecl, Res.get(),
/*DirectInit=*/false);

S.FinalizeDeclaration(GroDecl);

// Form a declaration statement for the return declaration, so that AST
// visitors can more easily find it.
StmtResult GroDeclStmt =
S.ActOnDeclStmt(S.ConvertDeclToDeclGroup(GroDecl), Loc, Loc);
if (GroDeclStmt.isInvalid())
return false;

this->ResultDecl = GroDeclStmt.get();

ExprResult declRef = S.BuildDeclRefExpr(GroDecl, GroType, VK_LValue, Loc);
if (declRef.isInvalid())
return false;

StmtResult ReturnStmt = S.BuildReturnStmt(Loc, declRef.get());
StmtResult ReturnStmt = S.BuildReturnStmt(Loc, ReturnValue);
if (ReturnStmt.isInvalid()) {
noteMemberDeclaredHere(S, ReturnValue, Fn);
return false;
}
if (cast<clang::ReturnStmt>(ReturnStmt.get())->getNRVOCandidate() == GroDecl)
GroDecl->setNRVOVariable(true);

this->ReturnStmt = ReturnStmt.get();
return true;
Expand Down
6 changes: 0 additions & 6 deletions clang/lib/Sema/TreeTransform.h
Expand Up @@ -7903,12 +7903,6 @@ TreeTransform<Derived>::TransformCoroutineBodyStmt(CoroutineBodyStmt *S) {
return StmtError();
Builder.Deallocate = DeallocRes.get();

assert(S->getResultDecl() && "ResultDecl must already be built");
StmtResult ResultDecl = getDerived().TransformStmt(S->getResultDecl());
if (ResultDecl.isInvalid())
return StmtError();
Builder.ResultDecl = ResultDecl.get();

if (auto *ReturnStmt = S->getReturnStmt()) {
StmtResult Res = getDerived().TransformStmt(ReturnStmt);
if (Res.isInvalid())
Expand Down
8 changes: 0 additions & 8 deletions clang/test/CodeGenCoroutines/coro-alloc-exp-namespace.cpp
Expand Up @@ -226,7 +226,6 @@ struct std::experimental::coroutine_traits<int, promise_on_alloc_failure_tag> {
// CHECK-LABEL: f4(
extern "C" int f4(promise_on_alloc_failure_tag) {
// CHECK: %[[RetVal:.+]] = alloca i32
// CHECK: %[[Gro:.+]] = alloca i32
// CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
// CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
// CHECK: %[[MEM:.+]] = call noalias noundef i8* @_ZnwmRKSt9nothrow_t(i64 noundef %[[SIZE]], %"struct.std::nothrow_t"* noundef nonnull align 1 dereferenceable(1) @_ZStL7nothrow)
Expand All @@ -240,13 +239,6 @@ extern "C" int f4(promise_on_alloc_failure_tag) {

// CHECK: [[OKBB]]:
// CHECK: %[[OkRet:.+]] = call noundef i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type17get_return_objectEv(
// CHECK: store i32 %[[OkRet]], i32* %[[Gro]]

// CHECK: %[[Tmp1:.*]] = load i32, i32* %[[Gro]]
// CHECK-NEXT: store i32 %[[Tmp1]], i32* %[[RetVal]]
// CHECK-NEXT: %[[Gro_CAST:.+]] = bitcast i32* %[[Gro]] to i8*
// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* %[[Gro_CAST]]) #2
// CHECK-NEXT: br label %[[RetBB]]

// CHECK: [[RetBB]]:
// CHECK: %[[LoadRet:.+]] = load i32, i32* %[[RetVal]], align 4
Expand Down
8 changes: 0 additions & 8 deletions clang/test/CodeGenCoroutines/coro-alloc.cpp
Expand Up @@ -224,7 +224,6 @@ struct std::coroutine_traits<int, promise_on_alloc_failure_tag> {
// CHECK-LABEL: f4(
extern "C" int f4(promise_on_alloc_failure_tag) {
// CHECK: %[[RetVal:.+]] = alloca i32
// CHECK: %[[Gro:.+]] = alloca i32
// CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
// CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
// CHECK: %[[MEM:.+]] = call noalias noundef i8* @_ZnwmRKSt9nothrow_t(i64 noundef %[[SIZE]], %"struct.std::nothrow_t"* noundef nonnull align 1 dereferenceable(1) @_ZStL7nothrow)
Expand All @@ -238,13 +237,6 @@ extern "C" int f4(promise_on_alloc_failure_tag) {

// CHECK: [[OKBB]]:
// CHECK: %[[OkRet:.+]] = call noundef i32 @_ZNSt16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type17get_return_objectEv(
// CHECK: store i32 %[[OkRet]], i32* %[[Gro]]

// CHECK: %[[Tmp1:.*]] = load i32, i32* %[[Gro]]
// CHECK-NEXT: store i32 %[[Tmp1]], i32* %[[RetVal]]
// CHECK-NEXT: %[[Gro_CAST:.+]] = bitcast i32* %[[Gro]] to i8*
// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* %[[Gro_CAST]]) #2
// CHECK-NEXT: br label %[[RetBB]]

// CHECK: [[RetBB]]:
// CHECK: %[[LoadRet:.+]] = load i32, i32* %[[RetVal]], align 4
Expand Down
20 changes: 4 additions & 16 deletions clang/test/CodeGenCoroutines/coro-gro-exp-namespace.cpp
Expand Up @@ -48,14 +48,13 @@ void doSomething() noexcept;
// CHECK: define{{.*}} i32 @_Z1fv(
int f() {
// CHECK: %[[RetVal:.+]] = alloca i32
// CHECK: %[[GroActive:.+]] = alloca i1

// CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64()
// CHECK: call noalias noundef nonnull i8* @_Znwm(i64 noundef %[[Size]])
// CHECK: store i1 false, i1* %[[GroActive]]
// CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeC1Ev(
// CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type17get_return_objectEv(
// CHECK: store i1 true, i1* %[[GroActive]]
// CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type17get_return_objectEv(%struct.GroType* sret(%struct.GroType) align {{[0-9]+}} %[[GRO:.+]],
// CHECK: %[[Conv:.+]] = call noundef i32 @_ZN7GroTypecviEv({{.*}}[[GRO]]
// CHECK: store i32 %[[Conv]], i32* %[[RetVal]]

Cleanup cleanup;
doSomething();
Expand All @@ -71,18 +70,7 @@ int f() {
// CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free(
// CHECK: call void @_ZdlPv(i8* noundef %[[Mem]])

// Initialize retval from Gro and destroy Gro

// CHECK: %[[Conv:.+]] = call noundef i32 @_ZN7GroTypecviEv(
// CHECK: store i32 %[[Conv]], i32* %[[RetVal]]
// CHECK: %[[IsActive:.+]] = load i1, i1* %[[GroActive]]
// CHECK: br i1 %[[IsActive]], label %[[CleanupGro:.+]], label %[[Done:.+]]

// CHECK: [[CleanupGro]]:
// CHECK: call void @_ZN7GroTypeD1Ev(
// CHECK: br label %[[Done]]

// CHECK: [[Done]]:
// CHECK: coro.ret:
// CHECK: %[[LoadRet:.+]] = load i32, i32* %[[RetVal]]
// CHECK: ret i32 %[[LoadRet]]
}
20 changes: 4 additions & 16 deletions clang/test/CodeGenCoroutines/coro-gro.cpp
Expand Up @@ -46,14 +46,13 @@ void doSomething() noexcept;
// CHECK: define{{.*}} i32 @_Z1fv(
int f() {
// CHECK: %[[RetVal:.+]] = alloca i32
// CHECK: %[[GroActive:.+]] = alloca i1

// CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64()
// CHECK: call noalias noundef nonnull i8* @_Znwm(i64 noundef %[[Size]])
// CHECK: store i1 false, i1* %[[GroActive]]
// CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_typeC1Ev(
// CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_type17get_return_objectEv(
// CHECK: store i1 true, i1* %[[GroActive]]
// CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_type17get_return_objectEv(%struct.GroType* sret(%struct.GroType) align {{[0-9]+}} %[[GRO:.+]],
// CHECK: %[[Conv:.+]] = call noundef i32 @_ZN7GroTypecviEv({{.*}}[[GRO]]
// CHECK: store i32 %[[Conv]], i32* %[[RetVal]]

Cleanup cleanup;
doSomething();
Expand All @@ -69,18 +68,7 @@ int f() {
// CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free(
// CHECK: call void @_ZdlPv(i8* noundef %[[Mem]])

// Initialize retval from Gro and destroy Gro

// CHECK: %[[Conv:.+]] = call noundef i32 @_ZN7GroTypecviEv(
// CHECK: store i32 %[[Conv]], i32* %[[RetVal]]
// CHECK: %[[IsActive:.+]] = load i1, i1* %[[GroActive]]
// CHECK: br i1 %[[IsActive]], label %[[CleanupGro:.+]], label %[[Done:.+]]

// CHECK: [[CleanupGro]]:
// CHECK: call void @_ZN7GroTypeD1Ev(
// CHECK: br label %[[Done]]

// CHECK: [[Done]]:
// CHECK: coro.ret:
// CHECK: %[[LoadRet:.+]] = load i32, i32* %[[RetVal]]
// CHECK: ret i32 %[[LoadRet]]
}
Expand Up @@ -32,16 +32,14 @@ struct coro {
Impl *impl;
};

// Verify that the NRVO is applied to the Gro object.
// Verify that the RVO is applied.
// CHECK-LABEL: define{{.*}} void @_Z1fi(%struct.coro* noalias sret(%struct.coro) align 8 %agg.result, i32 noundef %0)
coro f(int) {
// CHECK: %call = call noalias noundef nonnull i8* @_Znwm(
// CHECK-NEXT: br label %[[CoroInit:.*]]

// CHECK: {{.*}}[[CoroInit]]:
// CHECK: store i1 false, i1* %gro.active
// CHECK: call void @{{.*get_return_objectEv}}(%struct.coro* sret(%struct.coro) align 8 %agg.result
// CHECK-NEXT: store i1 true, i1* %gro.active
co_return;
}

Expand Down Expand Up @@ -75,9 +73,7 @@ coro_two h(int) {
// CHECK-NEXT: br label %[[RetLabel:.*]]

// CHECK: {{.*}}[[InitOnSuccess]]:
// CHECK: store i1 false, i1* %gro.active
// CHECK: call void @{{.*get_return_objectEv}}(%struct.coro_two* sret(%struct.coro_two) align 8 %agg.result
// CHECK-NEXT: store i1 true, i1* %gro.active

// CHECK: [[RetLabel]]:
// CHECK-NEXT: ret void
Expand Down

0 comments on commit d30ca5e

Please sign in to comment.