Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Coroutines] Optimize the lifespan of temporary co_await object
Summary: If we ever assign co_await to a temporary variable, such as foo(co_await expr), we generate AST that looks like this: MaterializedTemporaryExpr(CoawaitExpr(...)). MaterializedTemporaryExpr would emit an intrinsic that marks the lifetime start of the temporary storage. However, such temporary storage will not be used until co_await is ready to write the result. Marking the lifetime start way too early causes extra storage to be put in the coroutine frame instead of the stack. As you can see from https://godbolt.org/z/zVx_eB, the frame generated for get_big_object2 is 12K, which contains a big_object object unnecessarily. After this patch, the frame size for get_big_object2 is now only 8K. There is still room for improvement; in particular, GCC has a 4K frame for this function. But that's a separate problem and not addressed in this patch. The basic idea of this patch is, during CoroSplit, to look for every local variable in the coroutine created through AllocaInst, identify all the lifetime start/end markers and the uses of the variables, and sink the lifetime.start marker to the place as close to the first-ever use as possible. Reviewers: lewissbaker, modocache, junparser Reviewed By: junparser Subscribers: hiraditya, llvm-commits, rsmith, ChuanqiXu, cfe-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D82314
- Loading branch information
Showing
3 changed files
with
175 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
llvm/test/Transforms/Coroutines/coro-split-sink-lifetime.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
; Tests that coro-split will optimize the lifetime.start marker of each local variable, | ||
; sinking it to the place closest to the actual use. | ||
; RUN: opt < %s -coro-split -S | FileCheck %s | ||
; RUN: opt < %s -passes=coro-split -S | FileCheck %s | ||
|
||
; Struct types used by the test. The Awaiter holds an i32 at field index 0 followed by | ||
; a coroutine handle wrapper, which in turn wraps a single i8*. | ||
%"struct.std::coroutine_handle" = type { i8* } | ||
%"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } | ||
%"struct.lean_future<int>::Awaiter" = type { i32, %"struct.std::coroutine_handle.0" } | ||
|
||
; External functions called by @a: @malloc supplies the memory handed to llvm.coro.begin, | ||
; and @print receives the i32 values loaded after the suspend point. | ||
declare i8* @malloc(i64) | ||
declare void @print(i32) | ||
|
||
; Coroutine under test. lifetime.start for %testval is issued in the entry block, before | ||
; the suspend point, while the only load of %testval is in await.ready (after resume). | ||
define void @a() "coroutine.presplit"="1" { | ||
entry: | ||
%ref.tmp7 = alloca %"struct.lean_future<int>::Awaiter", align 8 | ||
%testval = alloca i32 | ||
%cast = bitcast i32* %testval to i8* | ||
; The lifetime of %testval starts here, but %testval is not used until await.ready. | ||
call void @llvm.lifetime.start.p0i8(i64 4, i8* %cast) | ||
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) | ||
%alloc = call i8* @malloc(i64 16) #3 | ||
%vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) | ||
|
||
%save = call token @llvm.coro.save(i8* null) | ||
; Address of field 0 of %ref.tmp7 is computed before the suspend and loaded from in await.ready. | ||
%Result.i19 = getelementptr inbounds %"struct.lean_future<int>::Awaiter", %"struct.lean_future<int>::Awaiter"* %ref.tmp7, i64 0, i32 0 | ||
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false) | ||
switch i8 %suspend, label %exit [ | ||
i8 0, label %await.ready | ||
i8 1, label %exit | ||
] | ||
await.ready: | ||
; The token produced here is not consumed by any llvm.coro.suspend in this function. | ||
%StrayCoroSave = call token @llvm.coro.save(i8* null) | ||
%val = load i32, i32* %Result.i19 | ||
; Only load of %testval; the CHECK lines for @a.resume expect lifetime.start directly before it. | ||
%test = load i32, i32* %testval | ||
call void @print(i32 %test) | ||
call void @llvm.lifetime.end.p0i8(i64 4, i8* %cast) | ||
call void @print(i32 %val) | ||
br label %exit | ||
exit: | ||
call i1 @llvm.coro.end(i8* null, i1 false) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: @a.resume( | ||
; CHECK: %testval = alloca i32, align 4 | ||
; CHECK-NEXT: getelementptr inbounds %a.Frame | ||
; CHECK-NEXT: getelementptr inbounds %"struct.lean_future<int>::Awaiter" | ||
; CHECK-NEXT: %cast1 = bitcast i32* %testval to i8* | ||
; CHECK-NEXT: %val = load i32, i32* %Result | ||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* %cast1) | ||
; CHECK-NEXT: %test = load i32, i32* %testval | ||
; CHECK-NEXT: call void @print(i32 %test) | ||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* %cast1) | ||
; CHECK-NEXT: call void @print(i32 %val) | ||
; CHECK-NEXT: ret void | ||
|
||
; Declarations of the coroutine and lifetime intrinsics, plus a few external functions. | ||
; @a calls only llvm.coro.id, llvm.coro.begin, llvm.coro.save, llvm.coro.suspend, | ||
; llvm.coro.end and the two lifetime intrinsics; the other declarations below are not | ||
; referenced by @a in the text shown here. | ||
; NOTE(review): the #N attribute-group references have no visible definitions in this file. | ||
declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) | ||
declare i1 @llvm.coro.alloc(token) #3 | ||
declare noalias nonnull i8* @"\01??2@YAPEAX_K@Z"(i64) local_unnamed_addr | ||
declare i64 @llvm.coro.size.i64() #5 | ||
declare i8* @llvm.coro.begin(token, i8* writeonly) #3 | ||
declare void @"\01?puts@@YAXZZ"(...) | ||
declare token @llvm.coro.save(i8*) #3 | ||
declare i8* @llvm.coro.frame() #5 | ||
declare i8 @llvm.coro.suspend(token, i1) #3 | ||
declare void @"\01??3@YAXPEAX@Z"(i8*) local_unnamed_addr #10 | ||
declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2 | ||
declare i1 @llvm.coro.end(i8*, i1) #3 | ||
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4 | ||
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4 | ||
|