Skip to content

Commit

Permalink
[Coroutines] Offering llvm.coro.align intrinsic
Browse files Browse the repository at this point in the history
It is a known problem that we can't align the switch-based coroutine
frame if the alignment exceeds std::max_align_t (which is 16 usually).

We could solve the problem in the middle end by transforming the allocation
dynamically, or in the frontend by emitting a call to an aligned allocation function.

If we need to solve it in the frontend, the middle end needs to offer at
least an intrinsic that reports the alignment. This patch offers such
an intrinsic, called llvm.coro.align.

Reviewed By: https://reviews.llvm.org/D117542

Differential revision: https://reviews.llvm.org/D117542
  • Loading branch information
ChuanqiXu9 committed Jan 19, 2022
1 parent 76b7423 commit c8ecf12
Show file tree
Hide file tree
Showing 12 changed files with 315 additions and 2 deletions.
26 changes: 26 additions & 0 deletions llvm/docs/Coroutines.rst
Expand Up @@ -948,6 +948,32 @@ Semantics:
The `coro.size` intrinsic is lowered to a constant representing the size of
the coroutine frame.

.. _coro.align:

'llvm.coro.align' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::

declare i32 @llvm.coro.align.i32()
declare i64 @llvm.coro.align.i64()

Overview:
"""""""""

The '``llvm.coro.align``' intrinsic returns the alignment of a `coroutine frame`_.
This is only supported for switched-resume coroutines.

Arguments:
""""""""""

None

Semantics:
""""""""""

The `coro.align` intrinsic is lowered to a constant representing the alignment of
the coroutine frame.

.. _coro.begin:

'llvm.coro.begin' Intrinsic
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Expand Up @@ -633,6 +633,7 @@ class TargetTransformInfoImplBase {
case Intrinsic::coro_end:
case Intrinsic::coro_frame:
case Intrinsic::coro_size:
case Intrinsic::coro_align:
case Intrinsic::coro_suspend:
case Intrinsic::coro_subfn_addr:
// These intrinsics don't actually represent code after lowering.
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Expand Up @@ -1272,6 +1272,7 @@ def int_coro_end_async
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
// llvm.coro.align: returns the alignment of the coroutine frame; lowered to a constant.
def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;

def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Transforms/Coroutines/CoroInstr.h
Expand Up @@ -599,6 +599,18 @@ class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst {
}
};

/// Typed view of a call to the llvm.coro.align intrinsic.
class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst {
public:
  // Type-inquiry hooks consumed by isa<>, cast<>, and dyn_cast<>.
  static bool classof(const IntrinsicInst *I) {
    return Intrinsic::coro_align == I->getIntrinsicID();
  }
  static bool classof(const Value *V) {
    // Anything that is not an intrinsic call cannot be a coro.align.
    if (!isa<IntrinsicInst>(V))
      return false;
    return classof(cast<IntrinsicInst>(V));
  }
};

class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst {
enum { FrameArg, UnwindArg };

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Coroutines/CoroInternal.h
Expand Up @@ -104,6 +104,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
CoroBeginInst *CoroBegin;
SmallVector<AnyCoroEndInst *, 4> CoroEnds;
SmallVector<CoroSizeInst *, 2> CoroSizes;
SmallVector<CoroAlignInst *, 2> CoroAligns;
SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
SmallVector<CallInst*, 2> SwiftErrorOps;

Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Transforms/Coroutines/CoroSplit.cpp
Expand Up @@ -1083,10 +1083,16 @@ static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
}

static void replaceFrameSize(coro::Shape &Shape) {
static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
if (Shape.ABI == coro::ABI::Async)
updateAsyncFuncPointerContextSize(Shape);

for (CoroAlignInst *CA : Shape.CoroAligns) {
CA->replaceAllUsesWith(
ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
CA->eraseFromParent();
}

if (Shape.CoroSizes.empty())
return;

Expand Down Expand Up @@ -1884,7 +1890,7 @@ static coro::Shape splitCoroutine(Function &F,

simplifySuspendPoints(Shape);
buildCoroutineFrame(F, Shape);
replaceFrameSize(Shape);
replaceFrameSizeAndAlignment(Shape);

// If there are no suspend points, no split required, just remove
// the allocation and deallocation blocks, they are not needed.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Coroutines/Coroutines.cpp
Expand Up @@ -123,6 +123,7 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
static bool isCoroutineIntrinsicName(StringRef Name) {
// NOTE: Must be sorted!
static const char *const CoroIntrinsics[] = {
"llvm.coro.align",
"llvm.coro.alloc",
"llvm.coro.async.context.alloc",
"llvm.coro.async.context.dealloc",
Expand Down Expand Up @@ -268,6 +269,9 @@ void coro::Shape::buildFrom(Function &F) {
case Intrinsic::coro_size:
CoroSizes.push_back(cast<CoroSizeInst>(II));
break;
case Intrinsic::coro_align:
CoroAligns.push_back(cast<CoroAlignInst>(II));
break;
case Intrinsic::coro_frame:
CoroFrames.push_back(cast<CoroFrameInst>(II));
break;
Expand Down
54 changes: 54 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-align-01.ll
@@ -0,0 +1,54 @@
; Tests that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

; Coroutine whose frame memory comes from @aligned_alloc, called with the
; results of llvm.coro.align (first argument) and llvm.coro.size (second).
define i8* @f() "coroutine.presplit"="1" {
entry:
%x = alloca i64
%y = alloca i64
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
; Intrinsic under test; the CHECK lines expect both calls to become constants.
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
; %x is used after the suspend and handed to @capture_call.
%x.alias = bitcast i64* %x to i32*
call void @capture_call(i32* %x.alias)
; %y is only handed to @nocapture_call, whose parameter is declared nocapture.
%y.alias = bitcast i64* %y to i32*
call void @nocapture_call(i32* %y.alias)
br label %cleanup

cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend

suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}

; %x needs to go to the frame since it's escaped; %y will stay as local since it doesn't escape.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 32)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])

declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)

declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)

declare void @capture_call(i32*)
declare void @nocapture_call(i32* nocapture)
declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)
46 changes: 46 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-align-02.ll
@@ -0,0 +1,46 @@
; Tests that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

; Coroutine with no allocas; its frame memory comes from @aligned_alloc, called
; with the results of llvm.coro.align (first argument) and llvm.coro.size (second).
define i8* @f() "coroutine.presplit"="1" {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
; Intrinsic under test; the CHECK lines expect both calls to become constants.
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
; Nothing is done on resume; control goes straight to cleanup.
br label %cleanup

cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend

suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}

; @f has no allocas, so the frame holds only the two function pointers and the i1 field.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 24)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])

declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)

declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)

declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)
54 changes: 54 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-align-03.ll
@@ -0,0 +1,54 @@
; Tests that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

; Coroutine with a 16-byte-aligned alloca; its frame memory comes from
; @aligned_alloc, called with the results of llvm.coro.align (first argument)
; and llvm.coro.size (second).
define i8* @f() "coroutine.presplit"="1" {
entry:
; %x carries an explicit 16-byte alignment; %y uses the default for i64.
%x = alloca i64, align 16
%y = alloca i64
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
; Intrinsic under test; the CHECK lines expect both calls to become constants.
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
; Both allocas are used after the suspend and handed to @capture_call.
%x.alias = bitcast i64* %x to i32*
call void @capture_call(i32* %x.alias)
%y.alias = bitcast i64* %y to i32*
call void @capture_call(i32* %y.alias)
br label %cleanup

cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend

suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}

; Both %x and %y are passed to @capture_call, so both go to the frame; %x's 16-byte alignment is the expected frame alignment.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 16, i32 40)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])

declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)

declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)

declare void @capture_call(i32*)
declare void @nocapture_call(i32* nocapture)
declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)
54 changes: 54 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-align-04.ll
@@ -0,0 +1,54 @@
; Tests that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

; Coroutine with a 64-byte-aligned alloca; its frame memory comes from
; @aligned_alloc, called with the results of llvm.coro.align (first argument)
; and llvm.coro.size (second).
define i8* @f() "coroutine.presplit"="1" {
entry:
; %x is a single i1 with an explicit 64-byte alignment; %y uses the default for i64.
%x = alloca i1, align 64
%y = alloca i64
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
; Intrinsic under test; the CHECK lines expect both calls to become constants.
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
; Both allocas are used after the suspend and handed to @capture_call.
%x.alias = bitcast i1* %x to i32*
call void @capture_call(i32* %x.alias)
%y.alias = bitcast i64* %y to i32*
call void @capture_call(i32* %y.alias)
br label %cleanup

cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend

suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}

; Both %x and %y are passed to @capture_call, so both go to the frame; %x's 64-byte alignment is the expected frame alignment.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1, [39 x i8], i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])

declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)

declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)

declare void @capture_call(i32*)
declare void @nocapture_call(i32* nocapture)
declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)
54 changes: 54 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-align-05.ll
@@ -0,0 +1,54 @@
; Tests that the coro.align intrinsic is lowered to the correct alignment.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

; Coroutine with two over-aligned allocas (64 and 32 bytes); its frame memory
; comes from @aligned_alloc, called with the results of llvm.coro.align (first
; argument) and llvm.coro.size (second).
define i8* @f() "coroutine.presplit"="1" {
entry:
; Two different explicit alignments; the CHECK lines expect the larger one (64).
%x = alloca i1, align 64
%y = alloca i64, align 32
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
; Intrinsic under test; the CHECK lines expect both calls to become constants.
%align = call i32 @llvm.coro.align.i32()
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
; Both allocas are used after the suspend and handed to @capture_call.
%x.alias = bitcast i1* %x to i32*
call void @capture_call(i32* %x.alias)
%y.alias = bitcast i64* %y to i32*
call void @capture_call(i32* %y.alias)
br label %cleanup

cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend

suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}

; Both %x and %y are passed to @capture_call, so both go to the frame; %x's 64-byte alignment (the larger of the two) is the expected frame alignment.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, [15 x i8], i64, [24 x i8], i1 }
; CHECK-LABEL: define i8* @f()
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72)
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])

declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i32 @llvm.coro.align.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)

declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)

declare void @capture_call(i32*)
declare void @nocapture_call(i32* nocapture)
declare noalias i8* @aligned_alloc(i32, i32)
declare void @free(i8*)

0 comments on commit c8ecf12

Please sign in to comment.