-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
error in coroutine intrinsic lowering #60713
Comments
Also, when I try to compile the code in #50004, even when I changed the file to […], I get a fatal error: error in backend: Cannot select: intrinsic %llvm.coro.size
clang: error: clang frontend command failed with exit code 70 (use -v to see invocation)
Ubuntu clang version 15.0.7
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
clang: note: diagnostic msg: |
@llvm/issue-subscribers-coroutines |
I had similar issues recently. From what I've observed, there seems to be a regression that started in llvm-14. Without […] In version 14 […] Output after lowering with opt-13 (click arrow to expand):
source_filename = "test.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
%f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i32, i1 }
@.str = private unnamed_addr constant [7 x i8] c"n: %d\0A\00", align 1
@f.resumers = private constant [3 x void (%f.Frame*)*] [void (%f.Frame*)* @f.resume, void (%f.Frame*)* @f.destroy, void (%f.Frame*)* @f.cleanup]
; Ramp function of @f as lowered by opt-13. It allocates the coroutine frame,
; fills in the %f.Frame fields, performs the first print, and returns the raw
; allocation as the coroutine handle.
define i8* @f(i32 %n) {
entry:
; The frame is allocated with a constant size of 24 bytes.
%alloc = call i8* @malloc(i32 24)
%FramePtr = bitcast i8* %alloc to %f.Frame*
; Frame field 0 <- @f.resume.
%resume.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 0
store void (%f.Frame*)* @f.resume, void (%f.Frame*)** %resume.addr, align 8
; Frame field 1 <- @f.destroy.
%destroy.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 1
store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr, align 8
%val = alloca i32, align 4
store i32 0, i32* %val, align 4
; Frame field 2 <- %n (the value printed below).
%n.val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
store i32 %n, i32* %n.val.spill.addr, align 4
; %inc is computed but not used again in this function.
%inc = add nsw i32 %n, 1
call void @print(i32 %n)
; Frame field 3 (i1) <- false.
%index.addr2 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
store i1 false, i1* %index.addr2, align 1
ret i8* %alloc
}
; Function Attrs: argmemonly nounwind readonly
declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #0
; Function Attrs: nounwind readnone
declare i32 @llvm.coro.size.i32() #1
declare noalias i8* @malloc(i32)
declare void @free(i8*)
; Function Attrs: nounwind
declare i8* @llvm.coro.begin(token, i8* writeonly) #2
declare void @print(i32)
; Function Attrs: nounwind
declare i8 @llvm.coro.suspend(token, i1) #2
; Function Attrs: argmemonly nounwind readonly
declare i8* @llvm.coro.free(token, i8* nocapture readonly) #0
; Function Attrs: nounwind
declare i1 @llvm.coro.end(i8*, i1) #2
; Driver as lowered by opt-13: calls @f(4), then makes three indirect fastcc
; calls through function pointers loaded from the returned frame, viewed as
; { i8*, i8* }: field 0 twice, then field 1. @f stores @f.resume in field 0
; and @f.destroy in field 1.
define i32 @main() {
entry:
%hdl = call i8* @f(i32 4)
; First call through frame field 0.
%0 = bitcast i8* %hdl to { i8*, i8* }*
%1 = getelementptr inbounds { i8*, i8* }, { i8*, i8* }* %0, i32 0, i32 0
%2 = load i8*, i8** %1, align 8
%3 = bitcast i8* %2 to void (i8*)*
call fastcc void %3(i8* %hdl)
; Second call through frame field 0.
%4 = bitcast i8* %hdl to { i8*, i8* }*
%5 = getelementptr inbounds { i8*, i8* }, { i8*, i8* }* %4, i32 0, i32 0
%6 = load i8*, i8** %5, align 8
%7 = bitcast i8* %6 to void (i8*)*
call fastcc void %7(i8* %hdl)
; Call through frame field 1.
%8 = bitcast i8* %hdl to { i8*, i8* }*
%9 = getelementptr inbounds { i8*, i8* }, { i8*, i8* }* %8, i32 0, i32 1
%10 = load i8*, i8** %9, align 8
%11 = bitcast i8* %10 to void (i8*)*
call fastcc void %11(i8* %hdl)
ret i32 0
}
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
; Function Attrs: argmemonly nounwind readonly
declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #0
declare void @i64_print(i64)
; Function Attrs: cold noreturn nounwind
declare void @llvm.trap() #3
; Function Attrs: nomerge nounwind
declare token @llvm.coro.save(i8*) #4
; Resume function of @f as emitted by opt-13.
; Control enters through resume.entry -> resume.0, so the phi in
; resume.0.landing yields 0 and the switch takes the loop path: reload n from
; frame field 2, add 1, store it back, print it, and store false to field 3.
; Control then passes through CoroSuspend, where the phi yields -1, and the
; switch default (suspend) leads to the return. The phi inputs are only 0 and
; -1, so the `i8 1` case (cleanup) is never selected in this function.
define internal fastcc void @f.resume(%f.Frame* noalias nonnull align 8 dereferenceable(24) %FramePtr) {
entry.resume:
%vFrame = bitcast %f.Frame* %FramePtr to i8*
%val = alloca i32, align 4
br label %resume.entry
loop: ; preds = %loop.from.AfterCoroSuspend
; Store the incremented value back to frame field 2 and print it.
%n.val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
store i32 %inc1, i32* %n.val.spill.addr, align 4
; %inc is not used again in this function.
%inc = add nsw i32 %inc1, 1
call void @print(i32 %inc1)
br label %CoroSave
CoroSave: ; preds = %loop
; Frame field 3 (i1) <- false.
%index.addr2 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
store i1 false, i1* %index.addr2, align 1
br label %CoroSuspend
CoroSuspend: ; preds = %CoroSave
br label %resume.0.landing
resume.0: ; preds = %resume.entry
br label %resume.0.landing
resume.0.landing: ; preds = %resume.0, %CoroSuspend
; 0 when arriving from the function entry path, -1 after the loop body ran.
%0 = phi i8 [ -1, %CoroSuspend ], [ 0, %resume.0 ]
br label %AfterCoroSuspend
AfterCoroSuspend: ; preds = %resume.0.landing
switch i8 %0, label %suspend [
i8 0, label %loop.from.AfterCoroSuspend
i8 1, label %cleanup
]
loop.from.AfterCoroSuspend: ; preds = %AfterCoroSuspend
; Reload n from frame field 2 and increment it for the next print.
%n.val.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
%n.val.reload = load i32, i32* %n.val.reload.addr, align 4
%inc1 = add nsw i32 %n.val.reload, 1
br label %loop
cleanup: ; preds = %AfterCoroSuspend
call void @free(i8* %vFrame)
br label %suspend
suspend: ; preds = %cleanup, %AfterCoroSuspend
br label %CoroEnd
CoroEnd: ; preds = %suspend
ret void
resume.entry: ; preds = %entry.resume
br label %resume.0
}
; Destroy function of @f as emitted by opt-13.
; Same block layout as @f.resume, but the phi input on the entry path is 1,
; so the switch selects cleanup: the frame is freed and the function returns.
; The phi inputs are only 1 and -1, so the `i8 0` case (the loop path) is
; never selected in this function.
define internal fastcc void @f.destroy(%f.Frame* noalias nonnull align 8 dereferenceable(24) %FramePtr) {
entry.destroy:
%vFrame = bitcast %f.Frame* %FramePtr to i8*
%val = alloca i32, align 4
br label %resume.entry
loop: ; preds = %loop.from.AfterCoroSuspend
%n.val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
store i32 %inc1, i32* %n.val.spill.addr, align 4
%inc = add nsw i32 %inc1, 1
call void @print(i32 %inc1)
br label %CoroSave
CoroSave: ; preds = %loop
%index.addr2 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
store i1 false, i1* %index.addr2, align 1
br label %CoroSuspend
CoroSuspend: ; preds = %CoroSave
br label %resume.0.landing
resume.0: ; preds = %resume.entry
br label %resume.0.landing
resume.0.landing: ; preds = %resume.0, %CoroSuspend
; 1 when arriving from the function entry path.
%0 = phi i8 [ -1, %CoroSuspend ], [ 1, %resume.0 ]
br label %AfterCoroSuspend
AfterCoroSuspend: ; preds = %resume.0.landing
switch i8 %0, label %suspend [
i8 0, label %loop.from.AfterCoroSuspend
i8 1, label %cleanup
]
loop.from.AfterCoroSuspend: ; preds = %AfterCoroSuspend
%n.val.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
%n.val.reload = load i32, i32* %n.val.reload.addr, align 4
%inc1 = add nsw i32 %n.val.reload, 1
br label %loop
cleanup: ; preds = %AfterCoroSuspend
; Release the coroutine frame itself.
call void @free(i8* %vFrame)
br label %suspend
suspend: ; preds = %cleanup, %AfterCoroSuspend
br label %CoroEnd
CoroEnd: ; preds = %suspend
ret void
resume.entry: ; preds = %entry.destroy
br label %resume.0
}
; Cleanup function of @f as emitted by opt-13.
; Same block layout and phi inputs as @f.destroy (the entry path yields 1 and
; selects cleanup), except that the cleanup block calls free on a null
; pointer instead of on the frame.
define internal fastcc void @f.cleanup(%f.Frame* noalias nonnull align 8 dereferenceable(24) %FramePtr) {
entry.cleanup:
%vFrame = bitcast %f.Frame* %FramePtr to i8*
%val = alloca i32, align 4
br label %resume.entry
loop: ; preds = %loop.from.AfterCoroSuspend
%n.val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
store i32 %inc1, i32* %n.val.spill.addr, align 4
%inc = add nsw i32 %inc1, 1
call void @print(i32 %inc1)
br label %CoroSave
CoroSave: ; preds = %loop
%index.addr2 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
store i1 false, i1* %index.addr2, align 1
br label %CoroSuspend
CoroSuspend: ; preds = %CoroSave
br label %resume.0.landing
resume.0: ; preds = %resume.entry
br label %resume.0.landing
resume.0.landing: ; preds = %resume.0, %CoroSuspend
; 1 when arriving from the function entry path.
%0 = phi i8 [ -1, %CoroSuspend ], [ 1, %resume.0 ]
br label %AfterCoroSuspend
AfterCoroSuspend: ; preds = %resume.0.landing
switch i8 %0, label %suspend [
i8 0, label %loop.from.AfterCoroSuspend
i8 1, label %cleanup
]
loop.from.AfterCoroSuspend: ; preds = %AfterCoroSuspend
%n.val.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
%n.val.reload = load i32, i32* %n.val.reload.addr, align 4
%inc1 = add nsw i32 %n.val.reload, 1
br label %loop
cleanup: ; preds = %AfterCoroSuspend
; free is called with null here, so the frame memory is not released by this function.
call void @free(i8* null)
br label %suspend
suspend: ; preds = %cleanup, %AfterCoroSuspend
br label %CoroEnd
CoroEnd: ; preds = %suspend
ret void
resume.entry: ; preds = %entry.cleanup
br label %resume.0
}
attributes #0 = { argmemonly nounwind readonly }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
attributes #3 = { cold noreturn nounwind }
attributes #4 = { nomerge nounwind }
!llvm.linker.options = !{}
!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 1}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"Ubuntu clang version 14.0.6"}
|
Looks like you are right. I am able to compile the module with […]. So this is a bug that needs to be fixed, right? |
Yeah, this one should be a bug to be addressed. |
This looks related to pipeline. @aeubanks would you like to take a look? |
Unless I'm missing something, all the modules and C++ code linked in this bug are either incomplete or don't compile. Is there some complete C++ code, or LLVM IR plus a clang/opt invocation, to look at? |
@aeubanks Here's the full LLVM IR used to generate the opt-13 output a few posts above:
; ModuleID = 'test.ll'
source_filename = "test.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@.str = private unnamed_addr constant [7 x i8] c"n: %d\0A\00", align 1
; Reproducer input: coroutine @f prints %n, then %n+1, ... and suspends after
; each print. It returns the coroutine handle produced by llvm.coro.begin.
; NOTE: this definition carries no function attributes; compare the corrected
; listing later in the thread, which adds presplitcoroutine.
define i8* @f(i32 %n) {
entry:
; Coroutine setup: id token, frame size query, frame allocation, begin.
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
; %val is initialised to 0 but not read anywhere in this function.
%val = alloca i32
store i32 0, i32* %val
br label %loop
loop:
; %n.val starts at %n and takes %inc on every later iteration.
%n.val = phi i32 [ %n, %entry ], [ %inc, %loop ]
%inc = add nsw i32 %n.val, 1
call void @print(i32 %n.val)
; Suspend point: result 0 continues the loop, 1 goes to cleanup,
; any other value goes to suspend.
%0 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
; Free the pointer returned by llvm.coro.free for this coroutine's id and handle.
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
%unused = call i1 @llvm.coro.end(i8* %hdl, i1 false)
ret i8* %hdl
}
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i32 @llvm.coro.size.i32()
declare noalias i8* @malloc(i32)
declare void @free(i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare void @print(i32)
declare i8 @llvm.coro.suspend(token, i1)
declare i8* @llvm.coro.free(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
define i32 @main() {
entry:
%hdl = call i8* @f(i32 4)
call void @llvm.coro.resume(i8* %hdl)
call void @llvm.coro.resume(i8* %hdl)
call void @llvm.coro.destroy(i8* %hdl)
ret i32 0
}
It's basically the example from the docs. The expected output is in the post above (using opt-13); below is the output when using opt-14 or newer:
; ModuleID = 'test.ll'
source_filename = "test.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Output of opt-14 or newer for the reproducer. @f is still a single function:
; the calls to llvm.coro.size.i32, llvm.coro.suspend and llvm.coro.end remain,
; while the input's llvm.coro.id / llvm.coro.begin / llvm.coro.free calls are
; gone and %alloc is used directly in their place.
define i8* @f(i32 %n) {
entry:
; The frame size is still an intrinsic call, not a constant.
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%val = alloca i32, align 4
store i32 0, i32* %val, align 4
br label %loop
loop: ; preds = %loop, %entry
%n.val = phi i32 [ %n, %entry ], [ %inc, %loop ]
%inc = add nsw i32 %n.val, 1
call void @print(i32 %n.val)
; The suspend intrinsic call is still present in this output.
%0 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %0, label %suspend [
i8 0, label %loop
i8 1, label %cleanup
]
cleanup: ; preds = %loop
; The raw allocation is freed directly.
call void @free(i8* %alloc)
br label %suspend
suspend: ; preds = %cleanup, %loop
%unused = call i1 @llvm.coro.end(i8* %alloc, i1 false) #2
ret i8* %alloc
}
; Function Attrs: nounwind readnone
declare i32 @llvm.coro.size.i32() #0
declare noalias i8* @malloc(i32)
declare void @free(i8*)
declare void @print(i32)
; Function Attrs: nounwind
declare i8 @llvm.coro.suspend(token, i1) #1
; Function Attrs: nounwind
declare i1 @llvm.coro.end(i8*, i1) #1
; Driver in the opt-14+ output: calls @f(4), then makes three indirect fastcc
; calls through pointers loaded from the returned memory, viewed as
; { i8*, i8* }: field 0 twice, then field 1. In this module @f returns the raw
; malloc result and contains no stores to those two fields.
define i32 @main() {
entry:
%hdl = call i8* @f(i32 4)
; First call through field 0.
%0 = bitcast i8* %hdl to { i8*, i8* }*
%1 = getelementptr inbounds { i8*, i8* }, { i8*, i8* }* %0, i32 0, i32 0
%2 = load i8*, i8** %1, align 8
%3 = bitcast i8* %2 to void (i8*)*
call fastcc void %3(i8* %hdl)
; Second call through field 0.
%4 = bitcast i8* %hdl to { i8*, i8* }*
%5 = getelementptr inbounds { i8*, i8* }, { i8*, i8* }* %4, i32 0, i32 0
%6 = load i8*, i8** %5, align 8
%7 = bitcast i8* %6 to void (i8*)*
call fastcc void %7(i8* %hdl)
; Call through field 1.
%8 = bitcast i8* %hdl to { i8*, i8* }*
%9 = getelementptr inbounds { i8*, i8* }, { i8*, i8* }* %8, i32 0, i32 1
%10 = load i8*, i8** %9, align 8
%11 = bitcast i8* %10 to void (i8*)*
call fastcc void %11(i8* %hdl)
ret i32 0
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { noduplicate } |
I'm not too familiar with coroutines, which pass is supposed to lower the coroutine size intrinsic? (I'm not figuring it out from a quick glance at the various coroutine passes) |
@aeubanks |
(running |
Well, this is not a bug, but my fault. I forgot to add the presplitcoroutine attribute:
; Corrected coroutine @f: prints %0, then %0+1, ... and suspends after each
; print. The presplitcoroutine attribute is present on the definition.
define ptr @f(i32 %0) presplitcoroutine {
entry:
; Coroutine setup: id token, frame size query, frame allocation, begin.
%1 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%2 = call i32 @llvm.coro.size.i32()
%3 = call ptr @malloc(i32 %2)
%4 = call ptr @llvm.coro.begin(token %1, ptr %3)
br label %loop
loop: ; preds = %loop, %entry
; %5 starts at the argument and takes %6 on every later iteration.
%5 = phi i32 [ %0, %entry ], [ %6, %loop ]
%6 = add nsw i32 %5, 1
%7 = call i32 (ptr, ...) @printf(ptr @0, i32 %5)
; llvm.coro.suspend takes a llvm.coro.save token or `token none`, not the
; llvm.coro.id token; there is no explicit coro.save here, so pass none.
%8 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %8, label %suspend [
i8 0, label %loop
i8 1, label %cleanup
]
cleanup: ; preds = %loop
; llvm.coro.free takes the llvm.coro.id token that identifies this coroutine.
%9 = call ptr @llvm.coro.free(token %1, ptr %4)
call void @free(ptr %9)
br label %suspend
suspend: ; preds = %cleanup, %loop
%10 = call i1 @llvm.coro.end(ptr %4, i1 false)
ret ptr %4
}
define i32 @main() {
entry:
%0 = call ptr @f(i32 4)
call void @llvm.coro.resume(ptr %0)
call void @llvm.coro.resume(ptr %0)
call void @llvm.coro.destroy(ptr %0)
ret i32 0
}
This compiles fine with […]. |
That should probably be a verifier check instead of a random assert? |
going to reopen this as the current error reporting seems confusing, this seems very easy to get wrong and there should be some sort of error reporting, whether in the lowering pass itself or in the verifier |
Yeah, a verifier looks better. Let's track the issue in #60863. |
Hey, I was trying to compile a module similar to one in the coroutine documentation ^.
When I try to compile this with `clang-15 coro.ll` (apt.llvm distribution of Clang), I get the following error.
Also, with `llc-15 coro.ll` I get:
So I tried to run the coroutine lowering passes manually with […].
The resulting module looks like this:
The documentation says `llvm.coro.size.i32` should be lowered to a constant, so I guess the call to it should not be here.
The text was updated successfully, but these errors were encountered: