-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Fix Lambda Mangling in Namespace-Scope Variable Initializers. #159115
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b197e92
a5292c2
b70972c
ea29248
1b279d8
80ca41f
b9a581a
8fb0464
c96de5a
7de57f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// RUN: %clang_cc1 -O0 -triple x86_64-unknown-unknown \ | ||
// RUN: -emit-llvm %s -o - | FileCheck %s | ||
|
||
// RUN: %clang_cc1 -O0 -triple x86_64-pc-windows-msvc \ | ||
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC | ||
|
||
namespace QL { | ||
auto dg1 = [] { return 1; }; | ||
inline auto dg_inline1 = [] { return 1; }; | ||
} | ||
|
||
namespace QL { | ||
auto dg2 = [] { return 2; }; | ||
template<int N> | ||
auto dg_template = [] { return N; }; | ||
} | ||
|
||
using namespace QL; | ||
template<typename T> | ||
void f(T t) { | ||
t(); | ||
} | ||
|
||
void g() { | ||
f(dg1); | ||
f(dg2); | ||
f(dg_inline1); | ||
f(dg_template<3>); | ||
} | ||
|
||
// CHECK: @_ZN2QL3dg1E = internal global %class.anon undef, align 1 | ||
// CHECK: @_ZN2QL3dg2E = internal global %class.anon.0 undef, align 1 | ||
// CHECK: @_ZN2QL10dg_inline1E = linkonce_odr global %class.anon.2 undef, comdat, align 1 | ||
// CHECK: @_ZN2QL11dg_templateILi3EEE = linkonce_odr global %class.anon.4 undef, comdat, align 1 | ||
|
||
// MSVC: @"?dg1@QL@@3V<lambda_0>@1@A" = internal global %class.anon undef, align 1 | ||
// MSVC: @"?dg2@QL@@3V<lambda_1>@1@A" = internal global %class.anon.0 undef, align 1 | ||
// MSVC: @"?dg_inline1@QL@@3V<lambda_1>@01@A" = linkonce_odr dso_local global %class.anon.2 undef, comdat, align 1 | ||
// MSVC: @"??$dg_template@$02@QL@@3V<lambda_1>@01@A" = linkonce_odr dso_local global %class.anon.4 undef, comdat, align 1 | ||
|
||
|
||
// CHECK: define internal void @"_Z1fIN2QL3$_0EEvT_" | ||
// CHECK: call noundef i32 @"_ZNK2QL3$_0clEv" | ||
// CHECK: define internal void @"_Z1fIN2QL3$_1EEvT_" | ||
// CHECK: define linkonce_odr void @_Z1fIN2QL10dg_inline1MUlvE_EEvT_ | ||
// CHECK: call noundef i32 @_ZNK2QL10dg_inline1MUlvE_clEv | ||
// CHECK: define linkonce_odr void @_Z1fIN2QL11dg_templateILi3EEMUlvE_EEvT_ | ||
// CHECK: call noundef i32 @_ZNK2QL11dg_templateILi3EEMUlvE_clEv | ||
// CHECK: define internal noundef i32 @"_ZNK2QL3$_0clEv" | ||
// CHECK: define internal noundef i32 @"_ZNK2QL3$_1clEv" | ||
// CHECK: define linkonce_odr noundef i32 @_ZNK2QL10dg_inline1MUlvE_clEv | ||
// CHECK: define linkonce_odr noundef i32 @_ZNK2QL11dg_templateILi3EEMUlvE_clEv | ||
|
||
// MSVC: define linkonce_odr dso_local void @"??$f@V<lambda_1>@dg_inline1@QL@@@@YAXV<lambda_1>@dg_inline1@QL@@@Z" | ||
// MSVC: call noundef i32 @"??R<lambda_1>@dg_inline1@QL@@QEBA?A?<auto>@@XZ" | ||
// MSVC: define linkonce_odr dso_local void @"??$f@V<lambda_1>@?$dg_template@$02@QL@@@@YAXV<lambda_1>@?$dg_template@$02@QL@@@Z" | ||
// MSVC: call noundef i32 @"??R<lambda_1>@?$dg_template@$02@QL@@QEBA?A?<auto>@@XZ" | ||
// MSVC: define internal noundef i32 @"??R<lambda_0>@QL@@QEBA?A?<auto>@@XZ" | ||
// MSVC: define internal noundef i32 @"??R<lambda_1>@QL@@QEBA?A?<auto>@@XZ" | ||
// MSVC: define linkonce_odr dso_local noundef i32 @"??R<lambda_1>@dg_inline1@QL@@QEBA?A?<auto>@@XZ" | ||
// MSVC: define linkonce_odr dso_local noundef i32 @"??R<lambda_1>@?$dg_template@$02@QL@@QEBA?A?<auto>@@XZ" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
// RUN: %clang_cc1 -fsycl-is-device -O0 -triple spirv64-unknown-unknown \ | ||
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE | ||
|
||
// RUN: %clang_cc1 -fsycl-is-host -O0 -triple spirv64-unknown-unknown \ | ||
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST | ||
|
||
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm \ | ||
// RUN: -aux-triple x86_64-pc-windows-msvc -triple spir-unknown--unknown \ | ||
// RUN: %s -o - | FileCheck %s --check-prefix=MSVC | ||
|
||
namespace QL { | ||
auto dg1 = [] { return 1; }; | ||
inline auto dg_inline1 = [] { return 1; }; | ||
} | ||
|
||
namespace QL { | ||
auto dg2 = [] { return 2; }; | ||
template<int N> | ||
auto dg_template = [] { return N; }; | ||
} | ||
|
||
using namespace QL; | ||
template<typename T> | ||
[[clang::sycl_kernel_entry_point(T)]] void f(T t) { | ||
t(); | ||
} | ||
|
||
void g() { | ||
f(dg1); | ||
f(dg2); | ||
f(dg_inline1); | ||
f(dg_template<3>); | ||
} | ||
|
||
// HOST: @_ZN2QL3dg1E = internal global %class.anon undef, align 1 | ||
// HOST: @_ZN2QL3dg2E = internal global %class.anon.0 undef, align 1 | ||
// HOST: @_ZN2QL10dg_inline1E = linkonce_odr global %class.anon.2 undef, comdat, align 1 | ||
// HOST: @_ZN2QL11dg_templateILi3EEE = linkonce_odr global %class.anon.4 undef, comdat, align 1 | ||
|
||
// DEVICE: define spir_kernel void @_ZTSN2QL3dg1MUlvE_E | ||
// DEVICE: call spir_func noundef i32 @_ZNK2QL3dg1MUlvE_clEv | ||
// DEVICE: define internal spir_func noundef i32 @_ZNK2QL3dg1MUlvE_clEv | ||
// DEVICE: define spir_kernel void @_ZTSN2QL3dg2MUlvE_E | ||
// DEVICE: call spir_func noundef i32 @_ZNK2QL3dg2MUlvE_clEv | ||
// DEVICE: define internal spir_func noundef i32 @_ZNK2QL3dg2MUlvE_clEv | ||
// DEVICE: define spir_kernel void @_ZTSN2QL10dg_inline1MUlvE_E | ||
// DEVICE: call spir_func noundef i32 @_ZNK2QL10dg_inline1MUlvE_clEv | ||
// DEVICE: define linkonce_odr spir_func noundef i32 @_ZNK2QL10dg_inline1MUlvE_clEv | ||
// DEVICE: define spir_kernel void @_ZTSN2QL11dg_templateILi3EEMUlvE_E | ||
// DEVICE: call spir_func noundef i32 @_ZNK2QL11dg_templateILi3EEMUlvE_clEv | ||
// DEVICE: define linkonce_odr spir_func noundef i32 @_ZNK2QL11dg_templateILi3EEMUlvE_clEv | ||
|
||
// HOST: define spir_func void @_Z1gv | ||
// HOST: call spir_func void @_Z1fIN2QL3dg1MUlvE_EEvT_ | ||
// HOST: call spir_func void @_Z1fIN2QL3dg2MUlvE_EEvT_ | ||
// HOST: call spir_func void @_Z1fIN2QL10dg_inline1MUlvE_EEvT_ | ||
// HOST: call spir_func void @_Z1fIN2QL11dg_templateILi3EEMUlvE_EEvT_ | ||
// HOST: define internal spir_func void @_Z1fIN2QL3dg1MUlvE_EEvT | ||
zahiraam marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// HOST: define internal spir_func void @_Z1fIN2QL3dg2MUlvE_EEvT_ | ||
// HOST: define linkonce_odr spir_func void @_Z1fIN2QL10dg_inline1MUlvE_EEvT_ | ||
// HOST: define linkonce_odr spir_func void @_Z1fIN2QL11dg_templateILi3EEMUlvE_EEvT_ | ||
|
||
// MSVC: define dso_local spir_kernel void @_ZTSN2QL3dg1MUlvE_E | ||
// MSVC: call spir_func noundef i32 @_ZNK2QL3dg1MUlvE_clEv | ||
// MSVC: define internal spir_func noundef i32 @_ZNK2QL3dg1MUlvE_clEv | ||
// MSVC: define dso_local spir_kernel void @_ZTSN2QL3dg2MUlvE_E | ||
// MSVC: call spir_func noundef i32 @_ZNK2QL3dg2MUlvE_clEv | ||
// MSVC: define internal spir_func noundef i32 @_ZNK2QL3dg2MUlvE_clEv | ||
// MSVC: define dso_local spir_kernel void @_ZTSN2QL10dg_inline1MUlvE_E | ||
// MSVC: call spir_func noundef i32 @_ZNK2QL10dg_inline1MUlvE_clEv | ||
// MSVC: define linkonce_odr spir_func noundef i32 @_ZNK2QL10dg_inline1MUlvE_clEv | ||
// MSVC: define dso_local spir_kernel void @_ZTSN2QL11dg_templateILi3EEMUlvE_E | ||
// MSVC: call spir_func noundef i32 @_ZNK2QL11dg_templateILi3EEMUlvE_clEv | ||
// MSVC: define linkonce_odr spir_func noundef i32 @_ZNK2QL11dg_templateILi3EEMUlvE_clEv | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've been looking at mangled names generated by GCC and Clang in order to convince myself that these changes won't cause other problems. The good news is that, I think, these changes are a strict improvement over the status quo. The bad news is, there appear to be a lot of problems with mangling of lambdas in Clang (and a few in GCC). I don't have a comprehensive set of cases to review, but I think the following case provides a good starting point for discussion. Tagging @zygoloid and @rjmccall for any thoughts they have. In the following, the "SYCL" annotation means Clang with CUDA, HIP, or SYCL enabled, it doesn't matter which. I chose SYCL because that is what I'm using in the examples. The annotated mangled names correspond to the generated call operator for the lambda expression that appears in each example. https://godbolt.org/z/j7roYMsEv
A few observations:
The current changes in this PR have no effect on this example. Ideally, additional changes would be made to include a The question for this PR is, how far should we go in addressing this and other cases? I have, or will have, additional test cases, but it might be best to file new issues for those and follow up on them separately. I suspect some of the other examples I have might warrant new issues for the Itanium C++ ABI to address. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That lambda should have internal linkage (the relevant part of the ODR does not apply to lambdas in default template arguments), so that seems fine. Except that Clang fails to actually give the lambda internal linkage, which looks like a bug to me.
That's correct. Lambdas in default template arguments are not "owned" by the template and so shouldn't allocate mangling numbers within it. Note that their existence can differ across redeclarations in different TUs, so numbering them would mean the same entity could get different manglings in different TUs.
We'd previously agreed (on a different github issue, I could probably dig it up if needed) that we want to stop using the But separately from that, in the
What new requirements does SYCL have compared to C++ here? It looks like at a minimum we effectively want to treat all entities as visible-across-TUs for lambda mangling purposes, but as I noted above, we want to do that anyway. And those manglings should presumably exactly match the Itanium ones. But it sounds like there's more going on here: for lambdas that don't have a defined mangling at all under the ABI (where there's no context declaration, such as lambdas appearing in default arguments of functions or templates, or other exotic places that otherwise aren't within the scope of some lambda mangling context), SYCL needs a mangling scheme. How stable does that need to be? Would it be OK to number them within the translation unit, or within the namespace, as Clang happens to do at the moment? Or do you need a scheme that attempts to associate the lambdas with the declaration they end up being default arguments for, in some way? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks @zygoloid!
I see, ok, we can follow up on that.
Thank you! This is a very helpful insight that I was missing and failed to pick up from the standard wording. Assuming I'm reading things correctly, [basic.def.odr]p(16.11) also makes it clear that uses of such default template arguments result in distinct closure types in the contexts where those uses occur. The example in [basic.def.odr]p19 makes it clear that correspondence across TUs is required for default arguments of function parameters in member functions. The preceding text appears to treat default template arguments and function default arguments the same, so I presume the example is applicable to default template arguments as well. I think the example is more subtle than the description suggests though. Where it states, "The definition of Back to the ABI, my take away from this is that discriminators allocated from namespace and class contexts need not correspond across TUs; when correspondence is required, discriminators are allocated from more local declaration contexts (except in cases like those reported in itanium-cxx-abi/cxx-abi#165). If so, then it seems I need not worry (too much) about discriminators being allocated and unused or about the order in which they are allocated in those contexts. Another example to check what I'm learning; https://godbolt.org/z/M4fY4e3Px:
Per your reference to [basic.def.odr]p18, if GCC includes a For Clang, the situation looks much like the last case. Hmm, I think I'm rediscovering #143218.
That would be great.
Ok, we can follow up on that.
That is correct. The extra requirements for CUDA/HIP/SYCL stem from the need for types used in kernel invocations to correspond across the host target TU and each of the device target TUs, even for types that don't have external linkage. SYCL issue 454 seeks to clarify what kinds of heroics are required; at least in the face of preprocessor shenanigans. For SYCL, the correspondence concerns are limited to the type used to name the kernel, the lambda/class type used to define the kernel, and the types of its captures/data members. For CUDA/HIP, I think the correspondence concerns are limited to the types used as kernel arguments. I can provide examples if it would be helpful. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Oops, I wrote that while playing with a different test case and then switched to a different test case for the comment. I think the desired behavior is to use the declaration context for the point of use in each of these cases with a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes. I suspect we probably don't get that right just yet, and there's some ABI subtleties here that I think the Itanium ABI doesn't precisely describe yet (but fortunately there's not a lot of choice unless an implementation gets "clever"). For example: template<auto a = [] { static int n; return &n; }, auto b = [] { static int n; return &n; }>
int *f(bool x) { return x ? a() : b(); }
inline int *g(bool x) { return f(x); } Here, the two lambdas should be mangled within the context of function
Broadly, the rule is that the whole program must behave as if the class definition is only defined in one place, and somehow the textual repeats of it in other places act as if they were importing / referencing that one definition rather than adding another definition. So the behavior must be as if there is only one lambda-expression in the whole program -- regardless of how you reach the closure type, it's the same type. In practice this means we need to give it external (linkonce_odr) linkage and a mangling :) So, for example, if we had:
... then a call to
For namespace contexts: yes. The Itanium ABI never allocates discriminators for namespace context, and there's no way they could align across translation units anyway. Lambdas at namespace context should never be emitted with external linkage -- except that maybe SYCL wants some special rule here for things that are notionally internal but still need to link between host and device. For class contexts, we need cross-TU correspondence. (@rjmccall and I had a discussion a while back about trying to use a context more specific than the class where possible, to avoid the ABI relying on declaration order within a class definition, but as I recall we didn't come up with anything that we liked substantially more than just numbering lexically within the class for the long tail of cases where the lambda isn't within, say, a function declaration.)
Looks like everyone gets this wrong. Each template-id is supposed to perform a fresh substitution into the default template arguments, meaning that
I think GCC is getting this one approximately right. The context declaration here should be
The discriminator numbering used by GCC is obviously wrong, though. (The
I don't think that's right, but I'm not entirely certain, and I think there might be a wording issue here. The ODR says the behavior is as if the token sequences of the lambdas appeared within the inline function, and the lambdas would be equivalent under those terms. However, it also says in /18 that we should be "excluding entities defined within default arguments or default template arguments of either D or an entity not defined within D". But what does that mean for entities formed by instantiating a default template argument, especially one where the templated lambda appears within a class (or similar entity where we'd "normally" deduplicate the lambda across translation units? When the lambda is formed by instantiating a default [template] argument, we in general need for it to have the place where instantiation was triggered as its context declaration, and it seems fine for lambdas to be given an "identity" on that basis. I think the most reasonable and certainly the most useful interpretation is that the ODR does apply in those cases still, and therefore we need to give these instantiated lambdas a mangling in the context in which the instantiation happens. That'd mean: // ODR violation if this appears across TUs, each g would have a different lambda
void f(void (*arg)() = []{});
inline void g() { f(); }
// OK! Lambda is instantiated in the call to `f()`, so `g()` has its own lambda, and
// it's the same one each time, so no ODR violation.
template<typename = void>
void f(void (*arg)() = []{});
inline void g() { f(); } We probably need some CWG feedback on this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks @zygoloid! It is likely going to be at least a few weeks before I'm able to get back to this. When I do, I'll add more tests so that we have explicit examples in source files to document and iterate on. |
Uh oh!
There was an error while loading. Please reload this page.