-
Notifications
You must be signed in to change notification settings - Fork 12.7k
/
Copy pathAttrDocs.td
9029 lines (7300 loc) · 346 KB
/
AttrDocs.td
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//==--- AttrDocs.td - Attribute documentation ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
// To test that the documentation builds cleanly, you must run clang-tblgen to
// convert the .td file into a .rst file, and then run sphinx to convert the
// .rst file into an HTML file. After completing testing, you should revert the
// generated .rst file so that the modified version does not get checked in to
// version control.
//
// To run clang-tblgen to generate the .rst file:
// clang-tblgen -gen-attr-docs -I <root>/llvm/tools/clang/include
// <root>/llvm/tools/clang/include/clang/Basic/Attr.td -o
// <root>/llvm/tools/clang/docs/AttributeReference.rst
//
// To run sphinx to generate the .html files (note that sphinx-build must be
// available on the PATH):
// Windows (from within the clang\docs directory):
// make.bat html
// Non-Windows (from within the clang\docs directory):
// sphinx-build -b html _build/html
def GlobalDocumentation {
code Intro =[{..
-------------------------------------------------------------------
NOTE: This file is automatically generated by running clang-tblgen
-gen-attr-docs. Do not edit this file by hand!!
-------------------------------------------------------------------
===================
Attributes in Clang
===================
.. contents::
:local:
.. |br| raw:: html
<br/>
Introduction
============
This page lists the attributes currently supported by Clang.
}];
}
def SectionDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
The ``section`` attribute allows you to specify a specific section a
global variable or function should be in after translation.
}];
let Heading = "section, __declspec(allocate)";
}
def CodeModelDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
The ``model`` attribute allows overriding the translation unit's
code model (specified by ``-mcmodel``) for a specific global variable.
}];
let Heading = "model";
}
def UsedDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
This attribute, when attached to a function or variable definition, indicates
that there may be references to the entity which are not apparent in the source
code. For example, it may be referenced from inline ``asm``, or it may be
found through a dynamic symbol or section lookup.
The compiler must emit the definition even if it appears to be unused, and it
must not apply optimizations which depend on fully understanding how the entity
is used.
Whether this attribute has any effect on the linker depends on the target and
the linker. Most linkers support the feature of section garbage collection
(``--gc-sections``), also known as "dead stripping" (``ld64 -dead_strip``) or
discarding unreferenced sections (``link.exe /OPT:REF``). On COFF and Mach-O
targets (Windows and Apple platforms), the `used` attribute prevents symbols
from being removed by linker section GC. On ELF targets, it has no effect on its
own, and the linker may remove the definition if it is not otherwise referenced.
This linker GC can be avoided by also adding the ``retain`` attribute. Note
that ``retain`` requires special support from the linker; see that attribute's
documentation for further information.
}];
}
def RetainDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
This attribute, when attached to a function or variable definition, prevents
section garbage collection in the linker. It does not prevent other discard
mechanisms, such as archive member selection, and COMDAT group resolution.
If the compiler does not emit the definition, e.g. because it was not used in
the translation unit or the compiler was able to eliminate all of the uses,
this attribute has no effect. This attribute is typically combined with the
``used`` attribute to force the definition to be emitted and preserved into the
final linked image.
This attribute is only necessary on ELF targets; other targets prevent section
garbage collection by the linker when using the ``used`` attribute alone.
Using the attributes together should result in consistent behavior across
targets.
This attribute requires the linker to support the ``SHF_GNU_RETAIN`` extension.
This support is available in GNU ``ld`` and ``gold`` as of binutils 2.36, as
well as in ``ld.lld`` 13.
}];
}
def InitPriorityDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
In C++, the order in which global variables are initialized across translation
units is unspecified, unlike the ordering within a single translation unit. The
``init_priority`` attribute allows you to specify a relative ordering for the
initialization of objects declared at namespace scope in C++ within a single
linked image on supported platforms. The priority is given as an integer constant
expression between 101 and 65535 (inclusive). Priorities outside of that range are
reserved for use by the implementation. A lower value indicates a higher priority
of initialization. Note that only the relative ordering of values is important.
For example:
.. code-block:: c++
struct SomeType { SomeType(); };
__attribute__((init_priority(200))) SomeType Obj1;
__attribute__((init_priority(101))) SomeType Obj2;
``Obj2`` will be initialized *before* ``Obj1`` despite the usual order of
initialization being the opposite.
Note that this attribute does not control the initialization order of objects
across final linked image boundaries like shared objects and executables.
On Windows, ``init_seg(compiler)`` is represented with a priority of 200 and
``init_seg(library)`` is represented with a priority of 400. ``init_seg(user)``
uses the default 65535 priority.
On MachO platforms, this attribute also does not control the order of initialization
across translation units, where it only affects the order within a single TU.
This attribute is only supported for C++ and Objective-C++ and is ignored in
other language modes. Currently, this attribute is not implemented on z/OS.
}];
}
def InitSegDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
The attribute applied by ``pragma init_seg()`` controls the section into
which global initialization function pointers are emitted. It is only
available with ``-fms-extensions``. Typically, this function pointer is
emitted into ``.CRT$XCU`` on Windows. The user can change the order of
initialization by using a different section name with the same
``.CRT$XC`` prefix and a suffix that sorts lexicographically before or
after the standard ``.CRT$XCU`` sections. See the init_seg_
documentation on MSDN for more information.
.. _init_seg: http://msdn.microsoft.com/en-us/library/7977wcck(v=vs.110).aspx
}];
}
def TLSModelDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
The ``tls_model`` attribute allows you to specify which thread-local storage
model to use. It accepts the following strings:
* global-dynamic
* local-dynamic
* initial-exec
* local-exec
TLS models are mutually exclusive.
}];
}
def DLLExportDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
The ``__declspec(dllexport)`` attribute declares a variable, function, or
Objective-C interface to be exported from the module. It is available under the
``-fdeclspec`` flag for compatibility with various compilers. The primary use
is for COFF object files which explicitly specify what interfaces are available
for external use. See the dllexport_ documentation on MSDN for more
information.
.. _dllexport: https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx
}];
}
def DLLImportDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
The ``__declspec(dllimport)`` attribute declares a variable, function, or
Objective-C interface to be imported from an external module. It is available
under the ``-fdeclspec`` flag for compatibility with various compilers. The
primary use is for COFF object files which explicitly specify what interfaces
are imported from external modules. See the dllimport_ documentation on MSDN
for more information.
Note that a dllimport function may still be inlined, if its definition is
available and it doesn't reference any non-dllimport functions or global
variables.
.. _dllimport: https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx
}];
}
def ThreadDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
The ``__declspec(thread)`` attribute declares a variable with thread local
storage. It is available under the ``-fms-extensions`` flag for MSVC
compatibility. See the documentation for `__declspec(thread)`_ on MSDN.
.. _`__declspec(thread)`: http://msdn.microsoft.com/en-us/library/9w1sdazb.aspx
In Clang, ``__declspec(thread)`` is generally equivalent in functionality to the
GNU ``__thread`` keyword. The variable must not have a destructor and must have
a constant initializer, if any. The attribute only applies to variables
declared with static storage duration, such as globals, class static data
members, and static locals.
}];
}
def NoEscapeDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
``noescape`` placed on a function parameter of a pointer type is used to inform
the compiler that the pointer cannot escape: that is, no reference to the object
the pointer points to that is derived from the parameter value will survive
after the function returns. Users are responsible for making sure parameters
annotated with ``noescape`` do not actually escape. Calling ``free()`` on such
a parameter does not constitute an escape.
For example:
.. code-block:: c
int *gp;
void nonescapingFunc(__attribute__((noescape)) int *p) {
*p += 100; // OK.
}
void escapingFunc(__attribute__((noescape)) int *p) {
gp = p; // Not OK.
}
Additionally, when the parameter is a `block pointer
<https://clang.llvm.org/docs/BlockLanguageSpec.html>`, the same restriction
applies to copies of the block. For example:
.. code-block:: c
typedef void (^BlockTy)();
BlockTy g0, g1;
void nonescapingFunc(__attribute__((noescape)) BlockTy block) {
block(); // OK.
}
void escapingFunc(__attribute__((noescape)) BlockTy block) {
g0 = block; // Not OK.
g1 = Block_copy(block); // Not OK either.
}
}];
}
def MaybeUndefDocs : Documentation {
let Category = DocCatVariable;
let Content = [{
The ``maybe_undef`` attribute can be placed on a function parameter. It indicates
that the parameter is allowed to use undef values. It informs the compiler
to insert a freeze LLVM IR instruction on the function parameter.
Please note that this is an attribute that is used as an internal
implementation detail and not intended to be used by external users.
In languages HIP, CUDA etc., some functions have multi-threaded semantics and
it is enough for only one or some threads to provide defined arguments.
Depending on semantics, undef arguments in some threads don't produce
undefined results in the function call. Since, these functions accept undefined
arguments, ``maybe_undef`` attribute can be placed.
Sample usage:
.. code-block:: c
void maybeundeffunc(int __attribute__((maybe_undef))param);
}];
}
def CarriesDependencyDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``carries_dependency`` attribute specifies dependency propagation into and
out of functions.
When specified on a function or Objective-C method, the ``carries_dependency``
attribute means that the return value carries a dependency out of the function,
so that the implementation need not constrain ordering upon return from that
function. Implementations of the function and its caller may choose to preserve
dependencies instead of emitting memory ordering instructions such as fences.
Note, this attribute does not change the meaning of the program, but may result
in generation of more efficient code.
}];
}
def CPUSpecificCPUDispatchDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``cpu_specific`` and ``cpu_dispatch`` attributes are used to define and
resolve multiversioned functions. This form of multiversioning provides a
mechanism for declaring versions across translation units and manually
specifying the resolved function list. A specified CPU defines a set of minimum
features that are required for the function to be called. The result of this is
that future processors execute the most restrictive version of the function the
new processor can execute.
In addition, unlike the ICC implementation of this feature, the selection of the
version does not consider the manufacturer or microarchitecture of the processor.
It tests solely the list of features that are both supported by the specified
processor and present in the compiler-rt library. This can be surprising at times,
as the runtime processor may be from a completely different manufacturer, as long
as it supports the same feature set.
This can additionally be surprising, as some processors are indistringuishable from
others based on the list of testable features. When this happens, the variant
is selected in an unspecified manner.
Function versions are defined with ``cpu_specific``, which takes one or more CPU
names as a parameter. For example:
.. code-block:: c
// Declares and defines the ivybridge version of single_cpu.
__attribute__((cpu_specific(ivybridge)))
void single_cpu(void){}
// Declares and defines the atom version of single_cpu.
__attribute__((cpu_specific(atom)))
void single_cpu(void){}
// Declares and defines both the ivybridge and atom version of multi_cpu.
__attribute__((cpu_specific(ivybridge, atom)))
void multi_cpu(void){}
A dispatching (or resolving) function can be declared anywhere in a project's
source code with ``cpu_dispatch``. This attribute takes one or more CPU names
as a parameter (like ``cpu_specific``). Functions marked with ``cpu_dispatch``
are not expected to be defined, only declared. If such a marked function has a
definition, any side effects of the function are ignored; trivial function
bodies are permissible for ICC compatibility.
.. code-block:: c
// Creates a resolver for single_cpu above.
__attribute__((cpu_dispatch(ivybridge, atom)))
void single_cpu(void){}
// Creates a resolver for multi_cpu, but adds a 3rd version defined in another
// translation unit.
__attribute__((cpu_dispatch(ivybridge, atom, sandybridge)))
void multi_cpu(void){}
Note that it is possible to have a resolving function that dispatches based on
more or fewer options than are present in the program. Specifying fewer will
result in the omitted options not being considered during resolution. Specifying
a version for resolution that isn't defined in the program will result in a
linking failure.
It is also possible to specify a CPU name of ``generic`` which will be resolved
if the executing processor doesn't satisfy the features required in the CPU
name. The behavior of a program executing on a processor that doesn't satisfy
any option of a multiversioned function is undefined.
}];
}
def SYCLKernelDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``sycl_kernel`` attribute specifies that a function template will be used
to outline device code and to generate an OpenCL kernel.
Here is a code example of the SYCL program, which demonstrates the compiler's
outlining job:
.. code-block:: c++
int foo(int x) { return ++x; }
using namespace cl::sycl;
queue Q;
buffer<int, 1> a(range<1>{1024});
Q.submit([&](handler& cgh) {
auto A = a.get_access<access::mode::write>(cgh);
cgh.parallel_for<init_a>(range<1>{1024}, [=](id<1> index) {
A[index] = index[0] + foo(42);
});
}
A C++ function object passed to the ``parallel_for`` is called a "SYCL kernel".
A SYCL kernel defines the entry point to the "device part" of the code. The
compiler will emit all symbols accessible from a "kernel". In this code
example, the compiler will emit "foo" function. More details about the
compilation of functions for the device part can be found in the SYCL 1.2.1
specification Section 6.4.
To show to the compiler entry point to the "device part" of the code, the SYCL
runtime can use the ``sycl_kernel`` attribute in the following way:
.. code-block:: c++
namespace cl {
namespace sycl {
class handler {
template <typename KernelName, typename KernelType/*, ...*/>
__attribute__((sycl_kernel)) void sycl_kernel_function(KernelType KernelFuncObj) {
// ...
KernelFuncObj();
}
template <typename KernelName, typename KernelType, int Dims>
void parallel_for(range<Dims> NumWorkItems, KernelType KernelFunc) {
#ifdef __SYCL_DEVICE_ONLY__
sycl_kernel_function<KernelName, KernelType, Dims>(KernelFunc);
#else
// Host implementation
#endif
}
};
} // namespace sycl
} // namespace cl
The compiler will also generate an OpenCL kernel using the function marked with
the ``sycl_kernel`` attribute.
Here is the list of SYCL device compiler expectations with regard to the
function marked with the ``sycl_kernel`` attribute:
- The function must be a template with at least two type template parameters.
The compiler generates an OpenCL kernel and uses the first template parameter
as a unique name for the generated OpenCL kernel. The host application uses
this unique name to invoke the OpenCL kernel generated for the SYCL kernel
specialized by this name and second template parameter ``KernelType`` (which
might be an unnamed function object type).
- The function must have at least one parameter. The first parameter is
required to be a function object type (named or unnamed i.e. lambda). The
compiler uses function object type fields to generate OpenCL kernel
parameters.
- The function must return void. The compiler reuses the body of marked functions to
generate the OpenCL kernel body, and the OpenCL kernel must return ``void``.
The SYCL kernel in the previous code sample meets these expectations.
}];
}
def SYCLKernelEntryPointDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``sycl_kernel_entry_point`` attribute facilitates the generation of an
offload kernel entry point, sometimes called a SYCL kernel caller function,
suitable for invoking a SYCL kernel on an offload device. The attribute is
intended for use in the implementation of SYCL kernel invocation functions
like the ``single_task`` and ``parallel_for`` member functions of the
``sycl::handler`` class specified in section 4.9.4, "Command group ``handler``
class", of the SYCL 2020 specification.
The attribute requires a single type argument that specifies a class type that
meets the requirements for a SYCL kernel name as described in section 5.2,
"Naming of kernels", of the SYCL 2020 specification. A unique kernel name type
is required for each function declared with the attribute. The attribute may
not first appear on a declaration that follows a definition of the function.
The attribute only appertains to functions and only those that meet the
following requirements.
* Has a non-deduced ``void`` return type.
* Is not a non-static member function, constructor, or destructor.
* Is not a C variadic function.
* Is not a coroutine.
* Is not defined as deleted or as defaulted.
* Is not defined with a function try block.
* Is not declared with the ``constexpr`` or ``consteval`` specifiers.
* Is not declared with the ``[[noreturn]]`` attribute.
Use in the implementation of a SYCL kernel invocation function might look as
follows.
.. code-block:: c++
namespace sycl {
class handler {
template<typename KernelNameType, typename KernelType>
[[ clang::sycl_kernel_entry_point(KernelNameType) ]]
static void kernel_entry_point(KernelType kernel) {
kernel();
}
public:
template<typename KernelNameType, typename KernelType>
void single_task(KernelType kernel) {
// Call kernel_entry_point() to trigger generation of an offload
// kernel entry point.
kernel_entry_point<KernelNameType>(kernel);
// Call functions appropriate for the desired offload backend
// (OpenCL, CUDA, HIP, Level Zero, etc...).
}
};
} // namespace sycl
A SYCL kernel is a callable object of class type that is constructed on a host,
often via a lambda expression, and then passed to a SYCL kernel invocation
function to be executed on an offload device. A SYCL kernel invocation function
is responsible for copying the provided SYCL kernel object to an offload
device and initiating a call to it. The SYCL kernel object and its data members
constitute the parameters of an offload kernel.
A SYCL kernel type is required to satisfy the device copyability requirements
specified in section 3.13.1, "Device copyable", of the SYCL 2020 specification.
Additionally, any data members of the kernel object type are required to satisfy
section 4.12.4, "Rules for parameter passing to kernels". For most types, these
rules require that the type is trivially copyable. However, the SYCL
specification mandates that certain special SYCL types, such as
``sycl::accessor`` and ``sycl::stream`` be device copyable even if they are not
trivially copyable. These types require special handling because they cannot
be copied to device memory as if by ``memcpy()``. Additionally, some offload
backends, OpenCL for example, require objects of some of these types to be
passed as individual arguments to the offload kernel.
An offload kernel consists of an entry point function that declares the
parameters of the offload kernel and the set of all functions and variables that
are directly or indirectly used by the entry point function.
A SYCL kernel invocation function invokes a SYCL kernel on a device by
performing the following tasks (likely with the help of an offload backend
like OpenCL):
#. Identifying the offload kernel entry point to be used for the SYCL kernel.
#. Deconstructing the SYCL kernel object, if necessary, to produce the set of
offload kernel arguments required by the offload kernel entry point.
#. Copying the offload kernel arguments to device memory.
#. Initiating execution of the offload kernel entry point.
The offload kernel entry point for a SYCL kernel performs the following tasks:
#. Reconstituting the SYCL kernel object, if necessary, using the offload
kernel parameters.
#. Calling the ``operator()`` member function of the (reconstituted) SYCL kernel
object.
The ``sycl_kernel_entry_point`` attribute automates generation of an offload
kernel entry point that performs those latter tasks. The parameters and body of
a function declared with the ``sycl_kernel_entry_point`` attribute specify a
pattern from which the parameters and body of the entry point function are
derived. Consider the following call to a SYCL kernel invocation function.
.. code-block:: c++
struct S { int i; };
void f(sycl::handler &handler, sycl::stream &sout, S s) {
handler.single_task<struct KN>([=] {
sout << "The value of s.i is " << s.i << "\n";
});
}
The SYCL kernel object is the result of the lambda expression. It has two
data members corresponding to the captures of ``sout`` and ``s``. Since one
of these data members corresponds to a special SYCL type that must be passed
individually as an offload kernel parameter, it is necessary to decompose the
SYCL kernel object into its constituent parts; the offload kernel will have
two kernel parameters. Given a SYCL implementation that uses a
``sycl_kernel_entry_point`` attributed function like the one shown above, an
offload kernel entry point function will be generated that looks approximately
as follows.
.. code-block:: c++
void sycl-kernel-caller-for-KN(sycl::stream sout, S s) {
kernel-type kernel = { sout, s );
kernel();
}
There are a few items worthy of note:
#. The name of the generated function incorporates the SYCL kernel name,
``KN``, that was passed as the ``KernelNameType`` template parameter to
``kernel_entry_point()`` and provided as the argument to the
``sycl_kernel_entry_point`` attribute. There is a one-to-one correspondence
between SYCL kernel names and offload kernel entry points.
#. The SYCL kernel is a lambda closure type and therefore has no name;
``kernel-type`` is substituted above and corresponds to the ``KernelType``
template parameter deduced in the call to ``kernel_entry_point()``.
Lambda types cannot be declared and initialized using the aggregate
initialization syntax used above, but the intended behavior should be clear.
#. ``S`` is a device copyable type that does not directly or indirectly contain
a data member of a SYCL special type. It therefore does not need to be
decomposed into its constituent members to be passed as a kernel argument.
#. The depiction of the ``sycl::stream`` parameter as a single self contained
kernel parameter is an oversimplification. SYCL special types may require
additional decomposition such that the generated function might have three
or more parameters depending on how the SYCL library implementation defines
these types.
#. The call to ``kernel_entry_point()`` has no effect other than to trigger
emission of the entry point function. The statments that make up the body
of the function are not executed when the function is called; they are
only used in the generation of the entry point function.
It is not necessary for a function declared with the ``sycl_kernel_entry_point``
attribute to be called for the offload kernel entry point to be emitted. For
inline functions and function templates, any ODR-use will suffice. For other
functions, an ODR-use is not required; the offload kernel entry point will be
emitted if the function is defined.
Functions declared with the ``sycl_kernel_entry_point`` attribute are not
limited to the simple example shown above. They may have additional template
parameters, declare additional function parameters, and have complex control
flow in the function body. Function parameter decomposition and reconstitution
is performed for all function parameters. The function must abide by the
language feature restrictions described in section 5.4, "Language restrictions
for device functions" in the SYCL 2020 specification.
}];
}
def SYCLSpecialClassDocs : Documentation {
let Category = DocCatStmt;
let Content = [{
SYCL defines some special classes (accessor, sampler, and stream) which require
specific handling during the generation of the SPIR entry point.
The ``__attribute__((sycl_special_class))`` attribute is used in SYCL
headers to indicate that a class or a struct needs a specific handling when
it is passed from host to device.
Special classes will have a mandatory ``__init`` method and an optional
``__finalize`` method (the ``__finalize`` method is used only with the
``stream`` type). Kernel parameters types are extract from the ``__init`` method
parameters. The kernel function arguments list is derived from the
arguments of the ``__init`` method. The arguments of the ``__init`` method are
copied into the kernel function argument list and the ``__init`` and
``__finalize`` methods are called at the beginning and the end of the kernel,
respectively.
The ``__init`` and ``__finalize`` methods must be defined inside the
special class.
Please note that this is an attribute that is used as an internal
implementation detail and not intended to be used by external users.
The syntax of the attribute is as follows:
.. code-block:: text
class __attribute__((sycl_special_class)) accessor {};
class [[clang::sycl_special_class]] accessor {};
This is a code example that illustrates the use of the attribute:
.. code-block:: c++
class __attribute__((sycl_special_class)) SpecialType {
int F1;
int F2;
void __init(int f1) {
F1 = f1;
F2 = f1;
}
void __finalize() {}
public:
SpecialType() = default;
int getF2() const { return F2; }
};
int main () {
SpecialType T;
cgh.single_task([=] {
T.getF2();
});
}
This would trigger the following kernel entry point in the AST:
.. code-block:: c++
void __sycl_kernel(int f1) {
SpecialType T;
T.__init(f1);
...
T.__finalize()
}
}];
}
def C11NoReturnDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
A function declared as ``_Noreturn`` shall not return to its caller. The
compiler will generate a diagnostic for a function declared as ``_Noreturn``
that appears to be capable of returning to its caller. Despite being a type
specifier, the ``_Noreturn`` attribute cannot be specified on a function
pointer type.
}];
}
def CXX11NoReturnDocs : Documentation {
let Category = DocCatFunction;
let Heading = "noreturn, _Noreturn";
let Content = [{
A function declared as ``[[noreturn]]`` shall not return to its caller. The
compiler will generate a diagnostic for a function declared as ``[[noreturn]]``
that appears to be capable of returning to its caller.
The ``[[_Noreturn]]`` spelling is deprecated and only exists to ease code
migration for code using ``[[noreturn]]`` after including ``<stdnoreturn.h>``.
}];
}
def NoMergeDocs : Documentation {
let Category = DocCatStmt;
let Content = [{
If a statement is marked ``nomerge`` and contains call expressions, those call
expressions inside the statement will not be merged during optimization. This
attribute can be used to prevent the optimizer from obscuring the source
location of certain calls. For example, it will prevent tail merging otherwise
identical code sequences that raise an exception or terminate the program. Tail
merging normally reduces the precision of source location information, making
stack traces less useful for debugging. This attribute gives the user control
over the tradeoff between code size and debug information precision.
``nomerge`` attribute can also be used as function attribute to prevent all
calls to the specified function from merging. It has no effect on indirect
calls to such functions. For example:
.. code-block:: c++
[[clang::nomerge]] void foo(int) {}
void bar(int x) {
auto *ptr = foo;
if (x) foo(1); else foo(2); // will not be merged
if (x) ptr(1); else ptr(2); // indirect call, can be merged
}
``nomerge`` attribute can also be used for pointers to functions to
prevent calls through such pointer from merging. In such case the
effect applies only to a specific function pointer. For example:
.. code-block:: c++
[[clang::nomerge]] void (*foo)(int);
void bar(int x) {
auto *ptr = foo;
if (x) foo(1); else foo(2); // will not be merged
if (x) ptr(1); else ptr(2); // 'ptr' has no 'nomerge' attribute, can be merged
}
}];
}
def NoInlineDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
This function attribute suppresses the inlining of a function at the call sites
of the function.
``[[clang::noinline]]`` spelling can be used as a statement attribute; other
spellings of the attribute are not supported on statements. If a statement is
marked ``[[clang::noinline]]`` and contains calls, those calls inside the
statement will not be inlined by the compiler.
``__noinline__`` can be used as a keyword in CUDA/HIP languages. This is to
avoid diagnostics due to usage of ``__attribute__((__noinline__))``
with ``__noinline__`` defined as a macro as ``__attribute__((noinline))``.
.. code-block:: c
int example(void) {
int r;
[[clang::noinline]] foo();
[[clang::noinline]] r = bar();
return r;
}
}];
}
def MustTailDocs : Documentation {
let Category = DocCatStmt;
let Content = [{
If a ``return`` statement is marked ``musttail``, this indicates that the
compiler must generate a tail call for the program to be correct, even when
optimizations are disabled. This guarantees that the call will not cause
unbounded stack growth if it is part of a recursive cycle in the call graph.
If the callee is a virtual function that is implemented by a thunk, there is
no guarantee in general that the thunk tail-calls the implementation of the
virtual function, so such a call in a recursive cycle can still result in
unbounded stack growth.
``clang::musttail`` can only be applied to a ``return`` statement whose value
is the result of a function call (even functions returning void must use
``return``, although no value is returned). The target function must have the
same number of arguments as the caller. The types of the return value and all
arguments must be similar according to C++ rules (differing only in cv
qualifiers or array size), including the implicit "this" argument, if any.
Any variables in scope, including all arguments to the function and the
return value must be trivially destructible. The calling convention of the
caller and callee must match, and they must not be variadic functions or have
old style K&R C function declarations.
The lifetimes of all local variables and function parameters end immediately
before the call to the function. This means that it is undefined behaviour to
pass a pointer or reference to a local variable to the called function, which
is not the case without the attribute. Clang will emit a warning in common
cases where this happens.
``clang::musttail`` provides assurances that the tail call can be optimized on
all targets, not just one.
}];
}
def AssertCapabilityDocs : Documentation {
let Category = DocCatFunction;
let Heading = "assert_capability, assert_shared_capability";
let Content = [{
Marks a function that dynamically tests whether a capability is held, and halts
the program if it is not held.
}];
}
def AcquireCapabilityDocs : Documentation {
let Category = DocCatFunction;
let Heading = "acquire_capability, acquire_shared_capability";
let Content = [{
Marks a function as acquiring a capability.
}];
}
def TryAcquireCapabilityDocs : Documentation {
let Category = DocCatFunction;
let Heading = "try_acquire_capability, try_acquire_shared_capability";
let Content = [{
Marks a function that attempts to acquire a capability. This function may fail to
actually acquire the capability; they accept a Boolean value determining
whether acquiring the capability means success (true), or failing to acquire
the capability means success (false).
}];
}
def ReleaseCapabilityDocs : Documentation {
let Category = DocCatFunction;
let Heading = "release_capability, release_shared_capability";
let Content = [{
Marks a function as releasing a capability.
}];
}
def AssumeAlignedDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
Use ``__attribute__((assume_aligned(<alignment>[,<offset>]))`` on a function
declaration to specify that the return value of the function (which must be a
pointer type) has the specified offset, in bytes, from an address with the
specified alignment. The offset is taken to be zero if omitted.
.. code-block:: c++
// The returned pointer value has 32-byte alignment.
void *a() __attribute__((assume_aligned (32)));
// The returned pointer value is 4 bytes greater than an address having
// 32-byte alignment.
void *b() __attribute__((assume_aligned (32, 4)));
Note that this attribute provides information to the compiler regarding a
condition that the code already ensures is true. It does not cause the compiler
to enforce the provided alignment assumption.
}];
}
def AllocSizeDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``alloc_size`` attribute can be placed on functions that return pointers in
order to hint to the compiler how many bytes of memory will be available at the
returned pointer. ``alloc_size`` takes one or two arguments.
- ``alloc_size(N)`` implies that argument number N equals the number of
available bytes at the returned pointer.
- ``alloc_size(N, M)`` implies that the product of argument number N and
argument number M equals the number of available bytes at the returned
pointer.
Argument numbers are 1-based.
An example of how to use ``alloc_size``
.. code-block:: c
void *my_malloc(int a) __attribute__((alloc_size(1)));
void *my_calloc(int a, int b) __attribute__((alloc_size(1, 2)));
int main() {
void *const p = my_malloc(100);
assert(__builtin_object_size(p, 0) == 100);
void *const a = my_calloc(20, 5);
assert(__builtin_object_size(a, 0) == 100);
}
.. Note:: This attribute works differently in clang than it does in GCC.
Specifically, clang will only trace ``const`` pointers (as above); we give up
on pointers that are not marked as ``const``. In the vast majority of cases,
this is unimportant, because LLVM has support for the ``alloc_size``
attribute. However, this may cause mildly unintuitive behavior when used with
other attributes, such as ``enable_if``.
}];
}
def CodeSegDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``__declspec(code_seg)`` attribute enables the placement of code into separate
named segments that can be paged or locked in memory individually. This attribute
is used to control the placement of instantiated templates and compiler-generated
code. See the documentation for `__declspec(code_seg)`_ on MSDN.
.. _`__declspec(code_seg)`: http://msdn.microsoft.com/en-us/library/dn636922.aspx
}];
}
def AllocAlignDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
Use ``__attribute__((alloc_align(<alignment>))`` on a function
declaration to specify that the return value of the function (which must be a
pointer type) is at least as aligned as the value of the indicated parameter. The
parameter is given by its index in the list of formal parameters; the first
parameter has index 1 unless the function is a C++ non-static member function,
in which case the first parameter has index 2 to account for the implicit ``this``
parameter.
.. code-block:: c++
// The returned pointer has the alignment specified by the first parameter.
void *a(size_t align) __attribute__((alloc_align(1)));
// The returned pointer has the alignment specified by the second parameter.
void *b(void *v, size_t align) __attribute__((alloc_align(2)));
// The returned pointer has the alignment specified by the second visible
// parameter, however it must be adjusted for the implicit 'this' parameter.
void *Foo::b(void *v, size_t align) __attribute__((alloc_align(3)));
Note that this attribute merely informs the compiler that a function always
returns a sufficiently aligned pointer. It does not cause the compiler to
emit code to enforce that alignment. The behavior is undefined if the returned
pointer is not sufficiently aligned.
}];
}
def EnableIfDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
.. Note:: Some features of this attribute are experimental. The meaning of
multiple enable_if attributes on a single declaration is subject to change in
a future version of clang. Also, the ABI is not standardized and the name
mangling may change in future versions. To avoid that, use asm labels.
The ``enable_if`` attribute can be placed on function declarations to control
which overload is selected based on the values of the function's arguments.
When combined with the ``overloadable`` attribute, this feature is also
available in C.
.. code-block:: c++
int isdigit(int c);
int isdigit(int c) __attribute__((enable_if(c <= -1 || c > 255, "chosen when 'c' is out of range"))) __attribute__((unavailable("'c' must have the value of an unsigned char or EOF")));
void foo(char c) {
isdigit(c);
isdigit(10);
isdigit(-10); // results in a compile-time error.
}
The enable_if attribute takes two arguments, the first is an expression written
in terms of the function parameters, the second is a string explaining why this
overload candidate could not be selected to be displayed in diagnostics. The
expression is part of the function signature for the purposes of determining
whether it is a redeclaration (following the rules used when determining