-
Notifications
You must be signed in to change notification settings - Fork 18
/
atomics.cc
811 lines (765 loc) · 30.8 KB
/
atomics.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
//===-- atomics.cc - Implement OpenMP atomic operations -------*- C++ -*-===//
//
// Part of the LOMP project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of OpenMP atomic operations for the cases
/// when they are not inlined by the compiler.
/// Where possible we just delegate to std::atomic, however for operations on
/// floats, complex, ... which are not supported by C++11's std::atomic we use
/// our own cmpxchg based implementation.
/// We also optimize some operations such as max, min, && || which do not necessarily
/// require an update.
// This description comes from the kmp_atomic.cpp file in the LLVM OpenMP runtime.
// Here we do not bother to implement some of these operations or data types, since
// their use is rare. (For instance, the float10 ops, and complex<float10> ops).
/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of atomic
operations.
The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
s++;
@endcode
using the single instruction: `lock; incl s`
However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
The names of the functions are encoded by using the data type name and the
operation name, as in these tables.
Data Type | Data type encoding
-----------|---------------
int8_t | `fixed1`
uint8_t | `fixed1u`
int16_t | `fixed2`
uint16_t | `fixed2u`
int32_t | `fixed4`
uint32_t | `fixed4u`
int64_t | `fixed8`
uint64_t | `fixed8u`
float | `float4`
double | `float8`
float 10 (8087 eighty bit float) | `float10`
complex<float> | `cmplx4`
complex<double> | `cmplx8`
complex<float10> | `cmplx10`
<br>
Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv
<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.
Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`)
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the capture, or that after. They take an additional
argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
The one set of exceptions to this is the `complex<float>` type where the value
is not returned, rather an extra argument pointer is passed.
They look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@endcode
Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.
The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode
For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs);
@endcode
Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.
Functions for integers
---------------------
There are versions here for integers of size 1,2,4 and 8 bytes both signed and
unsigned (where that matters).
@code
__kmpc_atomic_fixed1_add
__kmpc_atomic_fixed1_add_cpt
__kmpc_atomic_fixed1_add_fp
__kmpc_atomic_fixed1_andb
__kmpc_atomic_fixed1_andb_cpt
__kmpc_atomic_fixed1_andl
__kmpc_atomic_fixed1_andl_cpt
__kmpc_atomic_fixed1_div
__kmpc_atomic_fixed1_div_cpt
__kmpc_atomic_fixed1_div_cpt_rev
__kmpc_atomic_fixed1_div_float8
__kmpc_atomic_fixed1_div_fp
__kmpc_atomic_fixed1_div_rev
__kmpc_atomic_fixed1_eqv
__kmpc_atomic_fixed1_eqv_cpt
__kmpc_atomic_fixed1_max
__kmpc_atomic_fixed1_max_cpt
__kmpc_atomic_fixed1_min
__kmpc_atomic_fixed1_min_cpt
__kmpc_atomic_fixed1_mul
__kmpc_atomic_fixed1_mul_cpt
__kmpc_atomic_fixed1_mul_float8
__kmpc_atomic_fixed1_mul_fp
__kmpc_atomic_fixed1_neqv
__kmpc_atomic_fixed1_neqv_cpt
__kmpc_atomic_fixed1_orb
__kmpc_atomic_fixed1_orb_cpt
__kmpc_atomic_fixed1_orl
__kmpc_atomic_fixed1_orl_cpt
__kmpc_atomic_fixed1_rd
__kmpc_atomic_fixed1_shl
__kmpc_atomic_fixed1_shl_cpt
__kmpc_atomic_fixed1_shl_cpt_rev
__kmpc_atomic_fixed1_shl_rev
__kmpc_atomic_fixed1_shr
__kmpc_atomic_fixed1_shr_cpt
__kmpc_atomic_fixed1_shr_cpt_rev
__kmpc_atomic_fixed1_shr_rev
__kmpc_atomic_fixed1_sub
__kmpc_atomic_fixed1_sub_cpt
__kmpc_atomic_fixed1_sub_cpt_rev
__kmpc_atomic_fixed1_sub_fp
__kmpc_atomic_fixed1_sub_rev
__kmpc_atomic_fixed1_swp
__kmpc_atomic_fixed1_wr
__kmpc_atomic_fixed1_xor
__kmpc_atomic_fixed1_xor_cpt
__kmpc_atomic_fixed1u_add_fp
__kmpc_atomic_fixed1u_sub_fp
__kmpc_atomic_fixed1u_mul_fp
__kmpc_atomic_fixed1u_div
__kmpc_atomic_fixed1u_div_cpt
__kmpc_atomic_fixed1u_div_cpt_rev
__kmpc_atomic_fixed1u_div_fp
__kmpc_atomic_fixed1u_div_rev
__kmpc_atomic_fixed1u_shr
__kmpc_atomic_fixed1u_shr_cpt
__kmpc_atomic_fixed1u_shr_cpt_rev
__kmpc_atomic_fixed1u_shr_rev
__kmpc_atomic_fixed2_add
__kmpc_atomic_fixed2_add_cpt
__kmpc_atomic_fixed2_add_fp
__kmpc_atomic_fixed2_andb
__kmpc_atomic_fixed2_andb_cpt
__kmpc_atomic_fixed2_andl
__kmpc_atomic_fixed2_andl_cpt
__kmpc_atomic_fixed2_div
__kmpc_atomic_fixed2_div_cpt
__kmpc_atomic_fixed2_div_cpt_rev
__kmpc_atomic_fixed2_div_float8
__kmpc_atomic_fixed2_div_fp
__kmpc_atomic_fixed2_div_rev
__kmpc_atomic_fixed2_eqv
__kmpc_atomic_fixed2_eqv_cpt
__kmpc_atomic_fixed2_max
__kmpc_atomic_fixed2_max_cpt
__kmpc_atomic_fixed2_min
__kmpc_atomic_fixed2_min_cpt
__kmpc_atomic_fixed2_mul
__kmpc_atomic_fixed2_mul_cpt
__kmpc_atomic_fixed2_mul_float8
__kmpc_atomic_fixed2_mul_fp
__kmpc_atomic_fixed2_neqv
__kmpc_atomic_fixed2_neqv_cpt
__kmpc_atomic_fixed2_orb
__kmpc_atomic_fixed2_orb_cpt
__kmpc_atomic_fixed2_orl
__kmpc_atomic_fixed2_orl_cpt
__kmpc_atomic_fixed2_rd
__kmpc_atomic_fixed2_shl
__kmpc_atomic_fixed2_shl_cpt
__kmpc_atomic_fixed2_shl_cpt_rev
__kmpc_atomic_fixed2_shl_rev
__kmpc_atomic_fixed2_shr
__kmpc_atomic_fixed2_shr_cpt
__kmpc_atomic_fixed2_shr_cpt_rev
__kmpc_atomic_fixed2_shr_rev
__kmpc_atomic_fixed2_sub
__kmpc_atomic_fixed2_sub_cpt
__kmpc_atomic_fixed2_sub_cpt_rev
__kmpc_atomic_fixed2_sub_fp
__kmpc_atomic_fixed2_sub_rev
__kmpc_atomic_fixed2_swp
__kmpc_atomic_fixed2_wr
__kmpc_atomic_fixed2_xor
__kmpc_atomic_fixed2_xor_cpt
__kmpc_atomic_fixed2u_add_fp
__kmpc_atomic_fixed2u_sub_fp
__kmpc_atomic_fixed2u_mul_fp
__kmpc_atomic_fixed2u_div
__kmpc_atomic_fixed2u_div_cpt
__kmpc_atomic_fixed2u_div_cpt_rev
__kmpc_atomic_fixed2u_div_fp
__kmpc_atomic_fixed2u_div_rev
__kmpc_atomic_fixed2u_shr
__kmpc_atomic_fixed2u_shr_cpt
__kmpc_atomic_fixed2u_shr_cpt_rev
__kmpc_atomic_fixed2u_shr_rev
__kmpc_atomic_fixed4_add
__kmpc_atomic_fixed4_add_cpt
__kmpc_atomic_fixed4_add_fp
__kmpc_atomic_fixed4_andb
__kmpc_atomic_fixed4_andb_cpt
__kmpc_atomic_fixed4_andl
__kmpc_atomic_fixed4_andl_cpt
__kmpc_atomic_fixed4_div
__kmpc_atomic_fixed4_div_cpt
__kmpc_atomic_fixed4_div_cpt_rev
__kmpc_atomic_fixed4_div_float8
__kmpc_atomic_fixed4_div_fp
__kmpc_atomic_fixed4_div_rev
__kmpc_atomic_fixed4_eqv
__kmpc_atomic_fixed4_eqv_cpt
__kmpc_atomic_fixed4_max
__kmpc_atomic_fixed4_max_cpt
__kmpc_atomic_fixed4_min
__kmpc_atomic_fixed4_min_cpt
__kmpc_atomic_fixed4_mul
__kmpc_atomic_fixed4_mul_cpt
__kmpc_atomic_fixed4_mul_float8
__kmpc_atomic_fixed4_mul_fp
__kmpc_atomic_fixed4_neqv
__kmpc_atomic_fixed4_neqv_cpt
__kmpc_atomic_fixed4_orb
__kmpc_atomic_fixed4_orb_cpt
__kmpc_atomic_fixed4_orl
__kmpc_atomic_fixed4_orl_cpt
__kmpc_atomic_fixed4_rd
__kmpc_atomic_fixed4_shl
__kmpc_atomic_fixed4_shl_cpt
__kmpc_atomic_fixed4_shl_cpt_rev
__kmpc_atomic_fixed4_shl_rev
__kmpc_atomic_fixed4_shr
__kmpc_atomic_fixed4_shr_cpt
__kmpc_atomic_fixed4_shr_cpt_rev
__kmpc_atomic_fixed4_shr_rev
__kmpc_atomic_fixed4_sub
__kmpc_atomic_fixed4_sub_cpt
__kmpc_atomic_fixed4_sub_cpt_rev
__kmpc_atomic_fixed4_sub_fp
__kmpc_atomic_fixed4_sub_rev
__kmpc_atomic_fixed4_swp
__kmpc_atomic_fixed4_wr
__kmpc_atomic_fixed4_xor
__kmpc_atomic_fixed4_xor_cpt
__kmpc_atomic_fixed4u_add_fp
__kmpc_atomic_fixed4u_sub_fp
__kmpc_atomic_fixed4u_mul_fp
__kmpc_atomic_fixed4u_div
__kmpc_atomic_fixed4u_div_cpt
__kmpc_atomic_fixed4u_div_cpt_rev
__kmpc_atomic_fixed4u_div_fp
__kmpc_atomic_fixed4u_div_rev
__kmpc_atomic_fixed4u_shr
__kmpc_atomic_fixed4u_shr_cpt
__kmpc_atomic_fixed4u_shr_cpt_rev
__kmpc_atomic_fixed4u_shr_rev
__kmpc_atomic_fixed8_add
__kmpc_atomic_fixed8_add_cpt
__kmpc_atomic_fixed8_add_fp
__kmpc_atomic_fixed8_andb
__kmpc_atomic_fixed8_andb_cpt
__kmpc_atomic_fixed8_andl
__kmpc_atomic_fixed8_andl_cpt
__kmpc_atomic_fixed8_div
__kmpc_atomic_fixed8_div_cpt
__kmpc_atomic_fixed8_div_cpt_rev
__kmpc_atomic_fixed8_div_float8
__kmpc_atomic_fixed8_div_fp
__kmpc_atomic_fixed8_div_rev
__kmpc_atomic_fixed8_eqv
__kmpc_atomic_fixed8_eqv_cpt
__kmpc_atomic_fixed8_max
__kmpc_atomic_fixed8_max_cpt
__kmpc_atomic_fixed8_min
__kmpc_atomic_fixed8_min_cpt
__kmpc_atomic_fixed8_mul
__kmpc_atomic_fixed8_mul_cpt
__kmpc_atomic_fixed8_mul_float8
__kmpc_atomic_fixed8_mul_fp
__kmpc_atomic_fixed8_neqv
__kmpc_atomic_fixed8_neqv_cpt
__kmpc_atomic_fixed8_orb
__kmpc_atomic_fixed8_orb_cpt
__kmpc_atomic_fixed8_orl
__kmpc_atomic_fixed8_orl_cpt
__kmpc_atomic_fixed8_rd
__kmpc_atomic_fixed8_shl
__kmpc_atomic_fixed8_shl_cpt
__kmpc_atomic_fixed8_shl_cpt_rev
__kmpc_atomic_fixed8_shl_rev
__kmpc_atomic_fixed8_shr
__kmpc_atomic_fixed8_shr_cpt
__kmpc_atomic_fixed8_shr_cpt_rev
__kmpc_atomic_fixed8_shr_rev
__kmpc_atomic_fixed8_sub
__kmpc_atomic_fixed8_sub_cpt
__kmpc_atomic_fixed8_sub_cpt_rev
__kmpc_atomic_fixed8_sub_fp
__kmpc_atomic_fixed8_sub_rev
__kmpc_atomic_fixed8_swp
__kmpc_atomic_fixed8_wr
__kmpc_atomic_fixed8_xor
__kmpc_atomic_fixed8_xor_cpt
__kmpc_atomic_fixed8u_add_fp
__kmpc_atomic_fixed8u_sub_fp
__kmpc_atomic_fixed8u_mul_fp
__kmpc_atomic_fixed8u_div
__kmpc_atomic_fixed8u_div_cpt
__kmpc_atomic_fixed8u_div_cpt_rev
__kmpc_atomic_fixed8u_div_fp
__kmpc_atomic_fixed8u_div_rev
__kmpc_atomic_fixed8u_shr
__kmpc_atomic_fixed8u_shr_cpt
__kmpc_atomic_fixed8u_shr_cpt_rev
__kmpc_atomic_fixed8u_shr_rev
@endcode
Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten byte floats are used by X87, but are now rare).
@code
__kmpc_atomic_float4_add
__kmpc_atomic_float4_add_cpt
__kmpc_atomic_float4_add_float8
__kmpc_atomic_float4_add_fp
__kmpc_atomic_float4_div
__kmpc_atomic_float4_div_cpt
__kmpc_atomic_float4_div_cpt_rev
__kmpc_atomic_float4_div_float8
__kmpc_atomic_float4_div_fp
__kmpc_atomic_float4_div_rev
__kmpc_atomic_float4_max
__kmpc_atomic_float4_max_cpt
__kmpc_atomic_float4_min
__kmpc_atomic_float4_min_cpt
__kmpc_atomic_float4_mul
__kmpc_atomic_float4_mul_cpt
__kmpc_atomic_float4_mul_float8
__kmpc_atomic_float4_mul_fp
__kmpc_atomic_float4_rd
__kmpc_atomic_float4_sub
__kmpc_atomic_float4_sub_cpt
__kmpc_atomic_float4_sub_cpt_rev
__kmpc_atomic_float4_sub_float8
__kmpc_atomic_float4_sub_fp
__kmpc_atomic_float4_sub_rev
__kmpc_atomic_float4_swp
__kmpc_atomic_float4_wr
__kmpc_atomic_float8_add
__kmpc_atomic_float8_add_cpt
__kmpc_atomic_float8_add_fp
__kmpc_atomic_float8_div
__kmpc_atomic_float8_div_cpt
__kmpc_atomic_float8_div_cpt_rev
__kmpc_atomic_float8_div_fp
__kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_max
__kmpc_atomic_float8_max_cpt
__kmpc_atomic_float8_min
__kmpc_atomic_float8_min_cpt
__kmpc_atomic_float8_mul
__kmpc_atomic_float8_mul_cpt
__kmpc_atomic_float8_mul_fp
__kmpc_atomic_float8_rd
__kmpc_atomic_float8_sub
__kmpc_atomic_float8_sub_cpt
__kmpc_atomic_float8_sub_cpt_rev
__kmpc_atomic_float8_sub_fp
__kmpc_atomic_float8_sub_rev
__kmpc_atomic_float8_swp
__kmpc_atomic_float8_wr
__kmpc_atomic_float10_add
__kmpc_atomic_float10_add_cpt
__kmpc_atomic_float10_add_fp
__kmpc_atomic_float10_div
__kmpc_atomic_float10_div_cpt
__kmpc_atomic_float10_div_cpt_rev
__kmpc_atomic_float10_div_fp
__kmpc_atomic_float10_div_rev
__kmpc_atomic_float10_mul
__kmpc_atomic_float10_mul_cpt
__kmpc_atomic_float10_mul_fp
__kmpc_atomic_float10_rd
__kmpc_atomic_float10_sub
__kmpc_atomic_float10_sub_cpt
__kmpc_atomic_float10_sub_cpt_rev
__kmpc_atomic_float10_sub_fp
__kmpc_atomic_float10_sub_rev
__kmpc_atomic_float10_swp
__kmpc_atomic_float10_wr
__kmpc_atomic_float16_add
__kmpc_atomic_float16_add_cpt
__kmpc_atomic_float16_div
__kmpc_atomic_float16_div_cpt
__kmpc_atomic_float16_div_cpt_rev
__kmpc_atomic_float16_div_rev
__kmpc_atomic_float16_max
__kmpc_atomic_float16_max_cpt
__kmpc_atomic_float16_min
__kmpc_atomic_float16_min_cpt
__kmpc_atomic_float16_mul
__kmpc_atomic_float16_mul_cpt
__kmpc_atomic_float16_rd
__kmpc_atomic_float16_sub
__kmpc_atomic_float16_sub_cpt
__kmpc_atomic_float16_sub_cpt_rev
__kmpc_atomic_float16_sub_rev
__kmpc_atomic_float16_swp
__kmpc_atomic_float16_wr
@endcode
Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of size
4,8,10 or 16 bytes. The names here are based on the size of the component float,
*not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an operation
on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
@code
__kmpc_atomic_cmplx4_add
__kmpc_atomic_cmplx4_add_cmplx8
__kmpc_atomic_cmplx4_add_cpt
__kmpc_atomic_cmplx4_div
__kmpc_atomic_cmplx4_div_cmplx8
__kmpc_atomic_cmplx4_div_cpt
__kmpc_atomic_cmplx4_div_cpt_rev
__kmpc_atomic_cmplx4_div_rev
__kmpc_atomic_cmplx4_mul
__kmpc_atomic_cmplx4_mul_cmplx8
__kmpc_atomic_cmplx4_mul_cpt
__kmpc_atomic_cmplx4_rd
__kmpc_atomic_cmplx4_sub
__kmpc_atomic_cmplx4_sub_cmplx8
__kmpc_atomic_cmplx4_sub_cpt
__kmpc_atomic_cmplx4_sub_cpt_rev
__kmpc_atomic_cmplx4_sub_rev
__kmpc_atomic_cmplx4_swp
__kmpc_atomic_cmplx4_wr
__kmpc_atomic_cmplx8_add
__kmpc_atomic_cmplx8_add_cpt
__kmpc_atomic_cmplx8_div
__kmpc_atomic_cmplx8_div_cpt
__kmpc_atomic_cmplx8_div_cpt_rev
__kmpc_atomic_cmplx8_div_rev
__kmpc_atomic_cmplx8_mul
__kmpc_atomic_cmplx8_mul_cpt
__kmpc_atomic_cmplx8_rd
__kmpc_atomic_cmplx8_sub
__kmpc_atomic_cmplx8_sub_cpt
__kmpc_atomic_cmplx8_sub_cpt_rev
__kmpc_atomic_cmplx8_sub_rev
__kmpc_atomic_cmplx8_swp
__kmpc_atomic_cmplx8_wr
__kmpc_atomic_cmplx10_add
__kmpc_atomic_cmplx10_add_cpt
__kmpc_atomic_cmplx10_div
__kmpc_atomic_cmplx10_div_cpt
__kmpc_atomic_cmplx10_div_cpt_rev
__kmpc_atomic_cmplx10_div_rev
__kmpc_atomic_cmplx10_mul
__kmpc_atomic_cmplx10_mul_cpt
__kmpc_atomic_cmplx10_rd
__kmpc_atomic_cmplx10_sub
__kmpc_atomic_cmplx10_sub_cpt
__kmpc_atomic_cmplx10_sub_cpt_rev
__kmpc_atomic_cmplx10_sub_rev
__kmpc_atomic_cmplx10_swp
__kmpc_atomic_cmplx10_wr
__kmpc_atomic_cmplx16_add
__kmpc_atomic_cmplx16_add_cpt
__kmpc_atomic_cmplx16_div
__kmpc_atomic_cmplx16_div_cpt
__kmpc_atomic_cmplx16_div_cpt_rev
__kmpc_atomic_cmplx16_div_rev
__kmpc_atomic_cmplx16_mul
__kmpc_atomic_cmplx16_mul_cpt
__kmpc_atomic_cmplx16_rd
__kmpc_atomic_cmplx16_sub
__kmpc_atomic_cmplx16_sub_cpt
__kmpc_atomic_cmplx16_sub_cpt_rev
__kmpc_atomic_cmplx16_swp
__kmpc_atomic_cmplx16_wr
@endcode
*/
#include <atomic>
#include <cstdint>
#include "interface.h"
#include "mlfsr32.h" /* For random backoffs */
#if (0)
// Here temporarily... we may need something like this (though we're already relying
// on __uint128_t and atomic<> for the double complex cases below).
// Everything from here to the matching #endif is compiled out by the "#if (0)".
typedef __uint128_t uint128_t;
// Two pointer-sized integers overlaid on one 128 bit integer, so that the pair
// can be handed to a single 128 bit compare-and-swap.
// NOTE(review): this assumes two uintptr_t exactly fill a uint128_t (i.e. a
// 64 bit target) and uses an unnamed struct member -- confirm both if this code
// is ever enabled.
typedef union intValue {
struct {
uintptr_t p1;
uintptr_t p2;
};
uint128_t intValue;
} pointerPair_t;
// Strong compare-and-swap of the 128 bit image of *old.
//   old      location to update; it is reinterpreted as std::atomic<uint128_t>
//   expected value the location is expected to hold; compare_exchange_strong
//            overwrites expected.intValue with the value found on failure
//   desired  value to store if the comparison succeeds
// Returns the result of compare_exchange_strong (true when the store happened).
bool cas128(pointerPair_t * old, pointerPair_t & expected,
pointerPair_t & desired) {
std::atomic<uint128_t> * ap = (std::atomic<uint128_t> *)old;
return ap->compare_exchange_strong(expected.intValue, desired.intValue);
}
#endif
namespace lomp {
// Generates the function __kmpc_atomic_<typetag>_<optag> for an operation that
// std::atomic<type> offers directly as a compound assignment.
//   type     element type of the location being updated
//   typetag  type encoding used in the generated name (e.g. fixed4)
//   op       operator token; it is pasted onto '=' to form the op= operator
//   optag    operation encoding used in the generated name (e.g. add)
//   reversed accepted so every leaf macro takes the same argument list from
//            the FOREACH_* tables; it is not used by this macro
// The first two parameters of the generated function are unnamed and ignored.
// NOTE(review): the header comment documents the second one as `int gtid`,
// whereas it is declared `int *` here -- confirm against the callers.
#define expandInlineBinaryOp(type, typetag, op, optag, reversed)               \
  void __kmpc_atomic_##typetag##_##optag(ident_t *, int *, type * target,      \
                                         type operand) {                       \
    /* View the caller's storage as an atomic object of the same type. */      \
    auto * const atomicTarget = reinterpret_cast<std::atomic<type> *>(target); \
    (*atomicTarget) op##= operand;                                             \
  }
// Overlays a value of type T with the integer type typeTraits_t<T>::uint_t so
// that the CAS based macros in this file can compute with typeValue and
// compare/exchange through uintValue.
// NOTE(review): this relies on typeTraits_t<T>::uint_t having the same size as
// T and on reading the union member that was not last written; neither is
// checked here -- confirm against the typeTraits_t specializations.
template <typename T>
union bitRep {
// Align the value to its own size rather than to T's natural alignment.
alignas(sizeof(T)) T typeValue;
typename typeTraits_t<T>::uint_t uintValue;
// Need empty constructor and destructor since std::complex<double>
// causes a hidden destructor problem otherwise.
// (Why it is different from complex<float> is unclear!)
// Both are deliberately empty: neither member is initialized or destroyed.
bitRep() {}
~bitRep() {}
};
// Generates the function __kmpc_atomic_<typetag>_<optag> for an operation that
// has to be built from compare-and-swap on the bit image of the value.
//   type     element type of the location being updated
//   typetag  type encoding used in the generated name
//   mutator  callable taking (left, right) and returning the new value
//   optag    operation encoding used in the generated name
//   reversed when true the operand is passed as the left argument and the
//            current value as the right one; it is a compile time constant at
//            every expansion, so the selection should not cost anything at
//            runtime
// Two attempts are made immediately. After that each round sleeps in a
// randomExponentialBackoff, reloads the location, and makes two more attempts.
// The first two parameters of the generated function are unnamed and ignored.
#define expandCasBinaryOp(type, typetag, mutator, optag, reversed)             \
  void __kmpc_atomic_##typetag##_##optag(ident_t *, int *, type * target,      \
                                         type operand) {                       \
    typedef typename typeTraits_t<type>::uint_t imageType;                     \
    typedef bitRep<type> valueBits;                                            \
    std::atomic<imageType> * const cell =                                      \
        reinterpret_cast<std::atomic<imageType> *>(target);                    \
                                                                               \
    valueBits seen;                                                            \
    valueBits wanted;                                                          \
                                                                               \
    /* One attempt: derive the new value from `seen` and try to install it. */ \
    /* A failed exchange leaves the value found in memory in `seen`. */        \
    auto tryOnce = [&]() -> bool {                                             \
      wanted.typeValue = (reversed) ? mutator(operand, seen.typeValue)         \
                                    : mutator(seen.typeValue, operand);        \
      return cell->compare_exchange_strong(seen.uintValue, wanted.uintValue);  \
    };                                                                         \
                                                                               \
    seen.uintValue = *cell;                                                    \
    if (tryOnce() || tryOnce())                                                \
      return;                                                                  \
                                                                               \
    /* Contended: back off before each further pair of attempts. */            \
    randomExponentialBackoff backoff;                                          \
    do {                                                                       \
      backoff.sleep();                                                         \
      seen.uintValue = *cell;                                                  \
    } while (!(tryOnce() || tryOnce()));                                       \
  }
// Generates the function __kmpc_atomic_<typetag>_<optag> for an operation that
// frequently leaves the stored value unchanged.
//   type     element type of the location being updated
//   typetag  type encoding used in the generated name
//   function callable taking (current, operand) and returning the new value
//   optag    operation encoding used in the generated name
// Each attempt first computes the new value; if it compares equal to the value
// just observed nothing is written and the function returns. Otherwise the new
// value is installed with compare-and-swap. Two attempts are made immediately;
// after that each round sleeps in a randomExponentialBackoff, reloads the
// location, and makes two more attempts.
// The first two parameters of the generated function are unnamed and ignored.
#define expandCasCheckedOp(type, typetag, function, optag)                     \
  void __kmpc_atomic_##typetag##_##optag(ident_t *, int *, type * target,      \
                                         type operand) {                       \
    typedef typename typeTraits_t<type>::uint_t imageType;                     \
    typedef bitRep<type> valueBits;                                            \
    std::atomic<imageType> * const cell =                                      \
        reinterpret_cast<std::atomic<imageType> *>(target);                    \
                                                                               \
    valueBits seen;                                                            \
    valueBits wanted;                                                          \
                                                                               \
    /* True once no store is needed or our store has succeeded. */             \
    /* A failed exchange leaves the value found in memory in `seen`. */        \
    auto settled = [&]() -> bool {                                             \
      wanted.typeValue = function(seen.typeValue, operand);                    \
      if (wanted.typeValue == seen.typeValue)                                  \
        return true;                                                           \
      return cell->compare_exchange_strong(seen.uintValue, wanted.uintValue);  \
    };                                                                         \
                                                                               \
    seen.uintValue = *cell;                                                    \
    if (settled() || settled())                                                \
      return;                                                                  \
                                                                               \
    /* Contended: back off before each further pair of attempts. */            \
    randomExponentialBackoff backoff;                                          \
    do {                                                                       \
      backoff.sleep();                                                         \
      seen.uintValue = *cell;                                                  \
    } while (!(settled() || settled()));                                       \
  }
// clang-format off
// Don't want any of these turned into two per line, which is hard to read and edit
//
// Operations which can be expressed as std::atomic<type> operator op=(type operand).
// The choices of macros here are somewhat determined by what makes sense
// for integers vs floats, and, also what implementation is possible.
// For instance C++ (<C++20) does not have float atomics, multiply/divide,
// or && and || atomics on any type.
// Table of the additive operations: invokes macro(type, typetag, op, optag,
// reversed) once for add (+) and once for sub (-), both with reversed == false.
// The op argument is a bare operator token, so this table suits a leaf macro
// that pastes it into an op= expression (expandInlineBinaryOp in this file).
#define FOREACH_ADD_OPERATION(macro, type, typetag) \
macro(type, typetag, +, add, false) \
macro(type, typetag, -, sub, false)
// Table of the operations that are implemented with CAS for every type:
// mul, div, and the reversed forms sub_rev and div_rev (reversed == true, so
// the operand becomes the left argument). Despite the name this table also
// carries sub_rev. The third argument is the name of a do* helper function,
// not an operator token.
#define FOREACH_MUL_OPERATION(macro, type, typetag) \
macro(type, typetag, doMul, mul, false) \
macro(type, typetag, doDiv, div, false) \
macro(type, typetag, doSub, sub_rev, true) \
macro(type, typetag, doDiv, div_rev, true)
// Table of the bitwise operations: andb (&), orb (|) and xor (^), all with
// reversed == false. As in FOREACH_ADD_OPERATION the op argument is a bare
// operator token for a leaf macro that forms op= from it.
#define FOREACH_BITLOGICAL_OPERATION(macro, type, typetag) \
macro(type, typetag, &, andb, false) \
macro(type, typetag, |, orb, false) \
macro(type, typetag, ^, xor, false)
// Operators not supported as op= by std::atomic<int>
// We'll need to do them with CAS
// Table of the shift operations: shl and shr, plus shl_rev and shr_rev which
// set reversed == true so that the operand is the value being shifted and the
// current contents of the location are the shift count.
#define FOREACH_SHIFT_OPERATION(macro, type, typetag) \
macro(type, typetag, doShiftLeft, shl, false) \
macro(type, typetag, doShiftRight, shr, false) \
macro(type, typetag, doShiftLeft, shl_rev, true) \
macro(type, typetag, doShiftRight, shr_rev, true)
// Table of the logical operations andl (&&) and orl (||). Entries have four
// arguments (there is no reversed flag), so the leaf macro must take
// (type, typetag, function, optag) as expandCasCheckedOp does.
#define FOREACH_LOGICAL_OPERATION(macro, type, typetag) \
macro(type, typetag, doLogAnd, andl) \
macro(type, typetag, doLogOr, orl )
// Table of the min and max operations, implemented with std::min and std::max.
// Entries have four arguments (no reversed flag), matching expandCasCheckedOp.
// NOTE(review): std::min/std::max are declared in <algorithm>, which this file
// does not include directly -- presumably it arrives via another header;
// confirm.
#define FOREACH_EXTREME_OPERATION(macro, type, typetag) \
macro(type, typetag, std::min, min ) \
macro(type, typetag, std::max, max )
// The CAS based entry points take their operation as a callable, so every
// operator is wrapped in a function template here -- including the ones that
// could have been written as a plain expression.
//
// generateOperatorFunction(Tag, token) defines
//   template <typename V> V doTag(V lhs, V rhs)
// which evaluates `lhs token rhs` and converts the result back to V.
#define generateOperatorFunction(opTag, opToken)                               \
  template <typename ValueT>                                                   \
  ValueT do##opTag(ValueT lhs, ValueT rhs) {                                   \
    ValueT const outcome = lhs opToken rhs;                                    \
    return outcome;                                                            \
  }

// The list of (function name suffix, operator token) pairs to wrap.
#define FOREACH_OPERATOR(emit)                                                 \
  emit(Add, +)                                                                 \
  emit(Sub, -)                                                                 \
  emit(Mul, *)                                                                 \
  emit(Div, /)                                                                 \
  emit(ShiftRight, >>)                                                         \
  emit(ShiftLeft, <<)                                                          \
  emit(BitAnd, &)                                                              \
  emit(BitOr, |)                                                               \
  emit(Xor, ^)                                                                 \
  emit(LogAnd, &&)                                                             \
  emit(LogOr, ||)

// Instantiate the list: defines doAdd, doSub, doMul, doDiv, doShiftRight,
// doShiftLeft, doBitAnd, doBitOr, doXor, doLogAnd and doLogOr.
FOREACH_OPERATOR(generateOperatorFunction)
// FP operations all need CAS (until C++20)
// These are
// +, -
// *, /, reversed - reversed /
// max, min
// This table covers add and sub plus everything in FOREACH_MUL_OPERATION
// (mul, div, sub_rev, div_rev). max and min are not listed here; they are in
// FOREACH_EXTREME_OPERATION, whose entries have a different argument count.
#define FOREACH_FP_OPERATION(macro, type, typetag) \
macro(type, typetag, doAdd, add, false) \
macro(type, typetag, doSub, sub, false) \
FOREACH_MUL_OPERATION(macro,type,typetag)
// Applies expansionMacro(leafMacro, type, typetag) to each of the eight
// fixed-width integer types, pairing every C++ type with the name encoding
// used in the __kmpc_atomic_* function names. expansionMacro is expected to be
// one of the FOREACH_*_OPERATION tables and leafMacro one of the expand*
// macros that emits a function definition.
#define FOREACH_STD_INTEGER_TYPE(expansionMacro, leafMacro) \
expansionMacro(leafMacro, int8_t, fixed1) \
expansionMacro(leafMacro, uint8_t, fixed1u) \
expansionMacro(leafMacro, int16_t, fixed2) \
expansionMacro(leafMacro, uint16_t, fixed2u) \
expansionMacro(leafMacro, int32_t, fixed4) \
expansionMacro(leafMacro, uint32_t, fixed4u) \
expansionMacro(leafMacro, int64_t, fixed8) \
expansionMacro(leafMacro, uint64_t, fixed8u)
// Applies expansionMacro(leafMacro, type, typetag) to float (float4) and
// double (float8). No float10 or float16 entries are generated.
#define FOREACH_FP_TYPE(expansionMacro, leafMacro) \
expansionMacro(leafMacro, float, float4) \
expansionMacro(leafMacro, double, float8)
// Applies expansionMacro(leafMacro, type, typetag) to std::complex<float>
// (cmplx4) and std::complex<double> (cmplx8). The tag is the size of the
// component float, not of the whole complex value.
// NOTE(review): std::complex is declared in <complex>, which this file does
// not include directly -- presumably it arrives via another header; confirm.
#define FOREACH_COMPLEX_TYPE(expansionMacro, leafMacro) \
expansionMacro(leafMacro, std::complex<float>, cmplx4) \
expansionMacro(leafMacro, std::complex<double>, cmplx8)
// clang-format on
// complex<double> needs __uint128_t. All of the compilers seem to
// support that for our targets, and the associated
// std::atomic<__uint128_t>, though GCC does not inline the "lock;
// cmpxchg16b" on X86_64... (Conclusion: use LLVM, which does when
// given the -mcx16 flag).
// ***TODO*** check alignment issues. If the complex<double> is
// only alignas(8), then the atomic may be very slow when crossing
// a cache line.
/* ***TODO***: capture versions of operations, reads and writes with no operator,
* swap (exchange).
*/
// The expansions below define the entry points themselves. They are placed in
// an extern "C" block so the __kmpc_atomic_* functions have C language linkage.
extern "C" {
// Expand all of the integer cases.
// add, sub and the bitwise operations use std::atomic's own op= operators.
FOREACH_STD_INTEGER_TYPE(FOREACH_ADD_OPERATION, expandInlineBinaryOp)
FOREACH_STD_INTEGER_TYPE(FOREACH_BITLOGICAL_OPERATION, expandInlineBinaryOp)
// Shifts, mul, div and the reversed forms go through the CAS loop.
FOREACH_STD_INTEGER_TYPE(FOREACH_SHIFT_OPERATION, expandCasBinaryOp)
FOREACH_STD_INTEGER_TYPE(FOREACH_MUL_OPERATION, expandCasBinaryOp)
// Logical and min/max use the CAS loop that skips the store when the value
// would not change.
FOREACH_STD_INTEGER_TYPE(FOREACH_LOGICAL_OPERATION, expandCasCheckedOp)
FOREACH_STD_INTEGER_TYPE(FOREACH_EXTREME_OPERATION, expandCasCheckedOp)
// Expand all of the floating point cases
FOREACH_FP_TYPE(FOREACH_FP_OPERATION, expandCasBinaryOp)
FOREACH_FP_TYPE(FOREACH_EXTREME_OPERATION, expandCasCheckedOp)
// Complex operations Work OK with clang 9.0, but not with the (old,
// gcc 4.8.5) headers I find on Linux.
// They are therefore generated only when GENERATE_COMPLEX_ATOMICS evaluates to
// non-zero; if the macro is not defined the #if treats it as 0.
#if (GENERATE_COMPLEX_ATOMICS)
FOREACH_COMPLEX_TYPE(FOREACH_FP_OPERATION, expandCasBinaryOp)
#endif
}
} // namespace lomp