-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
Copy pathintrinsic_wrapper.py
77 lines (61 loc) · 2.19 KB
/
intrinsic_wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from .decorators import jit
import numba
@jit(device=True)
def all_sync(mask, predicate):
    """
    Warp-wide "all" vote over the threads selected by ``mask``.

    Returns a non-zero value when ``predicate`` is true for every thread
    in the masked warp, and 0 otherwise.
    """
    # The second argument (0) picks the vote variant; element 1 of the
    # intrinsic's result carries the vote outcome.
    vote_result = numba.cuda.vote_sync_intrinsic(mask, 0, predicate)
    return vote_result[1]
@jit(device=True)
def any_sync(mask, predicate):
    """
    Warp-wide "any" vote over the threads selected by ``mask``.

    Returns a non-zero value when ``predicate`` is true for at least one
    thread in the masked warp, and 0 otherwise.
    """
    # The second argument (1) picks the vote variant; element 1 of the
    # intrinsic's result carries the vote outcome.
    vote_result = numba.cuda.vote_sync_intrinsic(mask, 1, predicate)
    return vote_result[1]
@jit(device=True)
def eq_sync(mask, predicate):
    """
    Warp-wide "uniform" vote over the threads selected by ``mask``.

    Returns a non-zero value when the boolean ``predicate`` has the same
    value on every thread in the masked warp, and 0 otherwise.
    """
    # The second argument (2) picks the vote variant; element 1 of the
    # intrinsic's result carries the vote outcome.
    vote_result = numba.cuda.vote_sync_intrinsic(mask, 2, predicate)
    return vote_result[1]
@jit(device=True)
def ballot_sync(mask, predicate):
    """
    Collect a ballot of ``predicate`` across the masked warp.

    Returns a mask of the threads in the warp that are within ``mask``
    and whose ``predicate`` is true.
    """
    # The second argument (3) picks the vote variant. Unlike the other
    # vote wrappers, the ballot is read from element 0 of the result.
    vote_result = numba.cuda.vote_sync_intrinsic(mask, 3, predicate)
    return vote_result[0]
@jit(device=True)
def shfl_sync(mask, value, src_lane):
    """
    Exchange ``value`` across the masked warp, reading from ``src_lane``.

    Returns the value held by lane ``src_lane``. If that lane is outside
    the warp, the caller's own ``value`` is returned.
    """
    # The second argument (0) picks the shuffle variant. The trailing 0x1f
    # (31) is presumably the lane clamp for a 32-wide warp - confirm
    # against the intrinsic's definition.
    shuffled = numba.cuda.shfl_sync_intrinsic(mask, 0, value, src_lane, 0x1f)
    return shuffled[0]
@jit(device=True)
def shfl_up_sync(mask, value, delta):
    """
    Exchange ``value`` across the masked warp, reading from a lower lane.

    Returns the value held by lane ``laneid - delta``. If that lane is
    outside the warp, the caller's own ``value`` is returned.
    """
    # The second argument (1) picks the shuffle variant. The last argument
    # is 0 here, not the 0x1f used by the other shuffle wrappers.
    # NOTE(review): presumably the lower clamp bound for an upward
    # shuffle - confirm against the intrinsic's definition.
    shuffled = numba.cuda.shfl_sync_intrinsic(mask, 1, value, delta, 0)
    return shuffled[0]
@jit(device=True)
def shfl_down_sync(mask, value, delta):
    """
    Exchange ``value`` across the masked warp, reading from a higher lane.

    Returns the value held by lane ``laneid + delta``. If that lane is
    outside the warp, the caller's own ``value`` is returned.
    """
    # The second argument (2) picks the shuffle variant. The trailing 0x1f
    # (31) is presumably the lane clamp for a 32-wide warp - confirm
    # against the intrinsic's definition.
    shuffled = numba.cuda.shfl_sync_intrinsic(mask, 2, value, delta, 0x1f)
    return shuffled[0]
@jit(device=True)
def shfl_xor_sync(mask, value, lane_mask):
    """
    Exchange ``value`` across the masked warp using an XOR lane pattern.

    Returns the value held by lane ``laneid ^ lane_mask``.
    """
    # The second argument (3) picks the shuffle variant. The trailing 0x1f
    # (31) is presumably the lane clamp for a 32-wide warp - confirm
    # against the intrinsic's definition.
    shuffled = numba.cuda.shfl_sync_intrinsic(mask, 3, value, lane_mask, 0x1f)
    return shuffled[0]