forked from JuliaGPU/CUDA.jl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vadd.ptx
39 lines (32 loc) · 766 Bytes
/
vadd.ptx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// Module-level directives; they apply to everything that follows in this file.
// PTX ISA version the module is written against.
.version 3.1
// Target architecture the code is generated for.
.target sm_20
// Addresses in this module are 64 bits wide.
.address_size 64
// vadd: element-wise f32 addition, one element per thread.
//
//   vadd_param_0   64-bit address of the first f32 input buffer   (read)
//   vadd_param_1   64-bit address of the second f32 input buffer  (read)
//   vadd_param_2   64-bit address of the f32 output buffer        (written)
//
// Each thread forms i = %ntid.x * %ctaid.x + %tid.x as a signed 32-bit value
// and stores param_0[i] + param_1[i] into param_2[i].
//
// NOTE(review): the kernel takes no length argument and performs no bounds
// check; presumably the launch grid covers exactly the buffer length --
// confirm against the host-side launch code.
.visible .entry vadd(
    .param .u64 vadd_param_0,
    .param .u64 vadd_param_1,
    .param .u64 vadd_param_2
)
{
    .reg .s32 %cta_size, %cta_idx, %thr_idx, %elem;
    .reg .s64 %in0_gen, %in1_gen, %out_gen;
    .reg .s64 %in0, %in1, %out;
    .reg .s64 %byte_off, %in0_at, %in1_at, %out_at;
    .reg .f32 %x0, %x1, %sum;

    // Fetch the three buffer addresses from the kernel parameters.
    ld.param.u64        %in0_gen, [vadd_param_0];
    ld.param.u64        %in1_gen, [vadd_param_1];
    ld.param.u64        %out_gen, [vadd_param_2];

    // The loads and the store below use the .global state space, so each
    // incoming address is first converted to a global-space address.
    cvta.to.global.u64  %in0, %in0_gen;
    cvta.to.global.u64  %in1, %in1_gen;
    cvta.to.global.u64  %out, %out_gen;

    // elem = ntid.x * ctaid.x + tid.x  (low 32 bits, signed)
    mov.u32             %cta_size, %ntid.x;
    mov.u32             %cta_idx, %ctaid.x;
    mov.u32             %thr_idx, %tid.x;
    mad.lo.s32          %elem, %cta_size, %cta_idx, %thr_idx;

    // byte_off = (s64)elem * 4; the 4 matches the 4-byte f32 accesses below.
    // The same offset is shared by all three buffers.
    mul.wide.s32        %byte_off, %elem, 4;
    add.s64             %in0_at, %in0, %byte_off;
    add.s64             %in1_at, %in1, %byte_off;
    add.s64             %out_at, %out, %byte_off;

    // sum = in0[elem] + in1[elem]
    ld.global.f32       %x1, [%in1_at];
    ld.global.f32       %x0, [%in0_at];
    add.f32             %sum, %x0, %x1;

    // out[elem] = sum
    st.global.f32       [%out_at], %sum;
    ret;
}