1+ # Copyright 2017 the Arraymancer contributors
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
15+ import ../ data_structure,
16+ ./ opencl_global_state,
17+ ./ global_config,
18+ ./ metadataArray,
19+ nimcl, opencl, macros
20+
21+ export nimcl, opencl, opencl_global_state
22+
23+
24+ # Data structures to ease interfacing with OpenCL and kernels
25+
proc toClpointer*[T](p: ptr T|ptr UncheckedArray[T]): PMem {.noSideEffect.} =
  ## Reinterprets a typed pointer (`ptr T` or `ptr UncheckedArray[T]`) as the
  ## opaque `PMem` handle type expected by the OpenCL bindings.
  ## This is a plain bit-cast: nothing is allocated, copied or validated.
  result = cast[PMem](p)
28+
proc clMalloc*[T](size: Natural): ptr UncheckedArray[T] {.inline.} =
  ## Internal proc.
  ## Wraps nimcl's `buffer[T]`: requests a buffer from the global context
  ## `clContext0`, forwarding `size` unchanged, and exposes the returned
  ## handle as a typed pointer.
  # NOTE(review): the result is an OpenCL handle in disguise - presumably it
  # must never be dereferenced on the host, only converted back with
  # `toClpointer`. Confirm against callers.
  let handle = buffer[T](clContext0, size)
  result = cast[ptr UncheckedArray[T]](handle)
35+
proc deallocCl*[T](p: ref[ptr UncheckedArray[T]]) {.noSideEffect.} =
  ## Releases the OpenCL memory object tracked by `p`.
  ## Does nothing when the tracked pointer is nil; otherwise the pointer is
  ## converted back to a `PMem` handle and handed to `releaseMemObject`,
  ## whose status code is verified by `check`.
  ## This proc is the one passed to `new` as finalizer in this module.
  if p[].isNil:
    return
  check releaseMemObject(p[].toClpointer)
39+
40+ # ##############################################################
41+ # # Base ClStorage type
42+
proc newClStorage*[T: SomeReal](length: int): ClStorage[T] =
  ## Builds a `ClStorage[T]` whose buffer is obtained through `clMalloc`.
  ##
  ## - `Flen` records the requested `length`.
  ## - `Fdata` holds the pointer returned by `clMalloc`.
  ## - `Fref_tracking` is a `ref` created with `deallocCl` as its finalizer
  ##   and pointing at the same buffer, so the buffer is released once the
  ##   reference is finalized.
  new(result.Fref_tracking, deallocCl)
  result.Flen = length

  let devicePtr = clMalloc[T](result.Flen)
  result.Fdata = devicePtr
  result.Fref_tracking[] = devicePtr
48+
49+ # #########################################################
50+ # # Sending tensor layout to OpenCL Kernel
51+
type
  ClLayoutArray = ref[ptr UncheckedArray[cint]]
    ## Reference to an array of `cint` living on the device.
    # TODO: finalizer
    # or replace by a distinct type with a destructor

  ClTensorLayout[T: SomeReal] = object
    ## Mimics CudaTensor.
    ## Tensor metadata stored on the GPU or another accelerator.

    rank*: cint               # Number of dimensions of the tensor
    shape*: ClLayoutArray     # Device array holding the extent of each dimension
    strides*: ClLayoutArray   # Device array holding the stride of each dimension
    offset*: cint             # Copied from the tensor's `offset`
    data*: ptr T              # Data on the OpenCL device
    len*: cint                # Number of elements allocated in memory
68+
proc layoutOnDevice*[T: SomeReal](t: ClTensor[T]): ClTensorLayout[T] =
  ## Store a ClTensor's shape, strides, etc. information on the GPU.
  ##
  ## The scalar fields (`rank`, `offset`, `data`, `len`) are copied into the
  ## result directly. `shape` and `strides` each get a freshly allocated
  ## device buffer of `MAXRANK` `cint`s (released by the `deallocCl`
  ## finalizer), of which only the first `t.rank` entries are written.
  ##
  ## For a rank-0 tensor no upload is performed: the buffers are allocated
  ## but left unwritten.
  ##
  ## Raises whatever `check` raises when an OpenCL call reports an error.
  #
  # TODO: instead of storing pointers to shape/stride/etc that are passed to
  # each kernel, pass the layout object directly and call it with
  # layout->shape, layout->rank

  result.rank = t.rank.cint
  result.offset = t.offset.cint
  result.data = t.get_data_ptr
  result.len = t.size.cint

  new result.shape, deallocCl
  new result.strides, deallocCl

  result.shape[] = clMalloc[cint](MAXRANK)
  result.strides[] = clMalloc[cint](MAXRANK)

  # Number of bytes to upload for each of shape and strides.
  let size = t.rank * sizeof(cint)
  if size == 0:
    # A zero-byte clEnqueueWriteBuffer is rejected with CL_INVALID_VALUE by
    # OpenCL versions before 2.1, so skip the uploads when there is nothing
    # to copy.
    return

  # Host-side staging arrays, converted to the `cint` layout of the device.
  var
    tmp_shape: array[MAXRANK, cint]
    tmp_strides: array[MAXRANK, cint]

  for i in 0 ..< t.rank:
    tmp_shape[i] = t.shape[i].cint
    tmp_strides[i] = t.strides[i].cint

  # TODO: use streams and async
  check enqueueWriteBuffer(
    clQueue0,
    result.shape[].toClpointer,
    CL_false, # Non-blocking copy
    0,
    size,
    addr tmp_shape[0],
    0, nil, nil
  )

  check enqueueWriteBuffer(
    clQueue0,
    result.strides[].toClpointer,
    CL_true, # Blocking copy, we don't want tmp_strides (and tmp_shape) to disappear while the copy is pending
    0,
    size,
    addr tmp_strides[0],
    0, nil, nil
  )
117+
118+
119+ # #########################################################
120+ # # Variadic number of args, to remove after https://github.com/unicredit/nimcl/pull/1
121+
122+ # ### Taken from nimcl
template setArg(kernel: PKernel, item: PMem, index: int) =
  ## Binds the memory handle `item` to argument slot `index` of `kernel`.
  # setKernelArg needs an address to read the handle from, hence the copy
  # into an addressable local.
  var memHandle = item
  check(setKernelArg(kernel, uint32(index), sizeof(PMem), addr memHandle))
126+
template setArg[A](kernel: PKernel, item: var A, index: int) =
  ## Binds the addressable value `item` to argument slot `index` of `kernel`,
  ## passing `sizeof(A)` bytes read from its address.
  check(setKernelArg(kernel, uint32(index), sizeof(A), addr item))
129+
template setArg[A](kernel: PKernel, item: LocalBuffer[A], index: int) =
  ## Sets argument slot `index` of `kernel` to a size-only argument:
  ## `int(item) * sizeof(A)` bytes with a nil data pointer.
  # NOTE(review): presumably this is how a kernel-local scratch buffer of
  # `int(item)` elements is declared - confirm against the nimcl docs.
  check(setKernelArg(kernel, uint32(index), int(item) * sizeof(A), nil))
132+
template setArg(kernel: PKernel, item: SomeInteger, index: int) =
  ## Binds the integer `item` to argument slot `index` of `kernel`, passing
  ## as many bytes as the integer type of `item` occupies.
  # The value is copied into a local first so that literals and other
  # non-addressable expressions can be passed too.
  var scalar = item
  check(setKernelArg(kernel, uint32(index), sizeof(type(item)), addr scalar))
136+ # ###
137+
macro args*(kernel: Pkernel, args: varargs[untyped]): untyped =
  ## Expands to one `kernel.setArg(arg, position)` statement per element of
  ## `args`, with positions counted from 0 in the order given.
  ##
  ## For example `kernel.args(a, b)` becomes:
  ##
  ## .. code:: nim
  ##   kernel.setArg(a, 0)
  ##   kernel.setArg(b, 1)
  result = newStmtList()

  # The position is tracked by hand: macro arguments offer no `pairs`.
  var position = 0
  for arg in args:
    let setCall = quote do:
      `kernel`.setArg(`arg`, `position`)
    result.add setCall
    position += 1
0 commit comments