-
Notifications
You must be signed in to change notification settings - Fork 51
/
cbf.py
102 lines (82 loc) · 3.34 KB
/
cbf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Demonstrator for byte-offset compression in OpenCL (plus a numpy sketch of the decompression principle)
import numpy
import fabio
import pyopencl, pyopencl.array
import time
import os
# os.environ["PYOPENCL_CTX"] = "1:0"
os.environ["PYOPENCL_COMPILER_OUTPUT"] = "1"
def decomp_vec(raw_n):
"principle of implementation in numpy"
size = raw_n.size
lel = numpy.ones(size, dtype="uint8")
mask8 = raw_n == -128
lel[mask8] = 3
for i in numpy.where(mask8)[0]:
if (raw_n[i + 1] == 0) and (raw_n[i + 2] == -128):
lel[i] = 7
print(i)
lem = numpy.zeros_like(lel)
return lel
def profile(evt, cmt=""):
    """Wait for ``evt`` to finish, then print how long it ran.

    The duration is ``evt.profile.end - evt.profile.start`` scaled by 1e-6
    and printed as milliseconds, prefixed with ``cmt``.

    :param evt: object exposing ``wait()`` and ``profile.start`` / ``profile.end``
    :param cmt: label printed in front of the timing
    """
    evt.wait()
    timing = evt.profile
    elapsed_ms = 1e-6 * (timing.end - timing.start)
    report = "%s Exec time: %.3fms" % (cmt, elapsed_ms)
    print(report)
# --- OpenCL context, reference data, device buffers and program -------------
ctx = pyopencl.create_some_context(interactive=True)
fname = "testimages/run2_1_00148.cbf"
cbf = fabio.cbfimage.CbfImage()
# Decoded image: this is what the comp_byte_offset kernels receive as input.
data = fabio.open(fname).data
# Raw bytes of the same file; the benchmark loop compares the kernel output
# against them. NOTE(review): presumably the still-compressed binary section
# of the CBF file -- confirm against fabio's CbfImage.read(only_raw=True).
raw = cbf.read(fname, only_raw=True)
# Profiling has to be enabled on the queue: profile() reads
# evt.profile.start / evt.profile.end.
properties = pyopencl.command_queue_properties.PROFILING_ENABLE
queue = pyopencl.CommandQueue(ctx, properties=properties)
# numpy.fromstring is deprecated for binary input. frombuffer returns a
# read-only view, so copy() restores the writable, owning array that
# fromstring used to return.
raw_n = numpy.frombuffer(raw, dtype="int8").copy()
size = raw_n.size
raw_d = pyopencl.array.to_device(queue, raw_n)
# The buffers int_d, tmp1_d, tmp3_d and lem_d are allocated here but are not
# referenced by the benchmark loop of this script.
int_d = pyopencl.array.empty(queue, (size,), dtype="int32")
data_d = pyopencl.array.empty(queue, (data.size,), dtype="int32")
tmp1_d = pyopencl.array.zeros_like(data_d)
tmp2_d = pyopencl.array.zeros_like(data_d)
tmp3_d = pyopencl.array.zeros_like(data_d)
lem_d = pyopencl.array.empty_like(data_d)
zero_d = pyopencl.array.zeros(queue, shape=1, dtype="int32")
# Read the kernel source through a context manager so the file handle is
# closed deterministically.
with open("sandbox/cbf.cl") as kernel_file:
    src = kernel_file.read()
prg = pyopencl.Program(ctx, src).build()
# --- Benchmark: sweep the work-group size over 1, 2, 4, ... 1024 ------------
# Launching with a local size larger than the device supports makes the
# kernel enqueue fail, so the sweep stops at the device limit.
max_wg = queue.device.max_work_group_size
for i in range(11):
    WG = 1 << i
    if WG > max_wg:
        print("WG: %s exceeds the device limit of %s, stopping" % (WG, max_wg))
        break
    print("#" * 80)
    print("WG: %s" % WG)
    # Three local-memory scratch areas of 4 bytes per work-item.
    la = pyopencl.LocalMemory(4 * WG)
    lb = pyopencl.LocalMemory(4 * WG)
    lc = pyopencl.LocalMemory(4 * WG)
    debug1_d = pyopencl.array.zeros(queue, shape=WG, dtype="int32")
    debug2_d = pyopencl.array.zeros(queue, shape=WG, dtype="int32")
    debug3_d = pyopencl.array.zeros(queue, shape=WG, dtype="int32")
    wgsum_d = pyopencl.array.zeros(queue, shape=WG, dtype="int32")
    t0 = time.time()
    data_d.set(data.ravel())
    size = data.size
    # Both kernels are launched with WG * WG work-items in groups of WG;
    # chunk is size divided by WG twice, rounding up each time.
    # NOTE(review): presumably the number of elements per work-item -- confirm
    # against sandbox/cbf.cl.
    chunk = ((size + WG - 1) // WG + WG - 1) // WG
    zero_d.fill(0)
    tmp2_d.fill(0)
    evt = prg.comp_byte_offset1(queue, (WG * WG,), (WG,),
                                data_d.data, tmp2_d.data, numpy.uint32(size), numpy.uint32(chunk), wgsum_d.data, zero_d.data,
                                la, lb, lc, debug1_d.data, debug2_d.data, debug3_d.data)
    profile(evt, "comp_byte_offset1")
    # Create dest buffers: the last entry of wgsum_d is used as output size.
    tmp_cumsum = wgsum_d.get()
    dest_size = tmp_cumsum[-1]
    print("Start process: %s" % debug3_d)
    print("End process: %s" % debug2_d)
    print("Total Size: %s" % (dest_size))
    print("After small cumsum=%s" % (tmp_cumsum))
    print("Counters= %s" % (debug1_d))
    target_d = pyopencl.array.zeros(queue, (dest_size,), dtype="int8")
    evt = prg.comp_byte_offset2(queue, (WG * WG,), (WG,),
                                data_d.data, tmp2_d.data, wgsum_d.data, target_d.data,
                                numpy.uint32(size), numpy.uint32(dest_size), numpy.uint32(chunk))
    profile(evt, "comp_byte_offset2")
    print("Total time : %.3fms" % (1000 * (time.time() - t0)))
    # Compare with the reference bytes. Subtracting arrays of different sizes
    # raises a broadcasting error, so a size mismatch is reported instead.
    result = target_d.get()
    if result.size == raw_n.size:
        print("residual error: %s" % (numpy.where(raw_n - result)))
    else:
        print("residual error: size mismatch, expected %s bytes, got %s" % (raw_n.size, result.size))