Skip to content

Commit e7bef4c

Browse files
authored
fixed UNKNOWN CUDA error on slope (#141)
* fixed UNKNOWN CUDA error on slope * added cupy support * aspect wip * gpu aspect tests passing * added gpu == cpu slope test * added skipif CUDA Device not Available * added GPU support for SAVI * fixed aspect tests
1 parent 295b661 commit e7bef4c

File tree

7 files changed

+455
-29
lines changed

7 files changed

+455
-29
lines changed

xrspatial/aspect.py

Lines changed: 92 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,23 @@
1+
from math import atan2
2+
from math import pi
13
import numpy as np
4+
import numba as nb
5+
6+
from numba import cuda
7+
28
from xarray import DataArray
39

410
from xrspatial.utils import ngjit
5-
11+
from xrspatial.utils import has_cuda
12+
from xrspatial.utils import cuda_args
613

714
RADIAN = 180 / np.pi
815

916

1017
@ngjit
1118
def _horn_aspect(data):
1219
out = np.zeros_like(data, dtype=np.float64)
20+
out[:] = np.nan
1321
rows, cols = data.shape
1422
for y in range(1, rows-1):
1523
for x in range(1, cols-1):
@@ -28,7 +36,7 @@ def _horn_aspect(data):
2836

2937
if dz_dx == 0 and dz_dy == 0:
3038
# flat surface, slope = 0, thus invalid aspect
31-
out[y, x] = np.nan
39+
out[y, x] = -1.
3240
else:
3341
aspect = np.arctan2(dz_dy, -dz_dx) * RADIAN
3442
# convert to compass direction values (0-360 degrees)
@@ -42,7 +50,57 @@ def _horn_aspect(data):
4250
return out
4351

4452

45-
def aspect(agg, name='aspect'):
53+
@cuda.jit(device=True)
54+
def _gpu_aspect(arr):
55+
56+
a = arr[0, 0]
57+
b = arr[0, 1]
58+
c = arr[0, 2]
59+
d = arr[1, 0]
60+
f = arr[1, 2]
61+
g = arr[2, 0]
62+
h = arr[2, 1]
63+
i = arr[2, 2]
64+
65+
two = nb.int32(2.) # reducing size to int8 causes wrong results
66+
eight = nb.int32(8.) # reducing size to int8 causes wrong results
67+
ninety = nb.float32(90.)
68+
69+
dz_dx = ((c + two * f + i) - (a + two * d + g)) / eight
70+
dz_dy = ((g + two * h + i) - (a + two * b + c)) / eight
71+
72+
if dz_dx == 0 and dz_dy == 0:
73+
# flat surface, slope = 0, thus invalid aspect
74+
aspect = nb.float32(-1.) # TODO: return null instead
75+
else:
76+
aspect = atan2(dz_dy, -dz_dx) * nb.float32(57.29578)
77+
# convert to compass direction values (0-360 degrees)
78+
if aspect < nb.float32(0.):
79+
aspect = ninety - aspect
80+
elif aspect > ninety:
81+
aspect = nb.float32(360.0) - aspect + ninety
82+
else:
83+
aspect = ninety - aspect
84+
85+
if aspect > nb.float32(359.999): # lame float equality check...
86+
return nb.float32(0.)
87+
else:
88+
return aspect
89+
90+
91+
@cuda.jit
92+
def _horn_aspect_cuda(arr, out):
93+
minus_one = nb.float32(-1.)
94+
i, j = cuda.grid(2)
95+
di = 1
96+
dj = 1
97+
if (i-di >= 1 and i+di < out.shape[0] - 1 and
98+
j-dj >= 1 and j+dj < out.shape[1] - 1):
99+
out[i, j] = _gpu_aspect(arr[i-di:i+di+1, j-dj:j+dj+1])
100+
101+
102+
103+
def aspect(agg, name='aspect', use_cuda=True, pad=True, use_cupy=True):
46104
"""Returns downward slope direction in compass degrees (0 - 360) with 0 at 12 o'clock.
47105
48106
Parameters
@@ -63,7 +121,37 @@ def aspect(agg, name='aspect'):
63121
if not isinstance(agg, DataArray):
64122
raise TypeError("agg must be instance of DataArray")
65123

66-
return DataArray(_horn_aspect(agg.data),
124+
if has_cuda() and use_cuda:
125+
126+
if pad:
127+
pad_rows = 3 // 2
128+
pad_cols = 3 // 2
129+
pad_width = ((pad_rows, pad_rows),
130+
(pad_cols, pad_cols))
131+
else:
132+
# If padding is not desired, set pads to 0
133+
pad_rows = 0
134+
pad_cols = 0
135+
pad_width = 0
136+
137+
data = np.pad(agg.data, pad_width=pad_width, mode="reflect")
138+
139+
griddim, blockdim = cuda_args(data.shape)
140+
out = np.empty(data.shape, dtype='f4')
141+
out[:] = np.nan
142+
143+
if use_cupy:
144+
import cupy
145+
out = cupy.asarray(out)
146+
147+
_horn_aspect_cuda[griddim, blockdim](data, out)
148+
if pad:
149+
out = out[pad_rows:-pad_rows, pad_cols:-pad_cols]
150+
151+
else:
152+
out = _horn_aspect(agg.data)
153+
154+
return DataArray(out,
67155
name=name,
68156
dims=agg.dims,
69157
coords=agg.coords,

xrspatial/multispectral.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
import numpy as np
2+
import numba as nb
3+
4+
from numba import cuda
25

36
import datashader as ds
47

58
from PIL import Image
69

710
from xarray import DataArray
811

12+
from xrspatial.utils import has_cuda
13+
from xrspatial.utils import cuda_args
914
from xrspatial.utils import ngjit
1015

1116

@@ -408,7 +413,19 @@ def _savi(nir_data, red_data, soil_factor):
408413
return out
409414

410415

411-
def savi(nir_agg, red_agg, soil_factor=1.0, name='savi'):
416+
@cuda.jit
417+
def _savi_gpu(nir_data, red_data, soil_factor, out):
418+
y, x = cuda.grid(2)
419+
if y < out.shape[0] and x < out.shape[1]:
420+
nir = nir_data[y, x]
421+
red = red_data[y, x]
422+
numerator = nir - red
423+
soma = nir + red + soil_factor
424+
denominator = soma * (1.0 + soil_factor)
425+
out[y, x] = numerator / denominator
426+
427+
428+
def savi(nir_agg, red_agg, soil_factor=1.0, name='savi', use_cuda=True, use_cupy=True):
412429
"""Returns Soil Adjusted Vegetation Index (SAVI).
413430
414431
Parameters
@@ -441,7 +458,26 @@ def savi(nir_agg, red_agg, soil_factor=1.0, name='savi'):
441458
if soil_factor > 1.0 or soil_factor < -1.0:
442459
raise ValueError("soil factor must be between (-1.0, 1.0)")
443460

444-
return DataArray(_savi(nir_agg.data, red_agg.data, soil_factor),
461+
nir_data = nir_agg.data
462+
red_data = red_agg.data
463+
464+
if has_cuda() and use_cuda:
465+
griddim, blockdim = cuda_args(nir_data.shape)
466+
out = np.empty(nir_data.shape, dtype='f4')
467+
out[:] = np.nan
468+
469+
if use_cupy:
470+
import cupy
471+
out = cupy.asarray(out)
472+
473+
_savi_gpu[griddim, blockdim](nir_data,
474+
red_data,
475+
soil_factor,
476+
out)
477+
else:
478+
out = _savi(nir_agg.data, red_agg.data, soil_factor)
479+
480+
return DataArray(out,
445481
name=name,
446482
coords=nir_agg.coords,
447483
dims=nir_agg.dims,

xrspatial/slope.py

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from math import atan
22
import numpy as np
3+
import numba as nb
34

45
from numba import cuda
56

@@ -13,6 +14,7 @@
1314
@ngjit
1415
def _horn_slope(data, cellsize_x, cellsize_y):
1516
out = np.zeros_like(data)
17+
out[:] = np.nan
1618
rows, cols = data.shape
1719
for y in range(1, rows-1):
1820
for x in range(1, cols-1):
@@ -42,27 +44,27 @@ def _gpu_slope(arr, cellsize_x, cellsize_y):
4244
h = arr[0, 1]
4345
i = arr[0, 2]
4446

45-
dz_dx = ((c + 2 * f + i) - (a + 2 * d + g)) / (8 * cellsize_x[0])
46-
dz_dy = ((g + 2 * h + i) - (a + 2 * b + c)) / (8 * cellsize_y[0])
47-
p = (dz_dx * dz_dx + dz_dy * dz_dy) ** .5
48-
return atan(p) * 57.29578
47+
two = nb.int32(2.) # reducing size to int8 causes wrong results
48+
49+
dz_dx = ((c + two * f + i) - (a + two * d + g)) / (nb.float32(8.) * cellsize_x[0])
50+
dz_dy = ((g + two * h + i) - (a + two * b + c)) / (nb.float32(8.) * cellsize_y[0])
51+
p = (dz_dx * dz_dx + dz_dy * dz_dy) ** nb.float32(.5)
52+
return atan(p) * nb.float32(57.29578)
4953

5054

5155
@cuda.jit
5256
def _horn_slope_cuda(arr, cellsize_x_arr, cellsize_y_arr, out):
5357
i, j = cuda.grid(2)
5458
di = 1
5559
dj = 1
56-
if (i-di >= 0 and i+di < out.shape[0] and
57-
j-dj >= 0 and j+dj < out.shape[1]):
60+
if (i-di >= 1 and i+di < out.shape[0] - 1 and
61+
j-dj >= 1 and j+dj < out.shape[1] - 1):
5862
out[i, j] = _gpu_slope(arr[i-di:i+di+1, j-dj:j+dj+1],
5963
cellsize_x_arr,
6064
cellsize_y_arr)
61-
else:
62-
out[i, j] = np.nan
6365

6466

65-
def slope(agg, name='slope', use_cuda=True):
67+
def slope(agg, name='slope', use_cuda=True, pad=True, use_cupy=True):
6668
"""Returns slope of input aggregate in degrees.
6769
Parameters
6870
----------
@@ -103,21 +105,41 @@ def slope(agg, name='slope', use_cuda=True):
103105
' or a tuple of numeric values.')
104106

105107
if has_cuda() and use_cuda:
106-
cellsize_x_arr = np.array([float(cellsize_x)], dtype='f8')
107-
cellsize_y_arr = np.array([float(cellsize_y)], dtype='f8')
108-
109-
griddim, blockdim = cuda_args(agg.data.shape)
110-
slope_agg = np.zeros(agg.data.shape, dtype='f8')
111-
_horn_slope_cuda[griddim, blockdim](agg.data,
108+
cellsize_x_arr = np.array([float(cellsize_x)], dtype='f4')
109+
cellsize_y_arr = np.array([float(cellsize_y)], dtype='f4')
110+
111+
if pad:
112+
pad_rows = 3 // 2
113+
pad_cols = 3 // 2
114+
pad_width = ((pad_rows, pad_rows),
115+
(pad_cols, pad_cols))
116+
else:
117+
# If padding is not desired, set pads to 0
118+
pad_rows = 0
119+
pad_cols = 0
120+
pad_width = 0
121+
122+
slope_data = np.pad(agg.data, pad_width=pad_width, mode="reflect")
123+
124+
griddim, blockdim = cuda_args(slope_data.shape)
125+
slope_agg = np.empty(slope_data.shape, dtype='f4')
126+
slope_agg[:] = np.nan
127+
128+
if use_cupy:
129+
import cupy
130+
slope_agg = cupy.asarray(slope_agg)
131+
132+
_horn_slope_cuda[griddim, blockdim](slope_data,
112133
cellsize_x_arr,
113134
cellsize_y_arr,
114135
slope_agg)
115-
pass
136+
if pad:
137+
slope_agg = slope_agg[pad_rows:-pad_rows, pad_cols:-pad_cols]
116138
else:
117139
slope_agg = _horn_slope(agg.data, cellsize_x, cellsize_y)
118140

119141
return DataArray(slope_agg,
120142
name=name,
121143
coords=agg.coords,
122144
dims=agg.dims,
123-
attrs=agg.attrs)
145+
attrs=agg.attrs)

0 commit comments

Comments
 (0)