Skip to content

Commit afca7d7

Browse files
alex-soklevAlexander Soklev
andauthored
Added a pure numba hillshade that is 10x faster compared to numpy (#542)
* Added a pure numba hillshade GPU acceleration that does not require cupy and runs 3x faster * Hillshade updated * Address review comments and remove the code that is no longer used. Co-authored-by: Alexander Soklev <alexander.soklev@software-supreme.com>
1 parent 50f8790 commit afca7d7

File tree

1 file changed

+42
-46
lines changed

1 file changed

+42
-46
lines changed

xrspatial/hillshade.py

Lines changed: 42 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# std lib
22
from functools import partial
3-
from math import sqrt
3+
import math
44

55
# 3rd-party
66
try:
@@ -23,7 +23,6 @@ class cupy(object):
2323

2424
from typing import Optional
2525

26-
2726
def _run_numpy(data, azimuth=225, angle_altitude=25):
2827
azimuth = 360.0 - azimuth
2928
x, y = np.gradient(data)
@@ -48,61 +47,58 @@ def _run_dask_numpy(data, azimuth, angle_altitude):
4847
meta=np.array(()))
4948
return out
5049

50+
def _run_dask_cupy(data, azimuth, angle_altitude):
51+
msg = 'Upstream bug in dask prevents cupy backed arrays'
52+
raise NotImplementedError(msg)
5153

5254
@cuda.jit
53-
def _gpu_calc(x, y, out):
54-
i, j = cuda.grid(2)
55-
if i < out.shape[0] and j < out.shape[1]:
56-
out[i, j] = sqrt(x[i, j] * x[i, j] + y[i, j] * y[i, j])
57-
58-
59-
@cuda.jit
60-
def _gpu_cos_part(cos_altituderad, cos_slope, cos_aspect, out):
55+
def _gpu_calc_numba(data, output, sin_altituderad, cos_altituderad, azimuthrad):
6156
i, j = cuda.grid(2)
62-
if i < out.shape[0] and j < out.shape[1]:
63-
out[i, j] = cos_altituderad * cos_slope[i, j] * cos_aspect[i, j]
57+
if i>0 and i < data.shape[0]-1 and j>0 and j < data.shape[1]-1:
58+
x = (data[i+1, j]-data[i-1, j])/2
59+
y = (data[i, j+1]-data[i, j-1])/2
6460

61+
len = math.sqrt(x*x + y*y)
62+
slope = 1.57079632679 - math.atan(len)
63+
aspect = (azimuthrad - 1.57079632679) - math.atan2(-x, y)
6564

66-
def _run_cupy(data, azimuth, angle_altitude):
67-
x, y = np.gradient(data.get())
68-
x = cupy.asarray(x, dtype=x.dtype)
69-
y = cupy.asarray(y, dtype=y.dtype)
70-
65+
sin_slope = math.sin(slope)
66+
sin_part = sin_altituderad * sin_slope
67+
68+
cos_aspect = math.cos(aspect)
69+
cos_slope = math.cos(slope)
70+
cos_part = cos_altituderad * cos_slope * cos_aspect
71+
72+
res = sin_part + cos_part
73+
output[i, j] = (res + 1) * 0.5
74+
75+
def calc_dims(shape):
76+
threadsperblock = (32,32)
77+
blockspergrid = (
78+
(shape[0] + (threadsperblock[0] - 1)) // threadsperblock[0],
79+
(shape[1] + (threadsperblock[1] - 1)) // threadsperblock[1]
80+
)
81+
return blockspergrid, threadsperblock
82+
83+
def _run_cupy(d_data, azimuth, angle_altitude):
84+
# Precompute constant values shared between all threads
7185
altituderad = angle_altitude * np.pi / 180.
7286
sin_altituderad = np.sin(altituderad)
7387
cos_altituderad = np.cos(altituderad)
74-
75-
griddim, blockdim = cuda_args(data.shape)
76-
arctan_part = cupy.empty(data.shape, dtype='f4')
77-
_gpu_calc[griddim, blockdim](x, y, arctan_part)
78-
79-
slope = np.pi / 2. - np.arctan(arctan_part)
80-
sin_slope = np.sin(slope)
81-
sin_part = sin_altituderad * sin_slope
82-
8388
azimuthrad = (360.0 - azimuth) * np.pi / 180.
84-
aspect = (azimuthrad - np.pi / 2.) - np.arctan2(-x, y)
85-
cos_aspect = np.cos(aspect)
86-
cos_slope = np.cos(slope)
87-
88-
cos_part = cupy.empty(data.shape, dtype='f4')
89-
_gpu_cos_part[griddim, blockdim](cos_altituderad, cos_slope,
90-
cos_aspect, cos_part)
91-
shaded = sin_part + cos_part
92-
out = (shaded + 1) / 2
93-
94-
out[0, :] = cupy.nan
95-
out[-1, :] = cupy.nan
96-
out[:, 0] = cupy.nan
97-
out[:, -1] = cupy.nan
98-
99-
return out
10089

90+
# Allocate output buffer and launch kernel with appropriate dimensions
91+
output = cupy.empty(d_data.shape, np.float32)
92+
griddim, blockdim = calc_dims(d_data.shape)
93+
_gpu_calc_numba[griddim, blockdim](d_data, output, sin_altituderad, cos_altituderad, azimuthrad)
10194

102-
def _run_dask_cupy(data, azimuth, angle_altitude):
103-
msg = 'Upstream bug in dask prevents cupy backed arrays'
104-
raise NotImplementedError(msg)
95+
# Fill borders with nans.
96+
output[ 0, :] = cupy.nan
97+
output[-1, :] = cupy.nan
98+
output[:, 0] = cupy.nan
99+
output[:, -1] = cupy.nan
105100

101+
return output
106102

107103
def hillshade(agg: xr.DataArray,
108104
azimuth: int = 225,
@@ -221,7 +217,7 @@ def hillshade(agg: xr.DataArray,
221217
if isinstance(agg.data, np.ndarray):
222218
out = _run_numpy(agg.data, azimuth, angle_altitude)
223219

224-
# cupy case
220+
# cupy/numba case
225221
elif has_cuda() and isinstance(agg.data, cupy.ndarray):
226222
out = _run_cupy(agg.data, azimuth, angle_altitude)
227223

0 commit comments

Comments
 (0)