11# std lib
22from functools import partial
3- from math import sqrt
3+ import math
44
55# 3rd-party
66try :
@@ -23,7 +23,6 @@ class cupy(object):
2323
2424from typing import Optional
2525
26-
2726def _run_numpy (data , azimuth = 225 , angle_altitude = 25 ):
2827 azimuth = 360.0 - azimuth
2928 x , y = np .gradient (data )
@@ -48,61 +47,58 @@ def _run_dask_numpy(data, azimuth, angle_altitude):
4847 meta = np .array (()))
4948 return out
5049
50+ def _run_dask_cupy (data , azimuth , angle_altitude ):
51+ msg = 'Upstream bug in dask prevents cupy backed arrays'
52+ raise NotImplementedError (msg )
5153
5254@cuda .jit
53- def _gpu_calc (x , y , out ):
54- i , j = cuda .grid (2 )
55- if i < out .shape [0 ] and j < out .shape [1 ]:
56- out [i , j ] = sqrt (x [i , j ] * x [i , j ] + y [i , j ] * y [i , j ])
57-
58-
59- @cuda .jit
60- def _gpu_cos_part (cos_altituderad , cos_slope , cos_aspect , out ):
55+ def _gpu_calc_numba (data , output , sin_altituderad , cos_altituderad , azimuthrad ):
6156 i , j = cuda .grid (2 )
62- if i < out .shape [0 ] and j < out .shape [1 ]:
63- out [i , j ] = cos_altituderad * cos_slope [i , j ] * cos_aspect [i , j ]
57+ if i > 0 and i < data .shape [0 ]- 1 and j > 0 and j < data .shape [1 ]- 1 :
58+ x = (data [i + 1 , j ]- data [i - 1 , j ])/ 2
59+ y = (data [i , j + 1 ]- data [i , j - 1 ])/ 2
6460
61+ len = math .sqrt (x * x + y * y )
62+ slope = 1.57079632679 - math .atan (len )
63+ aspect = (azimuthrad - 1.57079632679 ) - math .atan2 (- x , y )
6564
66- def _run_cupy (data , azimuth , angle_altitude ):
67- x , y = np .gradient (data .get ())
68- x = cupy .asarray (x , dtype = x .dtype )
69- y = cupy .asarray (y , dtype = y .dtype )
70-
65+ sin_slope = math .sin (slope )
66+ sin_part = sin_altituderad * sin_slope
67+
68+ cos_aspect = math .cos (aspect )
69+ cos_slope = math .cos (slope )
70+ cos_part = cos_altituderad * cos_slope * cos_aspect
71+
72+ res = sin_part + cos_part
73+ output [i , j ] = (res + 1 ) * 0.5
74+
75+ def calc_dims (shape ):
76+ threadsperblock = (32 ,32 )
77+ blockspergrid = (
78+ (shape [0 ] + (threadsperblock [0 ] - 1 )) // threadsperblock [0 ],
79+ (shape [1 ] + (threadsperblock [1 ] - 1 )) // threadsperblock [1 ]
80+ )
81+ return blockspergrid , threadsperblock
82+
83+ def _run_cupy (d_data , azimuth , angle_altitude ):
84+ # Precompute constant values shared between all threads
7185 altituderad = angle_altitude * np .pi / 180.
7286 sin_altituderad = np .sin (altituderad )
7387 cos_altituderad = np .cos (altituderad )
74-
75- griddim , blockdim = cuda_args (data .shape )
76- arctan_part = cupy .empty (data .shape , dtype = 'f4' )
77- _gpu_calc [griddim , blockdim ](x , y , arctan_part )
78-
79- slope = np .pi / 2. - np .arctan (arctan_part )
80- sin_slope = np .sin (slope )
81- sin_part = sin_altituderad * sin_slope
82-
8388 azimuthrad = (360.0 - azimuth ) * np .pi / 180.
84- aspect = (azimuthrad - np .pi / 2. ) - np .arctan2 (- x , y )
85- cos_aspect = np .cos (aspect )
86- cos_slope = np .cos (slope )
87-
88- cos_part = cupy .empty (data .shape , dtype = 'f4' )
89- _gpu_cos_part [griddim , blockdim ](cos_altituderad , cos_slope ,
90- cos_aspect , cos_part )
91- shaded = sin_part + cos_part
92- out = (shaded + 1 ) / 2
93-
94- out [0 , :] = cupy .nan
95- out [- 1 , :] = cupy .nan
96- out [:, 0 ] = cupy .nan
97- out [:, - 1 ] = cupy .nan
98-
99- return out
10089
90+ # Allocate output buffer and launch kernel with appropriate dimensions
91+ output = cupy .empty (d_data .shape , np .float32 )
92+ griddim , blockdim = calc_dims (d_data .shape )
93+ _gpu_calc_numba [griddim , blockdim ](d_data , output , sin_altituderad , cos_altituderad , azimuthrad )
10194
102- def _run_dask_cupy (data , azimuth , angle_altitude ):
103- msg = 'Upstream bug in dask prevents cupy backed arrays'
104- raise NotImplementedError (msg )
95+ # Fill borders with nans.
96+ output [ 0 , :] = cupy .nan
97+ output [- 1 , :] = cupy .nan
98+ output [:, 0 ] = cupy .nan
99+ output [:, - 1 ] = cupy .nan
105100
101+ return output
106102
107103def hillshade (agg : xr .DataArray ,
108104 azimuth : int = 225 ,
@@ -221,7 +217,7 @@ def hillshade(agg: xr.DataArray,
221217 if isinstance (agg .data , np .ndarray ):
222218 out = _run_numpy (agg .data , azimuth , angle_altitude )
223219
224- # cupy case
220+ # cupy/numba case
225221 elif has_cuda () and isinstance (agg .data , cupy .ndarray ):
226222 out = _run_cupy (agg .data , azimuth , angle_altitude )
227223
0 commit comments