makepath · ianthomas23 · Dec 1, 2021 · Nov 30, 2021 · Dec 1, 2021
diff --git a/setup.py b/setup.py
@@ -39,6 +39,9 @@
         'noise >=1.2.2',
     ],
     'examples': examples,
+    'optional': [
+        'rtxpy',
+    ],
 }
 
 # additional doc dependencies may be needed

diff --git a/xrspatial/gpu_rtx/__init__.py b/xrspatial/gpu_rtx/__init__.py
@@ -0,0 +1,11 @@
+from ..utils import has_cuda, has_cupy
+
+
+try:
+    from rtxpy import RTX
+except ImportError:
+    RTX = None
+
+
+def has_rtx():
+    return has_cupy() and has_cuda() and RTX is not None
diff --git a/xrspatial/gpu_rtx/cuda_utils.py b/xrspatial/gpu_rtx/cuda_utils.py
@@ -0,0 +1,47 @@
+import numba as nb
+import numpy as np
+
+
+@nb.cuda.jit(device=True)
+def add(a, b):
+    return float3(a[0]+b[0], a[1]+b[1], a[2]+b[2])
+
+
+@nb.cuda.jit(device=True)
+def diff(a, b):
+    return float3(a[0]-b[0], a[1]-b[1], a[2]-b[2])
+
+
+@nb.cuda.jit(device=True)
+def dot(a, b):
+    return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]
+
+
+@nb.cuda.jit(device=True)
+def float3(a, b, c):
+    return (np.float32(a), np.float32(b), np.float32(c))
+
+
+@nb.cuda.jit(device=True)
+def invert(a):
+    return float3(-a[0], -a[1], -a[2])
+
+
+@nb.cuda.jit(device=True)
+def mix(a, b, k):
+    return add(mul(a, k), mul(b, 1-k))
+
+
+@nb.cuda.jit(device=True)
+def make_float3(a, offset):
+    return float3(a[offset], a[offset+1], a[offset+2])
+
+
+@nb.cuda.jit(device=True)
+def mul(a, b):
+    return float3(a[0]*b, a[1]*b, a[2]*b)
+
+
+@nb.cuda.jit(device=True)
+def mult_color(a, b):
+    return float3(a[0]*b[0], a[1]*b[1], a[2]*b[2])
diff --git a/xrspatial/gpu_rtx/mesh_utils.py b/xrspatial/gpu_rtx/mesh_utils.py
@@ -0,0 +1,135 @@
+import numba as nb
+import numpy as np
+import cupy
+
+
+@nb.cuda.jit
+def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride):
+    global_id = stride + nb.cuda.grid(1)
+    if global_id < W*H:
+        h = global_id // W
+        w = global_id % W
+        mesh_map_index = h * W + w
+
+        val = data[h, -w]
+
+        offset = 3*mesh_map_index
+        verts[offset] = w
+        verts[offset+1] = h
+        verts[offset+2] = val * scale
+
+        if w != W - 1 and h != H - 1:
+            offset = 6*(h * (W-1) + w)
+            triangles[offset+0] = np.int32(mesh_map_index + W)
+            triangles[offset+1] = np.int32(mesh_map_index + W + 1)
+            triangles[offset+2] = np.int32(mesh_map_index)
+            triangles[offset+3] = np.int32(mesh_map_index + W + 1)
+            triangles[offset+4] = np.int32(mesh_map_index + 1)
+            triangles[offset+5] = np.int32(mesh_map_index)
+
+
+@nb.njit(parallel=True)
+def triangulate_cpu(verts, triangles, data, H, W, scale):
+    for h in nb.prange(H):
+        for w in range(W):
+            mesh_map_index = h * W + w
+
+            val = data[h, w]
+
+            offset = 3*mesh_map_index
+            verts[offset] = w
+            verts[offset+1] = h
+            verts[offset+2] = val * scale
+
+            if w != W - 1 and h != H - 1:
+                offset = 6*(h*(W-1) + w)
+                triangles[offset+0] = np.int32(mesh_map_index + W)
+                triangles[offset+1] = np.int32(mesh_map_index + W+1)
+                triangles[offset+2] = np.int32(mesh_map_index)
+                triangles[offset+3] = np.int32(mesh_map_index + W+1)
+                triangles[offset+4] = np.int32(mesh_map_index + 1)
+                triangles[offset+5] = np.int32(mesh_map_index)
+
+
+def triangulate_terrain(verts, triangles, terrain, scale=1):
+    H, W = terrain.shape
+    if isinstance(terrain.data, np.ndarray):
+        triangulate_cpu(verts, triangles, terrain.data, H, W, scale)
+    if isinstance(terrain.data, cupy.ndarray):
+        job_size = H*W
+        blockdim = 1024
+        griddim = (job_size + blockdim - 1) // 1024
+        d = 100
+        offset = 0
+        while job_size > 0:
+            batch = min(d, griddim)
+            _triangulate_terrain_kernel[batch, blockdim](
+                verts, triangles, terrain.data, H, W, scale, offset)
+            offset += batch*blockdim
+            job_size -= batch*blockdim
+    return 0
+
+
+@nb.jit(nopython=True)
+def _fill_contents(content, verts, triangles, num_tris):
+    v = np.empty(12, np.float32)
+    pad = np.zeros(2, np.int8)
+    offset = 0
+    for i in range(num_tris):
+        t0 = triangles[3*i+0]
+        t1 = triangles[3*i+1]
+        t2 = triangles[3*i+2]
+        v[3*0+0] = 0
+        v[3*0+1] = 0
+        v[3*0+2] = 0
+        v[3*1+0] = verts[3*t0+0]
+        v[3*1+1] = verts[3*t0+1]
+        v[3*1+2] = verts[3*t0+2]
+        v[3*2+0] = verts[3*t1+0]
+        v[3*2+1] = verts[3*t1+1]
+        v[3*2+2] = verts[3*t1+2]
+        v[3*3+0] = verts[3*t2+0]
+        v[3*3+1] = verts[3*t2+1]
+        v[3*3+2] = verts[3*t2+2]
+
+        offset = 50*i
+        content[offset:offset+48] = v.view(np.uint8)
+        content[offset+48:offset+50] = pad
+
+
+def write(name, verts, triangles):
+    """
+    Save a triangulated raster to a standard STL file.
+    Windows has a default STL viewer and probably all 3D viewers have native
+    support for it because of its simplicity. Can be used to verify the
+    correctness of the algorithm or to visualize the mesh to get a notion of
+    the size/complexity etc.
+    @param name - The name of the mesh file we're going to save.
+                  Should end in .stl
+    @param verts - A numpy array containing all the vertices of the mesh.
+                   Format is 3 float32 per vertex (vertex buffer)
+    @param triangles - A numpy array containing all the triangles of the mesh.
+                       Format is 3 int32 per triangle (index buffer)
+    """
+    ib = triangles
+    vb = verts
+    if isinstance(ib, cupy.ndarray):
+        ib = cupy.asnumpy(ib)
+    if isinstance(vb, cupy.ndarray):
+        vb = cupy.asnumpy(vb)
+
+    header = np.zeros(80, np.uint8)
+    nf = np.empty(1, np.uint32)
+    num_tris = triangles.shape[0] // 3
+    nf[0] = num_tris
+    f = open(name, 'wb')
+    f.write(header)
+    f.write(nf)
+
+    # size of 1 triangle in STL is 50 bytes
+    # 12 floats (each 4 bytes) for a total of 48
+    # And additional 2 bytes for padding
+    content = np.empty(num_tris*(50), np.uint8)
+    _fill_contents(content, vb, ib, num_tris)
+    f.write(content)
+    f.close()