Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
'noise >=1.2.2',
],
'examples': examples,
'optional': [
'rtxpy',
],
}

# additional doc dependencies may be needed
Expand Down
11 changes: 11 additions & 0 deletions xrspatial/gpu_rtx/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from ..utils import has_cuda, has_cupy


try:
from rtxpy import RTX
except ImportError:
RTX = None


def has_rtx():
return has_cupy() and has_cuda() and RTX is not None
47 changes: 47 additions & 0 deletions xrspatial/gpu_rtx/cuda_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import numba as nb
import numpy as np


@nb.cuda.jit(device=True)
def add(a, b):
return float3(a[0]+b[0], a[1]+b[1], a[2]+b[2])


@nb.cuda.jit(device=True)
def diff(a, b):
return float3(a[0]-b[0], a[1]-b[1], a[2]-b[2])


@nb.cuda.jit(device=True)
def dot(a, b):
return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]


@nb.cuda.jit(device=True)
def float3(a, b, c):
return (np.float32(a), np.float32(b), np.float32(c))


@nb.cuda.jit(device=True)
def invert(a):
return float3(-a[0], -a[1], -a[2])


@nb.cuda.jit(device=True)
def mix(a, b, k):
return add(mul(a, k), mul(b, 1-k))


@nb.cuda.jit(device=True)
def make_float3(a, offset):
return float3(a[offset], a[offset+1], a[offset+2])


@nb.cuda.jit(device=True)
def mul(a, b):
return float3(a[0]*b, a[1]*b, a[2]*b)


@nb.cuda.jit(device=True)
def mult_color(a, b):
return float3(a[0]*b[0], a[1]*b[1], a[2]*b[2])
135 changes: 135 additions & 0 deletions xrspatial/gpu_rtx/mesh_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import numba as nb
import numpy as np
import cupy


@nb.cuda.jit
def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride):
global_id = stride + nb.cuda.grid(1)
if global_id < W*H:
h = global_id // W
w = global_id % W
mesh_map_index = h * W + w

val = data[h, -w]

offset = 3*mesh_map_index
verts[offset] = w
verts[offset+1] = h
verts[offset+2] = val * scale

if w != W - 1 and h != H - 1:
offset = 6*(h * (W-1) + w)
triangles[offset+0] = np.int32(mesh_map_index + W)
triangles[offset+1] = np.int32(mesh_map_index + W + 1)
triangles[offset+2] = np.int32(mesh_map_index)
triangles[offset+3] = np.int32(mesh_map_index + W + 1)
triangles[offset+4] = np.int32(mesh_map_index + 1)
triangles[offset+5] = np.int32(mesh_map_index)


@nb.njit(parallel=True)
def triangulate_cpu(verts, triangles, data, H, W, scale):
for h in nb.prange(H):
for w in range(W):
mesh_map_index = h * W + w

val = data[h, w]

offset = 3*mesh_map_index
verts[offset] = w
verts[offset+1] = h
verts[offset+2] = val * scale

if w != W - 1 and h != H - 1:
offset = 6*(h*(W-1) + w)
triangles[offset+0] = np.int32(mesh_map_index + W)
triangles[offset+1] = np.int32(mesh_map_index + W+1)
triangles[offset+2] = np.int32(mesh_map_index)
triangles[offset+3] = np.int32(mesh_map_index + W+1)
triangles[offset+4] = np.int32(mesh_map_index + 1)
triangles[offset+5] = np.int32(mesh_map_index)


def triangulate_terrain(verts, triangles, terrain, scale=1):
H, W = terrain.shape
if isinstance(terrain.data, np.ndarray):
triangulate_cpu(verts, triangles, terrain.data, H, W, scale)
if isinstance(terrain.data, cupy.ndarray):
job_size = H*W
blockdim = 1024
griddim = (job_size + blockdim - 1) // 1024
d = 100
offset = 0
while job_size > 0:
batch = min(d, griddim)
_triangulate_terrain_kernel[batch, blockdim](
verts, triangles, terrain.data, H, W, scale, offset)
offset += batch*blockdim
job_size -= batch*blockdim
return 0


@nb.jit(nopython=True)
def _fill_contents(content, verts, triangles, num_tris):
v = np.empty(12, np.float32)
pad = np.zeros(2, np.int8)
offset = 0
for i in range(num_tris):
t0 = triangles[3*i+0]
t1 = triangles[3*i+1]
t2 = triangles[3*i+2]
v[3*0+0] = 0
v[3*0+1] = 0
v[3*0+2] = 0
v[3*1+0] = verts[3*t0+0]
v[3*1+1] = verts[3*t0+1]
v[3*1+2] = verts[3*t0+2]
v[3*2+0] = verts[3*t1+0]
v[3*2+1] = verts[3*t1+1]
v[3*2+2] = verts[3*t1+2]
v[3*3+0] = verts[3*t2+0]
v[3*3+1] = verts[3*t2+1]
v[3*3+2] = verts[3*t2+2]

offset = 50*i
content[offset:offset+48] = v.view(np.uint8)
content[offset+48:offset+50] = pad


def write(name, verts, triangles):
"""
Save a triangulated raster to a standard STL file.
Windows has a default STL viewer and probably all 3D viewers have native
support for it because of its simplicity. Can be used to verify the
correctness of the algorithm or to visualize the mesh to get a notion of
the size/complexity etc.
@param name - The name of the mesh file we're going to save.
Should end in .stl
@param verts - A numpy array containing all the vertices of the mesh.
Format is 3 float32 per vertex (vertex buffer)
@param triangles - A numpy array containing all the triangles of the mesh.
Format is 3 int32 per triangle (index buffer)
"""
ib = triangles
vb = verts
if isinstance(ib, cupy.ndarray):
ib = cupy.asnumpy(ib)
if isinstance(vb, cupy.ndarray):
vb = cupy.asnumpy(vb)

header = np.zeros(80, np.uint8)
nf = np.empty(1, np.uint32)
num_tris = triangles.shape[0] // 3
nf[0] = num_tris
f = open(name, 'wb')
f.write(header)
f.write(nf)

# size of 1 triangle in STL is 50 bytes
# 12 floats (each 4 bytes) for a total of 48
# And additional 2 bytes for padding
content = np.empty(num_tris*(50), np.uint8)
_fill_contents(content, vb, ib, num_tris)
f.write(content)
f.close()
Loading