diff --git a/README.md b/README.md index 149484a..a86c101 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Python-OptiX -Python wrapper for the OptiX 7.3 raytracing engine. +Python wrapper for the OptiX 7 raytracing engine. Python-OptiX wraps the OptiX C++ API using Cython and provides a simplified interface to the original C-like API using mainly the @@ -12,16 +12,18 @@ Only Linux is supported at the moment. ### OptiX Versions -Python-OptiX currently only supports the most recent (7.3.0) release of OptiX +Python-OptiX currently supports the OptiX releases 7.3.0 and 7.4.0 ## Installation ### Dependencies Install a recent version of the [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads) -and the [OptiX 7.3.0 SDK](https://developer.nvidia.com/optix/downloads/7.3.0/linux64) +and the [OptiX 7.4.0 SDK](https://developer.nvidia.com/optix/downloads/7.4.0/linux64-x86_64) -Make sure the CUDA header files are installed as well +Note: The older [OptiX 7.3.0 SDK](https://developer.nvidia.com/optix/downloads/7.3.0/linux64) version is supported as well. + +Make sure the CUDA header files are installed as well. Add the locations of CUDA and OptiX to the system `PATH` variable if necessary. 
diff --git a/examples/compile_with_tasks.py b/examples/compile_with_tasks.py new file mode 100644 index 0000000..9e967ff --- /dev/null +++ b/examples/compile_with_tasks.py @@ -0,0 +1,75 @@ +import concurrent.futures +import optix as ox +import argparse +import logging +import sys +from concurrent.futures import ThreadPoolExecutor +import time + +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) +log = logging.getLogger() + + +if __name__ == "__main__": + if ox.optix_version()[1] < 4: + raise NotImplementedError("Parallel tasks are not implemented in optix versions < 7.4.") + + parser = argparse.ArgumentParser("Compile OptiX modules using parallel tasks") + parser.add_argument('file', nargs=1, help="The input file (.ptx or .cu) to compile") + parser.add_argument('-na', '--num-attributes', type=int, default=2, required=False, + help="Number of attribute values (up to 8, default 2)") + parser.add_argument('-npv', '--num-payload-values', type=int, default=2, required=False, + help=f"Number of payload values (up to {ox.PipelineCompileOptions.DEFAULT_MAX_PAYLOAD_VALUE_COUNT}, default 2)") + parser.add_argument('-npt', '--num-payload-types', type=int, default=1, required=False, + help=f"Number of payload types (up to {ox.ModuleCompileOptions.DEFAULT_MAX_PAYLOAD_TYPE_COUNT}, default 1)") + parser.add_argument('-ni', '--num-iters', type=int, default=1, required=False, + help="Number of iterations to compile. 
> 1 disables disk cache (default 1)") + parser.add_argument('-dt', '--disable-tasks', action='store_true', required=False, + help="Disable compilation with tasks (default enabled)") + parser.add_argument('-nt', '--num-threads', type=int, default=1, required=False, + help="Number of threads (default 1)") + parser.add_argument('-mt', '--max-num-tasks', type=int, default=2, required=False, + help="Maximum number of additional tasks (default 2)") + + args = parser.parse_args() + + logger = ox.Logger(log) + ctx = ox.DeviceContext(validation_mode=True, log_callback_function=logger, log_callback_level=3) + + if args.num_iters > 1: + ctx.cache_enabled = False + + # compile the file content to ptx in case a .cu file is given + ptx = ox.Module.compile_cuda_ptx(args.file[0]) + + pipeline_options = ox.PipelineCompileOptions(num_payload_values=0, + num_attribute_values=args.num_attributes) + + payload_semantics = [ox.PayloadSemantics.DEFAULT] * args.num_payload_values + payload_types = [payload_semantics] * args.num_payload_types + + compile_opts = ox.ModuleCompileOptions(payload_types=payload_types) + + use_tasks = not args.disable_tasks + + if use_tasks: + tic = time.time() + with ThreadPoolExecutor(max_workers=args.num_threads) as executor: + for i in range(args.num_iters): + module, task = ox.Module.create_as_task(ctx, ptx, module_compile_options=compile_opts, pipeline_compile_options=pipeline_options) + task_futures = {executor.submit(task.execute, args.max_num_tasks)} + while task_futures: + done, not_done = concurrent.futures.wait(task_futures, timeout=0.25, return_when=concurrent.futures.FIRST_COMPLETED) + for future in done: + new_tasks = future.result() + if len(new_tasks) > 0: + task_futures.update({executor.submit(t.execute, args.max_num_tasks) for t in new_tasks}) + task_futures.remove(future) + + # wait for the executor to finish here + print("Overall run time with tasks", time.time()-tic) + else: + tic = time.time() + for i in range(args.num_iters): + module = 
ox.Module(ctx, ptx, module_compile_options=compile_opts, pipeline_compile_options=pipeline_options) + print("Overall run time without tasks", time.time()-tic) \ No newline at end of file diff --git a/examples/dynamic_geometry.py b/examples/dynamic_geometry.py index 0507183..8980b7f 100644 --- a/examples/dynamic_geometry.py +++ b/examples/dynamic_geometry.py @@ -220,6 +220,7 @@ def display_subframe(output_buffer, gl_display, window): framebuf_res_x, framebuf_res_y, output_buffer.get_pbo() ) + def init_camera_state(state): camera = state.camera camera.eye = (0, 1, -20) @@ -233,12 +234,14 @@ def init_camera_state(state): trackball.set_reference_frame([1,0,0], [0,0,1], [0,1,0]) trackball.reinitialize_orientation_from_camera() + def create_context(state): logger = ox.Logger(log) ctx = ox.DeviceContext(validation_mode=False, log_callback_function=logger, log_callback_level=4) ctx.cache_enabled = False state.ctx = ctx + def generate_animated_vertices(out_vertices, animation_mode, time, width, height): threads_per_block = 128 num_blocks = (width*height + threads_per_block - 1) // threads_per_block @@ -251,12 +254,13 @@ def generate_animated_vertices(out_vertices, animation_mode, time, width, height def launch_generate_animated_vertices(state, animation_mode): generate_animated_vertices(state.d_temp_vertices, animation_mode, state.time, g_tessellation_resolution, g_tessellation_resolution) + def update_mesh_accel(state): # first sphere is static # second sphere moves by updating its transform matrix - transform = state.ias_build_input.get_transform_view(1) - transform[1,-1] = np.sin(4*state.time) + transform = state.ias_build_input.view_instance_transform(1) + transform[1, -1] = np.sin(4*state.time) # third sphere deforms launch_generate_animated_vertices(state, AnimationMode.DEFORM) @@ -270,13 +274,14 @@ def update_mesh_accel(state): state.last_exploding_sphere_rebuild_time = state.time state.exploding_gas = ox.AccelerationStructure(state.ctx, state.gas_build_input, 
compact=True, allow_update=True, random_vertex_access=True) - state.ias_build_input.instances[3].update_traversable(state.exploding_gas) + state.ias_build_input[3].traversable = state.exploding_gas state.ias_build_input.update_instance(3) else: state.exploding_gas.update(state.gas_build_input) state.ias.update(state.ias_build_input) + def build_vertex_generation_kernel(state): cuda_source = os.path.join(script_dir, 'cuda', 'dynamic_geometry_vertex_generation.cu') example_include_path = os.path.dirname(cuda_source) @@ -289,6 +294,7 @@ def build_vertex_generation_kernel(state): state.generate_vertices_kernel = cp.RawKernel(code=code, backend='nvrtc', options=build_flags, name='generate_vertices') + def build_mesh_accel(state): # Allocate temporary space for vertex generation. # The same memory space is reused for generating the deformed and exploding vertices before updates. @@ -335,9 +341,10 @@ def create_module(state): else: exception_flags=ox.ExceptionFlags.NONE + print("Triangle value", ox.PrimitiveTypeFlags.TRIANGLE.value) pipeline_opts = ox.PipelineCompileOptions( uses_motion_blur=False, - uses_primitive_type_flags = ox.PrimitiveTypeFlags.TRIANGLE, + uses_primitive_type_flags =ox.PrimitiveTypeFlags.TRIANGLE, traversable_graph_flags=ox.TraversableGraphFlags.ALLOW_SINGLE_LEVEL_INSTANCING, exception_flags=exception_flags, num_payload_values=3, @@ -347,7 +354,7 @@ def create_module(state): compile_opts = ox.ModuleCompileOptions( max_register_count=ox.ModuleCompileOptions.DEFAULT_MAX_REGISTER_COUNT, opt_level=ox.CompileOptimizationLevel.DEFAULT, - debug_level=ox.CompileDebugLevel.LINEINFO) + debug_level=ox.CompileDebugLevel.MODERATE) cuda_source = os.path.join(script_dir, 'cuda', 'dynamic_geometry.cu') state.module = ox.Module(state.ctx, cuda_source, compile_opts, pipeline_opts) @@ -364,7 +371,7 @@ def create_pipeline(state): program_grps = [state.raygen_grp, state.miss_grp, state.hit_grp] link_opts = ox.PipelineLinkOptions(max_trace_depth=1, - 
debug_level=ox.CompileDebugLevel.LINEINFO) + debug_level=ox.CompileDebugLevel.MODERATE) pipeline = ox.Pipeline(state.ctx, compile_options=state.pipeline_opts, diff --git a/examples/dynamic_materials.py b/examples/dynamic_materials.py index cf45988..9a09964 100644 --- a/examples/dynamic_materials.py +++ b/examples/dynamic_materials.py @@ -194,7 +194,7 @@ def create_module(state): compile_opts = ox.ModuleCompileOptions( max_register_count=ox.ModuleCompileOptions.DEFAULT_MAX_REGISTER_COUNT, opt_level=ox.CompileOptimizationLevel.DEFAULT, - debug_level=ox.CompileDebugLevel.LINEINFO) + debug_level=ox.CompileDebugLevel.MODERATE) source = os.path.join(script_dir, 'cuda', 'dynamic_materials.cu') state.module = ox.Module(state.ctx, source, compile_opts, pipeline_opts) diff --git a/examples/hello.py b/examples/hello.py index 0370d2b..e79fdaa 100644 --- a/examples/hello.py +++ b/examples/hello.py @@ -1,20 +1,15 @@ -import os, sys, logging - import optix as ox import cupy as cp import numpy as np - from PIL import Image, ImageOps - -script_dir = os.path.dirname(os.path.abspath(__file__)) - +import logging +import sys logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) log = logging.getLogger() def create_module(ctx, pipeline_opts): - compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.LINEINFO) - source = os.path.join(script_dir, 'cuda', 'hello.cu') - module = ox.Module(ctx, source, compile_opts, pipeline_opts) + compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.FULL, opt_level=ox.CompileOptimizationLevel.LEVEL_0) + module = ox.Module(ctx, 'cuda/hello.cu', compile_opts, pipeline_opts) return module diff --git a/examples/spheres.py b/examples/spheres.py index 20acf42..a7776e9 100644 --- a/examples/spheres.py +++ b/examples/spheres.py @@ -1,13 +1,9 @@ -import os, sys, logging - +import optix as ox import cupy as cp import numpy as np -import optix as ox - from PIL import Image, ImageOps - -script_dir = 
os.path.dirname(os.path.abspath(__file__)) - +import logging +import sys logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) log = logging.getLogger() img_size = (1024, 768) @@ -26,9 +22,8 @@ def create_acceleration_structure(ctx, bboxes): def create_module(ctx, pipeline_opts): - compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.LINEINFO) - source = os.path.join(script_dir, 'cuda', 'spheres.cu') - module = ox.Module(ctx, source, compile_opts, pipeline_opts) + compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.FULL, opt_level=ox.CompileOptimizationLevel.LEVEL_0) + module = ox.Module(ctx, 'cuda/spheres.cu', compile_opts, pipeline_opts) return module diff --git a/examples/triangle.py b/examples/triangle.py index d3d9159..b9d7bb3 100644 --- a/examples/triangle.py +++ b/examples/triangle.py @@ -1,13 +1,8 @@ -import os - +import optix as ox import cupy as cp import numpy as np -import optix as ox - from PIL import Image, ImageOps -script_dir = os.path.dirname(os.path.abspath(__file__)) - img_size = (1024, 768) # use a regular function for logging @@ -23,9 +18,8 @@ def create_acceleration_structure(ctx, vertices): def create_module(ctx, pipeline_opts): - compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.LINEINFO) - source = os.path.join(script_dir, 'cuda', 'triangle.cu') - module = ox.Module(ctx, source, compile_opts, pipeline_opts) + compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.FULL, opt_level=ox.CompileOptimizationLevel.LEVEL_0) + module = ox.Module(ctx, 'cuda/triangle.cu', compile_opts, pipeline_opts) return module diff --git a/optix/__init__.py b/optix/__init__.py index 5a08d65..39b95b7 100644 --- a/optix/__init__.py +++ b/optix/__init__.py @@ -1,6 +1,6 @@ -from .context import DeviceContext +from .context import DeviceContext, optix_version from .build import * -from .module import Module, ModuleCompileOptions, CompileOptimizationLevel, CompileDebugLevel +from .module 
import Module, ModuleCompileOptions, CompileOptimizationLevel, CompileDebugLevel, PayloadSemantics, Task from .program_group import ProgramGroup from .struct import SbtRecord, LaunchParamsRecord from .shader_binding_table import ShaderBindingTable diff --git a/optix/build.pxd b/optix/build.pxd index 854612a..895d613 100644 --- a/optix/build.pxd +++ b/optix/build.pxd @@ -6,8 +6,6 @@ from libc.stdint cimport uintptr_t cdef extern from "optix.h" nogil: - # build functions and structs - cdef enum OptixBuildFlags: OPTIX_BUILD_FLAG_NONE, OPTIX_BUILD_FLAG_ALLOW_UPDATE, @@ -72,28 +70,56 @@ cdef extern from "optix.h" nogil: unsigned int sbtIndexOffsetStrideInBytes unsigned int primitiveIndexOffset - cdef enum OptixPrimitiveType: - OPTIX_PRIMITIVE_TYPE_CUSTOM, - OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_TRIANGLE, - - - cdef struct OptixBuildInputCurveArray: - OptixPrimitiveType curveType - unsigned int numPrimitives - const CUdeviceptr * vertexBuffers - unsigned int numVertices - unsigned int vertexStrideInBytes - const CUdeviceptr * widthBuffers - unsigned int widthStrideInBytes - const CUdeviceptr * normalBuffers - unsigned int normalStrideInBytes - CUdeviceptr indexBuffer - unsigned int indexStrideInBytes - unsigned int flag - unsigned int primitiveIndexOffset + IF _OPTIX_VERSION_MAJOR == 7 and _OPTIX_VERSION_MINOR > 3: # switch to new instance flags + cdef enum OptixPrimitiveType: + OPTIX_PRIMITIVE_TYPE_CUSTOM, + OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, + OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM, + OPTIX_PRIMITIVE_TYPE_TRIANGLE, + + cdef enum OptixCurveEndcapFlags: + OPTIX_CURVE_ENDCAP_DEFAULT, + OPTIX_CURVE_ENDCAP_ON + + cdef struct OptixBuildInputCurveArray: + OptixPrimitiveType curveType + unsigned int numPrimitives + const CUdeviceptr * vertexBuffers + unsigned int numVertices + 
unsigned int vertexStrideInBytes + const CUdeviceptr * widthBuffers + unsigned int widthStrideInBytes + const CUdeviceptr * normalBuffers + unsigned int normalStrideInBytes + CUdeviceptr indexBuffer + unsigned int indexStrideInBytes + unsigned int flag + unsigned int primitiveIndexOffset + unsigned int endcapFlags + ELSE: + cdef enum OptixPrimitiveType: + OPTIX_PRIMITIVE_TYPE_CUSTOM, + OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, + OPTIX_PRIMITIVE_TYPE_TRIANGLE, + + cdef struct OptixBuildInputCurveArray: + OptixPrimitiveType curveType + unsigned int numPrimitives + const CUdeviceptr * vertexBuffers + unsigned int numVertices + unsigned int vertexStrideInBytes + const CUdeviceptr * widthBuffers + unsigned int widthStrideInBytes + const CUdeviceptr * normalBuffers + unsigned int normalStrideInBytes + CUdeviceptr indexBuffer + unsigned int indexStrideInBytes + unsigned int flag + unsigned int primitiveIndexOffset cdef enum OptixIndicesFormat: OPTIX_INDICES_FORMAT_NONE, @@ -167,13 +193,14 @@ cdef extern from "optix.h" nogil: OPTIX_TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM, OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM, + cdef enum OptixInstanceFlags: OPTIX_INSTANCE_FLAG_NONE OPTIX_INSTANCE_FLAG_DISABLE_TRIANGLE_FACE_CULLING OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT - OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM + cdef struct OptixInstance: float transform [12] @@ -276,12 +303,12 @@ cdef class BuildInputCurveArray(BuildInputArray): cdef class Instance(OptixObject): cdef OptixInstance instance - cdef AccelerationStructure traversable + cdef AccelerationStructure _traversable cdef class BuildInputInstanceArray(BuildInputArray): cdef OptixBuildInputInstanceArray build_input - cdef public object instances + cdef object instances cdef object _d_instances diff --git a/optix/build.pyx b/optix/build.pyx index fb42afc..db2c9c0 100644 --- 
a/optix/build.pyx +++ b/optix/build.pyx @@ -31,15 +31,48 @@ class GeometryFlags(IntEnum): REQUIRE_SINGLE_ANYHIT_CALL = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL -class PrimitiveType(IntEnum): +class BuildFlags(IntFlag): """ - Wraps the OptixPrimitiveType enum. + Wraps the OptixBuildFlags enum """ - CUSTOM = OPTIX_PRIMITIVE_TYPE_CUSTOM, - ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, - ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, - ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, - TRIANGLE = OPTIX_PRIMITIVE_TYPE_TRIANGLE + NONE = OPTIX_BUILD_FLAG_NONE, + ALLOW_UPDATE = OPTIX_BUILD_FLAG_ALLOW_UPDATE, + ALLOW_COMPACTION = OPTIX_BUILD_FLAG_ALLOW_COMPACTION, + PREFER_FAST_TRACE = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE, + PREFER_FAST_BUILD = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD, + ALLOW_RANDOM_VERTEX_ACCESS = OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS, + ALLOW_RANDOM_INSTANCE_ACCESS = OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS, + + +IF _OPTIX_VERSION > 70300: # switch to new instance flags + class PrimitiveType(IntEnum): + """ + Wraps the OptixPrimitiveType enum. + """ + CUSTOM = OPTIX_PRIMITIVE_TYPE_CUSTOM, + ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, + ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, + ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, + ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM, + TRIANGLE = OPTIX_PRIMITIVE_TYPE_TRIANGLE + + class CurveEndcapFlags(IntEnum): + DEFAULT = OPTIX_CURVE_ENDCAP_DEFAULT, + ON = OPTIX_CURVE_ENDCAP_ON +ELSE: + class CurveEndcapFlags(IntEnum): + DEFAULT = 0 # only for interface. Ignored for Optix versions below 7.4 + + class PrimitiveType(IntEnum): + """ + Wraps the OptixPrimitiveType enum. 
+ """ + CUSTOM = OPTIX_PRIMITIVE_TYPE_CUSTOM, + ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, + ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, + ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, + TRIANGLE = OPTIX_PRIMITIVE_TYPE_TRIANGLE + class InstanceFlags(IntFlag): """ @@ -50,7 +83,6 @@ class InstanceFlags(IntFlag): FLIP_TRIANGLE_FACING = OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING, DISABLE_ANYHIT = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT, ENFORCE_ANYHIT = OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT, - DISABLE_TRANSFORM = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM cdef class BuildInputArray(OptixObject): @@ -325,7 +357,8 @@ cdef class BuildInputCurveArray(BuildInputArray): index_buffer, normal_buffers = None, flags=None, - primitive_index_offset=0): + primitive_index_offset=0, + endcap_flags=CurveEndcapFlags.DEFAULT): self.build_input.curveType = curve_type.value self._d_vertex_buffers = [cp.asarray(vb, np.float32) for vb in ensure_iterable(vertex_buffers)] @@ -385,6 +418,9 @@ cdef class BuildInputCurveArray(BuildInputArray): self.build_input.primitiveIndexOffset = primitive_index_offset + IF _OPTIX_VERSION > 70300: + self.build_input.endcapFlags = endcap_flags # only for Optix versions >= 7.4 + cdef void prepare_build_input(self, OptixBuildInput * build_input) except *: build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES build_input.curveArray = self.build_input @@ -423,11 +459,9 @@ cdef class Instance(OptixObject): visibility_mask=None): if transform is None: transform = np.eye(3, 4, dtype=np.float32) - transform = np.ascontiguousarray(np.asarray(transform, dtype=np.float32).reshape(3,4)) - cdef float[:, ::1] c_transform = transform - memcpy(&self.instance.transform, &c_transform[0, 0], sizeof(float) * 12) + self.transform = transform self.traversable = traversable - self.instance.traversableHandle = self.traversable.handle + self.instance.instanceId = instance_id self.instance.flags = flags.value self.instance.sbtOffset = sbt_offset @@ 
-438,8 +472,14 @@ cdef class Instance(OptixObject): raise ValueError(f"Too many entries in visibility mask. Got {visibility_mask.bit_length()} but supported are only {max_visibility_mask_bits}") self.instance.visibilityMask = visibility_mask - def update_traversable(self, AccelerationStructure traversable): - self.traversable = traversable + @property + def traversable(self): + return self._traversable + + @traversable.setter + def traversable(self, AccelerationStructure traversable): + self._traversable = traversable + # update the handle as well self.instance.traversableHandle = self.traversable.handle def __deepcopy__(self, memodict={}): @@ -452,6 +492,17 @@ cdef class Instance(OptixObject): return result + @property + def transform(self): + cdef float [:] transform_view = self.instance.transform + return np.asarray(transform_view).reshape(3,4) + + @transform.setter + def transform(self, tf): + transform = np.ascontiguousarray(np.asarray(tf, dtype=np.float32).reshape(3,4)) + cdef float[:, ::1] c_transform = transform + memcpy(&self.instance.transform, &c_transform[0, 0], sizeof(float) * 12) + cdef class BuildInputInstanceArray(BuildInputArray): """ @@ -485,13 +536,48 @@ cdef class BuildInputInstanceArray(BuildInputArray): cdef size_t num_elements(self): return self.build_input.numInstances - + + def __getitem__(self, index): + return self.instances[index] + + def __setitem__(self, index, instance): + if not isinstance(instance, Instance): + raise TypeError("Only instance objects.") + self.instances[index] = instance + self.update_instance(index) + def update_instance(self, index): - src_ptr = &(((self.instances[index])).instance) + """ + Update the instance at index in gpu memory from the instances list in host memory. 
+ + Parameters + ---------- + index: int + The index to update + """ + # update the value in the cuda buffer + src_ptr = &((self.instances[index])).instance dst_ptr = self._d_instances.ptr + index*sizeof(OptixInstance) cp.cuda.runtime.memcpy(dst_ptr, src_ptr, sizeof(OptixInstance), cp.cuda.runtime.memcpyHostToDevice) - def get_transform_view(self, index): + # TODO: still thinking of a better way to accomplish the transform access in an OO way. + def view_instance_transform(self, index): + """ + Obtain a view of the transform parameter at index in gpu memory as a cupy array for direct modification + + Parameters + ---------- + index: int + The index of the transform this view should point to + + Returns + ------- + transform_view: cp.ndarray of shape (3, 4) + A view of the transform matrix + + """ + if index < 0 or index >=len(self.instances): + raise IndexError(f"Invalid index {index} for list of length {len(self.instances)}.") device_ptr = cp.cuda.MemoryPointer(mem=self._d_instances.mem, offset=index*sizeof(OptixInstance)) return cp.ndarray(shape=(3,4), dtype=np.float32, memptr=device_ptr) @@ -801,6 +887,11 @@ cdef class AccelerationStructure(OptixContextObject): def _repr_details(self): return f"{self._num_elements} elements in {self._buffer_sizes.outputSizeInBytes} bytes" + @property + def build_flags(self): + return BuildFlags(self._build_flags) + + diff --git a/optix/context.pxd b/optix/context.pxd index 775d4d2..d038865 100644 --- a/optix/context.pxd +++ b/optix/context.pxd @@ -59,4 +59,4 @@ cdef class DeviceContext(OptixObject): cdef class OptixContextObject(OptixObject): - cdef DeviceContext context \ No newline at end of file + cdef public DeviceContext context \ No newline at end of file diff --git a/optix/context.pyx b/optix/context.pyx index 6d6fbda..0049179 100644 --- a/optix/context.pyx +++ b/optix/context.pyx @@ -6,6 +6,11 @@ import cupy as cp optix_init() +OPTIX_VERSION = _OPTIX_VERSION + +def optix_version(): + return _OPTIX_VERSION_MAJOR, 
_OPTIX_VERSION_MINOR, _OPTIX_VERSION_MICRO + cdef class _LogWrapper: def __init__(self, log_function): self.log_function = log_function @@ -254,3 +259,4 @@ cdef class OptixContextObject(OptixObject): """ def __init__(self, DeviceContext context): self.context = context + diff --git a/optix/module.pxd b/optix/module.pxd index 45509d2..94bb927 100644 --- a/optix/module.pxd +++ b/optix/module.pxd @@ -2,12 +2,16 @@ from .base cimport OptixObject from .common cimport OptixResult, OptixModule from .context cimport OptixDeviceContext, OptixContextObject from .build cimport OptixPrimitiveType -from .pipeline cimport OptixPipelineCompileOptions +from .pipeline cimport OptixPipelineCompileOptions, OptixCompileDebugLevel +from libcpp.vector cimport vector cdef extern from "optix_includes.h" nogil: + cdef OptixResult optixInit() cdef size_t OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT + cdef size_t OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_VALUE_COUNT + cdef enum OptixCompileOptimizationLevel: OPTIX_COMPILE_OPTIMIZATION_DEFAULT, @@ -17,13 +21,6 @@ cdef extern from "optix_includes.h" nogil: OPTIX_COMPILE_OPTIMIZATION_LEVEL_3 - cdef enum OptixCompileDebugLevel: - OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - OPTIX_COMPILE_DEBUG_LEVEL_NONE, - OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, - OPTIX_COMPILE_DEBUG_LEVEL_FULL - - cdef struct OptixModuleCompileBoundValueEntry: size_t pipelineParamOffsetInBytes size_t sizeInBytes @@ -31,17 +28,60 @@ cdef extern from "optix_includes.h" nogil: const char* annotation - cdef struct OptixModuleCompileOptions: - int maxRegisterCount - OptixCompileOptimizationLevel optLevel - OptixCompileDebugLevel debugLevel - const OptixModuleCompileBoundValueEntry* boundValues - unsigned int numBoundValues - - - cdef struct OptixBuiltinISOptions: - OptixPrimitiveType builtinISModuleType - int usesMotionBlur + IF _OPTIX_VERSION > 70300: # switch to new version + cdef size_t OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT + + cdef enum OptixPayloadSemantics: + 
OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_CH_NONE, + OPTIX_PAYLOAD_SEMANTICS_CH_READ, + OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, + OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_MS_NONE, + OPTIX_PAYLOAD_SEMANTICS_MS_READ, + OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, + OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_AH_NONE, + OPTIX_PAYLOAD_SEMANTICS_AH_READ, + OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, + OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_IS_NONE, + OPTIX_PAYLOAD_SEMANTICS_IS_READ, + OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, + OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE, + + cdef struct OptixPayloadType: + unsigned int numPayloadValues + const unsigned int* payloadSemantics + + cdef struct OptixModuleCompileOptions: + int maxRegisterCount + OptixCompileOptimizationLevel optLevel + OptixCompileDebugLevel debugLevel + const OptixModuleCompileBoundValueEntry* boundValues + unsigned int numBoundValues + unsigned int numPayloadTypes + OptixPayloadType* payloadTypes + + cdef struct OptixBuiltinISOptions: + OptixPrimitiveType builtinISModuleType + int usesMotionBlur + unsigned int buildFlags + unsigned int curveEndcapFlags + ELSE: + cdef struct OptixModuleCompileOptions: + int maxRegisterCount + OptixCompileOptimizationLevel optLevel + OptixCompileDebugLevel debugLevel + const OptixModuleCompileBoundValueEntry* boundValues + unsigned int numBoundValues + + cdef struct OptixBuiltinISOptions: + OptixPrimitiveType builtinISModuleType + int usesMotionBlur OptixResult optixModuleCreateFromPTX(OptixDeviceContext context, @@ -64,13 +104,52 @@ cdef extern from "optix_includes.h" nogil: OptixModule *builtinModule) -cdef class ModuleCompileOptions(OptixObject): - cdef OptixModuleCompileOptions compile_options + IF _OPTIX_VERSION > 70300: # switch to new version + ctypedef struct OptixTask: 
+ pass + cdef enum OptixModuleCompileState: + OPTIX_MODULE_COMPILE_STATE_NOT_STARTED + OPTIX_MODULE_COMPILE_STATE_STARTED + OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE + OPTIX_MODULE_COMPILE_STATE_FAILED + OPTIX_MODULE_COMPILE_STATE_COMPLETED + + cdef OptixResult optixModuleGetCompilationState(OptixModule module, + OptixModuleCompileState * state) + + cdef OptixResult optixModuleCreateFromPTXWithTasks(OptixDeviceContext context, + const OptixModuleCompileOptions * moduleCompileOptions, + const OptixPipelineCompileOptions * pipelineCompileOptions, + const char * PTX, + size_t PTXsize, + char * logString, + size_t * logStringSize, + OptixModule * module, + OptixTask * firstTask) + + cdef OptixResult optixTaskExecute(OptixTask task, + OptixTask * additionalTasks, + unsigned int maxNumAdditionalTasks, + unsigned int *numAdditionalTasksCreated) + + +cdef class BuiltinISOptions(OptixObject): + cdef OptixBuiltinISOptions options cdef class Module(OptixContextObject): cdef OptixModule module cdef list _compile_flags - #cpdef size_t c_obj(self) - +IF _OPTIX_VERSION > 70300: # switch to new version + cdef class ModuleCompileOptions(OptixObject): + cdef OptixModuleCompileOptions compile_options + cdef vector[OptixPayloadType] payload_types + cdef vector[vector[unsigned int]] payload_values # WTF! 
+ + cdef class Task(OptixObject): + cdef OptixTask task + cdef Module module +ELSE: + cdef class ModuleCompileOptions(OptixObject): + cdef OptixModuleCompileOptions compile_options \ No newline at end of file diff --git a/optix/module.pyx b/optix/module.pyx index 7dd3326..e991838 100644 --- a/optix/module.pyx +++ b/optix/module.pyx @@ -1,12 +1,16 @@ # distutils: language = c++ -from enum import IntEnum +from enum import IntEnum, IntFlag import os from .path_utility import get_cuda_include_path, get_optix_include_path from .common cimport optix_check_return, optix_init from .context cimport DeviceContext from .pipeline cimport PipelineCompileOptions from .pipeline import CompileDebugLevel +from .build import PrimitiveType, BuildFlags, CurveEndcapFlags +from .common import ensure_iterable +from libc.stdint cimport uintptr_t +from libcpp.vector cimport vector optix_init() @@ -20,22 +24,134 @@ class CompileOptimizationLevel(IntEnum): LEVEL_2 = OPTIX_COMPILE_OPTIMIZATION_LEVEL_2, LEVEL_3 = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3, +IF _OPTIX_VERSION > 70300: + class PayloadSemantics(IntFlag): + """ + Wraps the PayloadSemantics enum. 
+ """ + + DEFAULT = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE # allow everything as default + TRACE_CALLER_NONE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, + TRACE_CALLER_READ = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, + TRACE_CALLER_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, + TRACE_CALLER_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, + CH_NONE = OPTIX_PAYLOAD_SEMANTICS_CH_NONE, + CH_READ = OPTIX_PAYLOAD_SEMANTICS_CH_READ, + CH_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, + CH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, + MS_NONE = OPTIX_PAYLOAD_SEMANTICS_MS_NONE, + MS_READ = OPTIX_PAYLOAD_SEMANTICS_MS_READ, + MS_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, + MS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, + AH_NONE = OPTIX_PAYLOAD_SEMANTICS_AH_NONE, + AH_READ = OPTIX_PAYLOAD_SEMANTICS_AH_READ, + AH_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, + AH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, + IS_NONE = OPTIX_PAYLOAD_SEMANTICS_IS_NONE, + IS_READ = OPTIX_PAYLOAD_SEMANTICS_IS_READ, + IS_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, + IS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE + + class ModuleCompileState(IntEnum): + NOT_STARTED = OPTIX_MODULE_COMPILE_STATE_NOT_STARTED, + STARTED = OPTIX_MODULE_COMPILE_STATE_STARTED, + IMPENDING_FAILURE = OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE, + FAILED = OPTIX_MODULE_COMPILE_STATE_FAILED, + COMPLETED = OPTIX_MODULE_COMPILE_STATE_COMPLETED, + + + cdef class Task(OptixObject): + """ + Class to represent a parallel Task to compile an OptiX module. + A Task can be executed in parallel by e.g. a thread pool to handle lots of module compilations concurrently. + It is only valid as long as the corresponding module exists, therefore in this wrapper a reference to the module + is stored. 
+ + Note that a Task is not supposed to be created by the user directly, but provided by the create_as_task method + of the Module class. + + Parameters + ---------- + module: Module + The module this Task belongs to. + """ + def __init__(self, Module module): + self.module = module + self.task = NULL + + def execute(self, max_additional_tasks=2): + """ + Execute the Task. If more parallel work is found, it will be returned as a new list of Task objects. + The list has a maximum size of max_additional_tasks. + + Note that each Task can only be executed by a single thread. + + Parameters + ---------- + max_additional_tasks: int + The maximum number of new Tasks to create from this one + + Returns + ------- + tasks: List[Task] + The newly created tasks if any + """ + cdef vector[OptixTask] additional_tasks + cdef unsigned int i + cdef unsigned int additional_tasks_created = 0 + cdef unsigned int max_num_additional_tasks = max_additional_tasks + + with nogil: + additional_tasks.resize(max_num_additional_tasks) + optix_check_return(optixTaskExecute(self.task, additional_tasks.data(), max_num_additional_tasks, &additional_tasks_created)) + + cdef list tasks = [] + for i in range(additional_tasks_created): + t = Task(self.module) + t.task = additional_tasks[i] + tasks.append(t) + return tasks + cdef class ModuleCompileOptions(OptixObject): """ Wraps the OptixModuleCompileOptions struct.
""" DEFAULT_MAX_REGISTER_COUNT = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT + DEFAULT_MAX_PAYLOAD_TYPE_COUNT = OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT + DEFAULT_MAX_PAYLOAD_VALUE_COUNT = OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_VALUE_COUNT + def __init__(self, - max_register_count=OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT, + max_register_count=DEFAULT_MAX_REGISTER_COUNT, opt_level=CompileOptimizationLevel.DEFAULT, - debug_level= CompileDebugLevel.DEFAULT): #TODO add bound values + debug_level= CompileDebugLevel.DEFAULT, + payload_types=None): #TODO add bound values self.compile_options.maxRegisterCount = max_register_count self.compile_options.optLevel = opt_level.value self.compile_options.debugLevel = debug_level.value self.compile_options.numBoundValues = 0 self.compile_options.boundValues = NULL # currently not supported + IF _OPTIX_VERSION > 70300: + if payload_types is None: + self.compile_options.numPayloadTypes = 0 + self.compile_options.payloadTypes = NULL + else: + # set the payload types for these compile options (this is horrible, i know ;)) + payload_types = [ensure_iterable(pt) for pt in ensure_iterable(payload_types)] # list of lists + self.payload_types.resize(len(payload_types)) # the number of different payload types + self.payload_values.resize(self.payload_types.size()) # a vector of semantics for each payload type + self.compile_options.numPayloadTypes = self.payload_types.size() + for i, payload_values in enumerate(payload_types): + self.payload_types[i].numPayloadValues = len(payload_values) + self.payload_values[i].resize(self.payload_types[i].numPayloadValues) + for j, payload_semantics in enumerate(payload_values): + self.payload_values[i][j] = payload_semantics.value + self.payload_types[i].payloadSemantics = self.payload_values[i].data() + self.compile_options.payloadTypes = self.payload_types.data() + + + @property def max_register_count(self): return self.compile_options.maxRegisterCount @@ -74,12 +190,29 @@ cdef _is_ptx(src): if not 
isinstance(src, (bytes, bytearray)): return False for line in src.splitlines(): - print(line) if len(line) == 0 or line.startswith(b'//') or line.startswith(b'\n'): continue return line.startswith(b'.version') +cdef class BuiltinISOptions(OptixObject): + def __init__(self, + primitive_type, + build_flags=None, + uses_motion_blur=False, + curve_endcap_flags=None): + self.options.builtinISModuleType = primitive_type.value + self.options.usesMotionBlur = uses_motion_blur + + IF _OPTIX_VERSION > 70300: + if build_flags is None: + raise ValueError("Parameter build_flags is required for OptiX versions >= 7.4.") + self.options.buildFlags = build_flags.value + if curve_endcap_flags is None: + curve_endcap_flags = CurveEndcapFlags.DEFAULT + self.options.curveEndcapFlags = curve_endcap_flags.value + + cdef class Module(OptixContextObject): """ Class representing a Optix Cuda program that will be called during pipeline execution. Wraps the OptixModule struct. @@ -107,26 +240,152 @@ cdef class Module(OptixContextObject): compile_flags=_nvrtc_compile_flags_default, program_name=None): super().__init__(context) + cdef const char * c_ptx + cdef unsigned int pipeline_payload_values, i self._compile_flags = list(compile_flags) - if not _is_ptx(src): - ptx = self._compile_cuda_ptx(src, name=program_name) - else: - ptx = src - cdef const char* c_ptx = ptx - optixModuleCreateFromPTX(self.context.c_context, - &module_compile_options.compile_options, - &pipeline_compile_options.compile_options, - c_ptx, - len(ptx) + 1, - NULL, - NULL, - &self.module) - - def _compile_cuda_ptx(self, src, name=None, **kwargs): + + if src is not None: + ptx = self.compile_cuda_ptx(src, compile_flags, name=program_name) + c_ptx = ptx + #IF _OPTIX_VERSION > 70300: + # self._check_payload_values(module_compile_options, pipeline_compile_options) + + optix_check_return(optixModuleCreateFromPTX(self.context.c_context, + &module_compile_options.compile_options, + &pipeline_compile_options.compile_options, + 
c_ptx, + len(ptx) + 1, + NULL, + NULL, + &self.module)) + + def __dealloc__(self): + if self.module != 0: + optix_check_return(optixModuleDestroy(self.module)) + + IF _OPTIX_VERSION > 70300: + @property + def compile_state(self): + cdef OptixModuleCompileState state + with nogil: + optix_check_return(optixModuleGetCompilationState(self.module, &state)) + return ModuleCompileState(state) + + # @staticmethod + # def _check_payload_values(ModuleCompileOptions module_compile_options, PipelineCompileOptions pipeline_compile_options): + # IF _OPTIX_VERSION > 70300: + # # check if the payload values match between the module and pipeline compile options + # pipeline_payload_values = pipeline_compile_options.compile_options.numPayloadValues + # if module_compile_options.payload_types.size() > 0: + # for i in range(module_compile_options.compile_options.numPayloadTypes): + # if pipeline_payload_values != module_compile_options.compile_options.payloadTypes[ + # i].numPayloadValues: + # raise ValueError( + # f"number of payload values in module compile options at index {i} does not match the num_payload_values in the pipeline_compile_options.") + # return + + @classmethod + def create_as_task(cls, + DeviceContext context, + src, + ModuleCompileOptions module_compile_options = ModuleCompileOptions(), + PipelineCompileOptions pipeline_compile_options = PipelineCompileOptions(), + compile_flags=_nvrtc_compile_flags_default, + program_name=None): + """ + Create a module associated with a parallel task. + The function will perform just enough work to instantiate the module. + Everything else will be done by the task on request. + + Parameters + ---------- + context: DeviceContext + The current OptiX context + src: str + Either a string containing the module's source code or PTX or the path to a file containing it. 
+ module_compile_options: ModuleCompileOptions + Compile options of this module + pipeline_compile_options: PipelineCompileOptions + Compile options of the pipeline the module will be used in + compile_flags: list[str], optional + List of compiler flags to use. If omitted, the default flags are used. + program_name: str, optional + The name the program is given internally. If omitted, either the filename is used if given or a default name is used. + + Returns + ------- + + module: Module + The created module + task: Task + The task associated with this module + + """ + cdef Module module = Module(context, None, compile_flags=compile_flags) + cdef const char * c_ptx + cdef unsigned int pipeline_payload_values, i + #cls._check_payload_values(module_compile_options, pipeline_compile_options) + + ptx = cls.compile_cuda_ptx(src, compile_flags, name=program_name) + c_ptx = ptx + + cdef Task task = Task(module) + + optix_check_return(optixModuleCreateFromPTXWithTasks(context.c_context, + &module_compile_options.compile_options, + &pipeline_compile_options.compile_options, + c_ptx, + len(ptx) + 1, + NULL, + NULL, + &module.module, + &task.task)) + return module, task + + + @classmethod + def create_builtin_is_module(cls, + DeviceContext context, + ModuleCompileOptions module_compile_options, + PipelineCompileOptions pipeline_compile_options, + BuiltinISOptions builtin_is_options): + """ + Return a module containing the builtin intersection program for the given primitive + + Parameters + ---------- + context: DeviceContext + The current optix context + module_compile_options: ModuleCompileOptions + The compile options for the module + pipeline_compile_options: PipelineCompileOptions + The compile options of the pipeline + builtin_is_options: BuiltinISOptions + Special options for the intersection program like the endcap type for curves + + Returns + ------- + module: Module + The Module containing the intersection program + """ + cdef Module module = cls(context, None) + +
IF _OPTIX_VERSION > 70300: + cls._check_payload_values(module_compile_options, pipeline_compile_options) + optix_check_return(optixBuiltinISModuleGet(context.c_context, + &module_compile_options.compile_options, + &pipeline_compile_options.compile_options, + &builtin_is_options.options, &module.module)) + return module + + @staticmethod + def compile_cuda_ptx(src, compile_flags=_nvrtc_compile_flags_default, name=None, **kwargs): if os.path.exists(src): name = src with open(src, 'r') as f: src = f.read() + if _is_ptx(src): + return src elif name is None: name = "default_program" @@ -134,8 +393,7 @@ cdef class Module(OptixContextObject): # TODO is there a public API for that? from cupy.cuda.compiler import _NVRTCProgram as NVRTCProgram prog = NVRTCProgram(src, name, **kwargs) - flags = self._compile_flags - + flags = list(compile_flags) # get cuda and optix_include_paths cuda_include_path = get_cuda_include_path() optix_include_path = get_optix_include_path() diff --git a/optix/pipeline.pxd b/optix/pipeline.pxd index ceaac1d..0806015 100644 --- a/optix/pipeline.pxd +++ b/optix/pipeline.pxd @@ -5,6 +5,7 @@ from .program_group cimport ProgramGroup, OptixStackSizes from .shader_binding_table cimport OptixShaderBindingTable cdef extern from "optix_includes.h" nogil: + cdef size_t OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_VALUE_COUNT # pipeline functions and structs ctypedef struct OptixPipeline: @@ -17,26 +18,40 @@ cdef extern from "optix_includes.h" nogil: OPTIX_EXCEPTION_FLAG_USER, OPTIX_EXCEPTION_FLAG_DEBUG - - cdef enum OptixCompileDebugLevel: - OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - OPTIX_COMPILE_DEBUG_LEVEL_NONE, - OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, - OPTIX_COMPILE_DEBUG_LEVEL_FULL - - cdef enum OptixTraversableGraphFlags: OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY, OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS, OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING - cdef enum OptixPrimitiveTypeFlags: - OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, - 
OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE, + IF _OPTIX_VERSION > 70300: # switch to new instance flags + cdef enum OptixCompileDebugLevel: + OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, + OPTIX_COMPILE_DEBUG_LEVEL_NONE, + OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, + OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, + OPTIX_COMPILE_DEBUG_LEVEL_FULL + + cdef enum OptixPrimitiveTypeFlags: + OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM, + OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE, + ELSE: + cdef enum OptixCompileDebugLevel: + OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, + OPTIX_COMPILE_DEBUG_LEVEL_NONE, + OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, + OPTIX_COMPILE_DEBUG_LEVEL_FULL + + cdef enum OptixPrimitiveTypeFlags: + OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, + OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE, cdef struct OptixPipelineCompileOptions: diff --git a/optix/pipeline.pyx b/optix/pipeline.pyx index b3a3142..c471b0d 100644 --- a/optix/pipeline.pyx +++ b/optix/pipeline.pyx @@ -16,16 +16,6 @@ from .shader_binding_table cimport ShaderBindingTable optix_init() -class CompileDebugLevel(IntEnum): - """ - Wraps the OptixCompileDebugLevel enum. - """ - DEFAULT = OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - NONE = OPTIX_COMPILE_DEBUG_LEVEL_NONE, - LINEINFO = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, - FULL = OPTIX_COMPILE_DEBUG_LEVEL_FULL - - class ExceptionFlags(IntFlag): """ Wraps the OptixExceptionFlags enum. 
@@ -46,22 +36,56 @@ class TraversableGraphFlags(IntFlag): ALLOW_SINGLE_LEVEL_INSTANCING = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING -class PrimitiveTypeFlags(IntFlag): - """ - Wraps the OptixPrimitiveTypeFlags enum. - """ - DEFAULT = 0, # corresponds to CUSTOM | TRIANGLE - CUSTOM = OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, - ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, - ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, - ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, - TRIANGLE = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE +IF _OPTIX_VERSION > 70300: # switch to new instance flags + class CompileDebugLevel(IntEnum): + """ + Wraps the OptixCompileDebugLevel enum. + """ + DEFAULT = OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, + NONE = OPTIX_COMPILE_DEBUG_LEVEL_NONE, + MINIMAL = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, + MODERATE = OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, + FULL = OPTIX_COMPILE_DEBUG_LEVEL_FULL + + class PrimitiveTypeFlags(IntFlag): + """ + Wraps the OptixPrimitiveTypeFlags enum. + """ + DEFAULT = 0, # corresponds to CUSTOM | TRIANGLE + CUSTOM = OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, + ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, + ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, + ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, + ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM, + TRIANGLE = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE # fixes negative number error +ELSE: + class CompileDebugLevel(IntEnum): + """ + Wraps the OptixCompileDebugLevel enum. + """ + DEFAULT = OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, + NONE = OPTIX_COMPILE_DEBUG_LEVEL_NONE, + LINEINFO = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, + FULL = OPTIX_COMPILE_DEBUG_LEVEL_FULL + + class PrimitiveTypeFlags(IntFlag): + """ + Wraps the OptixPrimitiveTypeFlags enum. 
+ """ + DEFAULT = 0, # corresponds to CUSTOM | TRIANGLE + CUSTOM = OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, + ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, + ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, + ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, + TRIANGLE = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE cdef class PipelineCompileOptions(OptixObject): """ Class wrapping the OptixPipelineCompileOptions struct. """ + DEFAULT_MAX_PAYLOAD_VALUE_COUNT = OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_VALUE_COUNT + def __init__(self, uses_motion_blur=False, traversable_graph_flags = TraversableGraphFlags.ALLOW_ANY, @@ -70,13 +94,13 @@ cdef class PipelineCompileOptions(OptixObject): exception_flags = ExceptionFlags.NONE, pipeline_launch_params_variable_name = "params", uses_primitive_type_flags = PrimitiveTypeFlags.DEFAULT): - self.compile_options.usesMotionBlur = uses_motion_blur - self.compile_options.traversableGraphFlags = traversable_graph_flags.value - self.compile_options.numPayloadValues = num_payload_values - self.compile_options.numAttributeValues = num_attribute_values - self.compile_options.exceptionFlags = exception_flags.value + self.uses_motion_blur = uses_motion_blur + self.traversable_graph_flags = traversable_graph_flags + self.num_payload_values = num_payload_values + self.num_attribute_values = num_attribute_values + self.exception_flags = exception_flags self.pipeline_launch_params_variable_name = pipeline_launch_params_variable_name - self.compile_options.usesPrimitiveTypeFlags = (uses_primitive_type_flags.value) + self.uses_primitive_type_flags = uses_primitive_type_flags @property def uses_motion_blur(self): @@ -100,6 +124,8 @@ cdef class PipelineCompileOptions(OptixObject): @num_payload_values.setter def num_payload_values(self, num_payload_values): + if num_payload_values > self.DEFAULT_MAX_PAYLOAD_VALUE_COUNT: + raise ValueError(f"A maximum of {self.DEFAULT_MAX_PAYLOAD_VALUE_COUNT} payload values is 
allowed.") self.compile_options.numPayloadValues = num_payload_values @property diff --git a/setup.py b/setup.py index 6e1ed53..7d4fecb 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ from setuptools import setup, Extension, find_packages from Cython.Build import cythonize - +import re +from pathlib import Path # standalone import of a module (https://stackoverflow.com/a/58423785) def import_module_from_path(path): @@ -29,8 +30,28 @@ def import_module_from_path(path): if cuda_include_path is None or optix_include_path is None: raise RuntimeError("Cuda or optix not found in the system") -extensions = [Extension("*", ["optix/*.pyx"], include_dirs=[cuda_include_path, optix_include_path])] -extensions = cythonize(extensions, language_level="3") +optix_version_re = re.compile(r'.*OPTIX_VERSION +(\d{5})') # get the optix version from the header +with open(Path(optix_include_path) / "optix.h", 'r') as f: + header_content = f.read() + optix_version = int(optix_version_re.search(header_content).group(1)) + +optix_version_major = optix_version // 10000 +optix_version_minor = (optix_version % 10000) // 100 +optix_version_micro = optix_version % 100 + +print(f"Found OptiX version {optix_version_major}.{optix_version_minor}.{optix_version_micro}.") + +cython_compile_env = { + '_OPTIX_VERSION': optix_version, + '_OPTIX_VERSION_MAJOR': optix_version_major, + '_OPTIX_VERSION_MINOR': optix_version_minor, + '_OPTIX_VERSION_MICRO': optix_version_micro +} + +extensions = [Extension("*", ["optix/*.pyx"], + include_dirs=[cuda_include_path, optix_include_path])] +extensions = cythonize(extensions, language_level="3", + compile_time_env=cython_compile_env) with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read()