Skip to content

Commit

Permalink
OptiXAggregate: take optional CUDA stream in buildBVH()
Browse files Browse the repository at this point in the history
  • Loading branch information
mmp committed Jul 24, 2021
1 parent 2842083 commit 4b74e6b
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
26 changes: 18 additions & 8 deletions src/pbrt/gpu/aggregate.cpp
Expand Up @@ -90,7 +90,7 @@ extern const unsigned char PBRT_EMBEDDED_PTX[];
STAT_MEMORY_COUNTER("Memory/Acceleration structures", gpuBVHBytes);

OptixTraversableHandle OptiXAggregate::buildBVH(
const std::vector<OptixBuildInput> &buildInputs) const {
const std::vector<OptixBuildInput> &buildInputs, cudaStream_t buildStream) {
if (buildInputs.empty())
return {};

Expand Down Expand Up @@ -121,16 +121,18 @@ OptixTraversableHandle OptiXAggregate::buildBVH(
// Build.
OptixTraversableHandle traversableHandle{0};
OPTIX_CHECK(optixAccelBuild(
optixContext, cudaStream, &accelOptions, buildInputs.data(), buildInputs.size(),
optixContext, buildStream, &accelOptions, buildInputs.data(), buildInputs.size(),
CUdeviceptr(tempBuffer), blasBufferSizes.tempSizeInBytes,
CUdeviceptr(outputBuffer), blasBufferSizes.outputSizeInBytes, &traversableHandle,
&emitDesc, 1));

CUDA_CHECK(cudaFree(tempBuffer));

CUDA_CHECK(cudaStreamSynchronize(buildStream));
uint64_t compactedSize;
CUDA_CHECK(cudaMemcpy(&compactedSize, compactedSizePtr, sizeof(uint64_t),
cudaMemcpyDeviceToHost));
CUDA_CHECK(cudaMemcpyAsync(&compactedSize, compactedSizePtr, sizeof(uint64_t),
cudaMemcpyDeviceToHost, buildStream));
CUDA_CHECK(cudaStreamSynchronize(buildStream));

if (compactedSize >= blasBufferSizes.outputSizeInBytes) {
// No need to compact...
Expand All @@ -142,9 +144,10 @@ OptixTraversableHandle OptiXAggregate::buildBVH(
void *asBuffer;
CUDA_CHECK(cudaMalloc(&asBuffer, compactedSize));

OPTIX_CHECK(optixAccelCompact(optixContext, cudaStream, traversableHandle,
OPTIX_CHECK(optixAccelCompact(optixContext, buildStream, traversableHandle,
CUdeviceptr(asBuffer), compactedSize,
&traversableHandle));
CUDA_CHECK(cudaStreamSynchronize(buildStream));

CUDA_CHECK(cudaFree(outputBuffer));
}
Expand Down Expand Up @@ -1296,6 +1299,10 @@ OptiXAggregate::OptiXAggregate(

LOG_VERBOSE("Starting to create IASes for %d instance definitions",
scene.instanceDefinitions.size());
std::vector<cudaStream_t> threadCUDAStreams(MaxThreadIndex());
for (int i = 0; i < threadCUDAStreams.size(); ++i)
cudaStreamCreate(&threadCUDAStreams[i]);

std::unordered_map<std::string, Instance> instanceMap;
std::mutex instanceMapMutex;
ParallelFor(0, scene.instanceDefinitions.size(), [&](int64_t i) {
Expand All @@ -1319,7 +1326,7 @@ OptiXAggregate::OptiXAggregate(
alloc);
meshes.clear();
if (triangleBuildInput) {
inst.handles[0] = buildBVH(triangleBuildInput.optixInputs);
inst.handles[0] = buildBVH(triangleBuildInput.optixInputs, threadCUDAStreams[ThreadIndex]);
inst.sbtOffsets[0] = addHGRecords(triangleBuildInput);
inst.bounds = triangleBuildInput.bounds;
}
Expand All @@ -1329,7 +1336,7 @@ OptiXAggregate::OptiXAggregate(
hitPGRandomHitBilinearPatch, textures.floatTextures, namedMaterials,
materials, media, {}, alloc);
if (bilinearBuildInput) {
inst.handles[1] = buildBVH(bilinearBuildInput.optixInputs);
inst.handles[1] = buildBVH(bilinearBuildInput.optixInputs, threadCUDAStreams[ThreadIndex]);
inst.sbtOffsets[1] = addHGRecords(bilinearBuildInput);
inst.bounds = Union(inst.bounds, bilinearBuildInput.bounds);
}
Expand All @@ -1339,14 +1346,17 @@ OptiXAggregate::OptiXAggregate(
hitPGRandomHitQuadric, textures.floatTextures, namedMaterials,
materials, media, {}, alloc);
if (quadricBuildInput) {
inst.handles[2] = buildBVH(quadricBuildInput.optixInputs);
inst.handles[2] = buildBVH(quadricBuildInput.optixInputs, threadCUDAStreams[ThreadIndex]);
inst.sbtOffsets[2] = addHGRecords(quadricBuildInput);
inst.bounds = Union(inst.bounds, quadricBuildInput.bounds);
}

std::lock_guard<std::mutex> lock(instanceMapMutex);
instanceMap[def.first] = inst;
});

for (int i = 0; i < threadCUDAStreams.size(); ++i)
cudaStreamDestroy(threadCUDAStreams[i]);
LOG_VERBOSE("Finished creating IASes for instance definitions");

///////////////////////////////////////////////////////////////////////////
Expand Down
3 changes: 2 additions & 1 deletion src/pbrt/gpu/aggregate.h
Expand Up @@ -118,7 +118,8 @@ class OptiXAggregate : public WavefrontAggregate {
OptixProgramGroup createIntersectionPG(const char *closest, const char *any,
const char *intersect) const;

OptixTraversableHandle buildBVH(const std::vector<OptixBuildInput> &buildInputs) const;
OptixTraversableHandle buildBVH(const std::vector<OptixBuildInput> &buildInputs,
cudaStream_t stream = 0);

Allocator alloc;
Bounds3f bounds;
Expand Down

0 comments on commit 4b74e6b

Please sign in to comment.