Skip to content

Commit

Permalink
Changed MiniEngine RT sample to work with multiple BLASes (#790)
Browse files Browse the repository at this point in the history
* Allocate separate scratch for each BLAS

* Updated sample to use one BLAS per mesh
  • Loading branch information
stanard committed Jul 19, 2022
1 parent 8fc63a4 commit 1704d88
Showing 1 changed file with 79 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ using namespace GameCore;
using namespace Math;
using namespace Graphics;

extern ByteAddressBuffer g_bvh_bottomLevelAccelerationStructure;

CComPtr<ID3D12Device5> g_pRaytracingDevice;

__declspec(align(16)) struct HitShaderConstants
Expand Down Expand Up @@ -718,19 +716,29 @@ void D3D12RaytracingMiniEngineSample::Startup( void )
g_hitConstantBuffer.Create(L"Hit Constant Buffer", 1, sizeof(HitShaderConstants));
g_dynamicConstantBuffer.Create(L"Dynamic Constant Buffer", 1, sizeof(DynamicCB));

//
// Load the model
//
const ModelH3D& model = Sponza::GetModel();

InitializeSceneInfo(model);
InitializeViews(model);
UINT numMeshes = model.m_Header.meshCount;

const UINT numBottomLevels = 1;
//
// Define the top level acceleration structure
//
const UINT numMeshes = model.m_Header.meshCount;
const UINT numInstances = numMeshes;

// Set numInstances to 1 to place all meshes in a single BLAS, or to numMeshes to build one BLAS (and one instance) per mesh.
// Typically, to save memory, you would instance a BLAS multiple times when geometry is duplicated.
ASSERT(numInstances == 1 || numInstances == numMeshes);

D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO topLevelPrebuildInfo;
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC topLevelAccelerationStructureDesc = {};
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &topLevelInputs = topLevelAccelerationStructureDesc.Inputs;
topLevelInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
topLevelInputs.NumDescs = numBottomLevels;
topLevelInputs.NumDescs = numInstances;
topLevelInputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE;
topLevelInputs.pGeometryDescs = nullptr;
topLevelInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
Expand All @@ -739,6 +747,27 @@ void D3D12RaytracingMiniEngineSample::Startup( void )
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS buildFlag = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE;
std::vector<D3D12_RAYTRACING_GEOMETRY_DESC> geometryDescs(model.m_Header.meshCount);
UINT64 scratchBufferSizeNeeded = topLevelPrebuildInfo.ScratchDataSizeInBytes;

ByteAddressBuffer tlasScratchBuffer;
tlasScratchBuffer.Create(L"Acceleration Structure Scratch Buffer", (UINT)scratchBufferSizeNeeded, 1);

D3D12_HEAP_PROPERTIES defaultHeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
auto tlasBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(topLevelPrebuildInfo.ResultDataMaxSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
g_Device->CreateCommittedResource(
&defaultHeapProps,
D3D12_HEAP_FLAG_NONE,
&tlasBufferDesc,
D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE,
nullptr,
IID_PPV_ARGS(&g_bvh_topLevelAccelerationStructure));

topLevelAccelerationStructureDesc.DestAccelerationStructureData = g_bvh_topLevelAccelerationStructure->GetGPUVirtualAddress();
topLevelAccelerationStructureDesc.ScratchAccelerationStructureData = tlasScratchBuffer.GetGpuVirtualAddress();

//
// Define the bottom level acceleration structures
//

for (UINT i = 0; i < numMeshes; i++)
{
auto &mesh = model.m_pMesh[i];
Expand All @@ -750,109 +779,100 @@ void D3D12RaytracingMiniEngineSample::Startup( void )
D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC &trianglesDesc = desc.Triangles;
trianglesDesc.VertexFormat = DXGI_FORMAT_R32G32B32_FLOAT;
trianglesDesc.VertexCount = mesh.vertexCount;
trianglesDesc.VertexBuffer.StartAddress = model.GetVertexBuffer().BufferLocation + (mesh.vertexDataByteOffset + mesh.attrib[ModelH3D::attrib_position].offset);
trianglesDesc.VertexBuffer.StartAddress = model.GetVertexBuffer().BufferLocation + (mesh.vertexDataByteOffset + (UINT)mesh.attrib[ModelH3D::attrib_position].offset);
trianglesDesc.IndexBuffer = model.GetIndexBuffer().BufferLocation + mesh.indexDataByteOffset;
trianglesDesc.VertexBuffer.StrideInBytes = mesh.vertexStride;
trianglesDesc.IndexCount = mesh.indexCount;
trianglesDesc.IndexFormat = DXGI_FORMAT_R16_UINT;
trianglesDesc.Transform3x4 = 0;
}

std::vector<UINT64> bottomLevelAccelerationStructureSize(numBottomLevels);
std::vector<D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC> bottomLevelAccelerationStructureDescs(numBottomLevels);
for (UINT i = 0; i < numBottomLevels; i++)
{
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC &bottomLevelAccelerationStructureDesc = bottomLevelAccelerationStructureDescs[i];
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &bottomLevelInputs = bottomLevelAccelerationStructureDesc.Inputs;
bottomLevelInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
bottomLevelInputs.NumDescs = numMeshes;
bottomLevelInputs.pGeometryDescs = &geometryDescs[i];
bottomLevelInputs.Flags = buildFlag;
bottomLevelInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;

D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO bottomLevelprebuildInfo;
g_pRaytracingDevice->GetRaytracingAccelerationStructurePrebuildInfo(&bottomLevelInputs, &bottomLevelprebuildInfo);

bottomLevelAccelerationStructureSize[i] = bottomLevelprebuildInfo.ResultDataMaxSizeInBytes;
scratchBufferSizeNeeded = std::max(bottomLevelprebuildInfo.ScratchDataSizeInBytes, scratchBufferSizeNeeded);
}

ByteAddressBuffer scratchBuffer;
scratchBuffer.Create(L"Acceleration Structure Scratch Buffer", (UINT)scratchBufferSizeNeeded, 1);
g_bvh_bottomLevelAccelerationStructures.resize(numInstances);
std::vector<D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC> blasDescs(numInstances);
std::vector<D3D12_RAYTRACING_INSTANCE_DESC> instanceDescs(numInstances);
std::vector<ByteAddressBuffer> blasScratchBuffers(numInstances);

D3D12_HEAP_PROPERTIES defaultHeapDesc = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
auto topLevelDesc = CD3DX12_RESOURCE_DESC::Buffer(topLevelPrebuildInfo.ResultDataMaxSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
g_Device->CreateCommittedResource(
&defaultHeapDesc,
D3D12_HEAP_FLAG_NONE,
&topLevelDesc,
D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE,
nullptr,
IID_PPV_ARGS(&g_bvh_topLevelAccelerationStructure));
for (UINT i = 0; i < numInstances; i++)
{
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC& blasDesc = blasDescs[i];
blasDesc.Inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
blasDesc.Inputs.NumDescs = (numInstances == numMeshes) ? 1 : numMeshes;
blasDesc.Inputs.pGeometryDescs = &geometryDescs[i];
blasDesc.Inputs.Flags = buildFlag;
blasDesc.Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;

topLevelAccelerationStructureDesc.DestAccelerationStructureData = g_bvh_topLevelAccelerationStructure->GetGPUVirtualAddress();
topLevelAccelerationStructureDesc.ScratchAccelerationStructureData = scratchBuffer.GetGpuVirtualAddress();
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO bottomLevelPrebuildInfo;
g_pRaytracingDevice->GetRaytracingAccelerationStructurePrebuildInfo(&blasDesc.Inputs, &bottomLevelPrebuildInfo);

std::vector<D3D12_RAYTRACING_INSTANCE_DESC> instanceDescs(numBottomLevels);
g_bvh_bottomLevelAccelerationStructures.resize(numBottomLevels);
for (UINT i = 0; i < bottomLevelAccelerationStructureDescs.size(); i++)
{
auto &bottomLevelStructure = g_bvh_bottomLevelAccelerationStructures[i];
auto &blas = g_bvh_bottomLevelAccelerationStructures[i];

auto bottomLevelDesc = CD3DX12_RESOURCE_DESC::Buffer(bottomLevelAccelerationStructureSize[i], D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
auto blasBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(bottomLevelPrebuildInfo.ResultDataMaxSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
g_Device->CreateCommittedResource(
&defaultHeapDesc,
&defaultHeapProps,
D3D12_HEAP_FLAG_NONE,
&bottomLevelDesc,
&blasBufferDesc,
D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE,
nullptr,
IID_PPV_ARGS(&bottomLevelStructure));
IID_PPV_ARGS(&blas));

bottomLevelAccelerationStructureDescs[i].DestAccelerationStructureData = bottomLevelStructure->GetGPUVirtualAddress();
bottomLevelAccelerationStructureDescs[i].ScratchAccelerationStructureData = scratchBuffer.GetGpuVirtualAddress();
blasDesc.DestAccelerationStructureData = blas->GetGPUVirtualAddress();

blasScratchBuffers[i].Create(L"BLAS build scratch buffer", (UINT)bottomLevelPrebuildInfo.ScratchDataSizeInBytes, 1);
blasDesc.ScratchAccelerationStructureData = blasScratchBuffers[i].GetGpuVirtualAddress();

D3D12_RAYTRACING_INSTANCE_DESC &instanceDesc = instanceDescs[i];
g_pRaytracingDescriptorHeap->AllocateBufferUav(*bottomLevelStructure);
g_pRaytracingDescriptorHeap->AllocateBufferUav(*blas);

// Identity matrix
ZeroMemory(instanceDesc.Transform, sizeof(instanceDesc.Transform));
instanceDesc.Transform[0][0] = 1.0f;
instanceDesc.Transform[1][1] = 1.0f;
instanceDesc.Transform[2][2] = 1.0f;

instanceDesc.AccelerationStructure = g_bvh_bottomLevelAccelerationStructures[i]->GetGPUVirtualAddress();
instanceDesc.AccelerationStructure = blas->GetGPUVirtualAddress();
instanceDesc.Flags = 0;
instanceDesc.InstanceID = 0;
instanceDesc.InstanceMask = 1;
instanceDesc.InstanceContributionToHitGroupIndex = i;
}

//
// Upload the instance data
//
ByteAddressBuffer instanceDataBuffer;
instanceDataBuffer.Create(L"Instance Data Buffer", numBottomLevels, sizeof(D3D12_RAYTRACING_INSTANCE_DESC), instanceDescs.data());
instanceDataBuffer.Create(L"Instance Data Buffer", numInstances, sizeof(D3D12_RAYTRACING_INSTANCE_DESC), instanceDescs.data());

topLevelInputs.InstanceDescs = instanceDataBuffer.GetGpuVirtualAddress();
topLevelInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;

GraphicsContext& gfxContext = GraphicsContext::Begin(L"Create Acceleration Structure");
ID3D12GraphicsCommandList *pCommandList = gfxContext.GetCommandList();
//
// Build the acceleration structures
//
GraphicsContext& gfxContext = GraphicsContext::Begin(L"Build Acceleration Structures");

CComPtr<ID3D12GraphicsCommandList4> pRaytracingCommandList;
pCommandList->QueryInterface(IID_PPV_ARGS(&pRaytracingCommandList));
gfxContext.GetCommandList()->QueryInterface(IID_PPV_ARGS(&pRaytracingCommandList));

ID3D12DescriptorHeap *descriptorHeaps[] = { &g_pRaytracingDescriptorHeap->GetDescriptorHeap() };
pRaytracingCommandList->SetDescriptorHeaps(ARRAYSIZE(descriptorHeaps), descriptorHeaps);

auto uavBarrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr);
for (UINT i = 0; i < bottomLevelAccelerationStructureDescs.size(); i++)
for (UINT i = 0; i < blasDescs.size(); i++)
{
pRaytracingCommandList->BuildRaytracingAccelerationStructure(&bottomLevelAccelerationStructureDescs[i], 0, nullptr);
}
pCommandList->ResourceBarrier(1, &uavBarrier);
pRaytracingCommandList->BuildRaytracingAccelerationStructure(&blasDescs[i], 0, nullptr);

// If the BLAS builds all reused one scratch buffer, you would need a UAV barrier between consecutive builds.
// Because each build here has its own scratch buffer, no barriers are needed, so the driver may be able to
// batch these BLAS builds together. This maximizes GPU utilization and should execute more quickly.
}
pRaytracingCommandList->ResourceBarrier(1, &uavBarrier);
pRaytracingCommandList->BuildRaytracingAccelerationStructure(&topLevelAccelerationStructureDesc, 0, nullptr);

gfxContext.Finish(true);

//
// Build the RTPSO
//
InitializeRaytracingStateObjects(model, numMeshes);

m_CameraPosArrayCurrentPosition = 0;
Expand Down

0 comments on commit 1704d88

Please sign in to comment.