Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vk: Rewrite descriptor allocations #13927

Merged
merged 6 commits into from May 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion rpcs3/Emu/RSX/Common/simple_array.hpp
Expand Up @@ -5,7 +5,7 @@

namespace rsx
{
template <typename Ty>
template <typename Ty> requires std::is_trivially_destructible_v<Ty>
struct simple_array
{
public:
Expand Down
26 changes: 5 additions & 21 deletions rpcs3/Emu/RSX/VK/VKCompute.cpp
Expand Up @@ -16,13 +16,13 @@ namespace vk

void compute_task::init_descriptors()
{
std::vector<VkDescriptorPoolSize> descriptor_pool_sizes;
std::vector<VkDescriptorSetLayoutBinding> bindings;
rsx::simple_array<VkDescriptorPoolSize> descriptor_pool_sizes;
rsx::simple_array<VkDescriptorSetLayoutBinding> bindings;

const auto layout = get_descriptor_layout();
for (const auto &e : layout)
{
descriptor_pool_sizes.push_back({e.first, u32(VK_MAX_COMPUTE_TASKS * e.second)});
descriptor_pool_sizes.push_back({e.first, e.second});

for (unsigned n = 0; n < e.second; ++n)
{
Expand All @@ -38,7 +38,7 @@ namespace vk
}

// Reserve descriptor pools
m_descriptor_pool.create(*g_render_device, descriptor_pool_sizes.data(), ::size32(descriptor_pool_sizes), VK_MAX_COMPUTE_TASKS, 3);
m_descriptor_pool.create(*g_render_device, descriptor_pool_sizes);
m_descriptor_layout = vk::descriptors::create_layout(bindings);

VkPipelineLayoutCreateInfo layout_info = {};
Expand Down Expand Up @@ -119,15 +119,6 @@ namespace vk
}
}

// Releases all descriptor sets handed out by this compute task's pool so they
// can be reused on the next dispatch cycle. No-op if nothing was allocated
// since the last reset. (NOTE(review): this is the removed side of the diff —
// the PR deletes this method in favor of pool-managed recycling.)
void compute_task::free_resources()
{
if (m_used_descriptors == 0)
return;

// reset(0): return every set to the pool without freeing pool memory,
// then clear the bookkeeping counter to match.
m_descriptor_pool.reset(0);
m_used_descriptors = 0;
}

void compute_task::load_program(const vk::command_buffer& cmd)
{
if (!m_program)
Expand Down Expand Up @@ -155,14 +146,7 @@ namespace vk

ensure(m_used_descriptors < VK_MAX_COMPUTE_TASKS);

VkDescriptorSetAllocateInfo alloc_info = {};
alloc_info.descriptorPool = m_descriptor_pool;
alloc_info.descriptorSetCount = 1;
alloc_info.pSetLayouts = &m_descriptor_layout;
alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;

CHECK_RESULT(vkAllocateDescriptorSets(*g_render_device, &alloc_info, m_descriptor_set.ptr()));
m_used_descriptors++;
m_descriptor_set = m_descriptor_pool.allocate(m_descriptor_layout, VK_TRUE);

bind_resources();

Expand Down
2 changes: 0 additions & 2 deletions rpcs3/Emu/RSX/VK/VKCompute.h
Expand Up @@ -44,8 +44,6 @@ namespace vk
void create();
void destroy();

void free_resources();

virtual void bind_resources() {}
virtual void declare_inputs() {}

Expand Down
6 changes: 0 additions & 6 deletions rpcs3/Emu/RSX/VK/VKDraw.cpp
Expand Up @@ -966,11 +966,6 @@ void VKGSRender::end()
m_aux_frame_context.grab_resources(*m_current_frame);
m_current_frame = &m_aux_frame_context;
}
else if (m_current_frame->used_descriptors)
{
m_current_frame->descriptor_pool.reset(0);
m_current_frame->used_descriptors = 0;
}

ensure(!m_current_frame->swap_command_buffer);

Expand Down Expand Up @@ -998,7 +993,6 @@ void VKGSRender::end()
}

// Allocate descriptor set
check_descriptors();
m_current_frame->descriptor_set = allocate_descriptor_set();

// Load program execution environment
Expand Down
64 changes: 32 additions & 32 deletions rpcs3/Emu/RSX/VK/VKGSRender.cpp
Expand Up @@ -395,9 +395,9 @@ namespace
std::tuple<VkPipelineLayout, VkDescriptorSetLayout> get_shared_pipeline_layout(VkDevice dev)
{
const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table();
std::vector<VkDescriptorSetLayoutBinding> bindings(binding_table.total_descriptor_bindings);
rsx::simple_array<VkDescriptorSetLayoutBinding> bindings(binding_table.total_descriptor_bindings);

usz idx = 0;
u32 idx = 0;

// Vertex stream, one stream for cacheable data, one stream for transient data
for (int i = 0; i < 3; i++)
Expand All @@ -406,55 +406,63 @@ namespace
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = binding_table.vertex_buffers_first_bind_slot + i;
bindings[idx].pImmutableSamplers = nullptr;
idx++;
}

bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = binding_table.fragment_constant_buffers_bind_slot;
bindings[idx].pImmutableSamplers = nullptr;

idx++;

bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = binding_table.fragment_state_bind_slot;
bindings[idx].pImmutableSamplers = nullptr;

idx++;

bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = binding_table.fragment_texture_params_bind_slot;
bindings[idx].pImmutableSamplers = nullptr;

idx++;

bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = binding_table.vertex_constant_buffers_bind_slot;
bindings[idx].pImmutableSamplers = nullptr;

idx++;

bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
bindings[idx].binding = binding_table.vertex_params_bind_slot;
bindings[idx].pImmutableSamplers = nullptr;

idx++;

bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = binding_table.conditional_render_predicate_slot;
bindings[idx].pImmutableSamplers = nullptr;

idx++;

bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = binding_table.rasterizer_env_bind_slot;
bindings[idx].pImmutableSamplers = nullptr;

idx++;

Expand All @@ -466,6 +474,7 @@ namespace
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = binding;
bindings[idx].pImmutableSamplers = nullptr;
idx++;
}

Expand All @@ -475,6 +484,7 @@ namespace
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = binding_table.vertex_textures_first_bind_slot + i;
bindings[idx].pImmutableSamplers = nullptr;
idx++;
}

Expand Down Expand Up @@ -595,7 +605,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
m_secondary_cb_list.create(m_secondary_command_buffer_pool, vk::command_buffer::access_type_hint::all);

//Precalculated stuff
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
std::tie(m_pipeline_layout, m_descriptor_layouts) = get_shared_pipeline_layout(*m_device);

//Occlusion
m_occlusion_query_manager = std::make_unique<vk::query_pool_manager>(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE);
Expand All @@ -614,13 +624,16 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
const auto& binding_table = m_device->get_pipeline_binding_table();
const u32 num_fs_samplers = binding_table.vertex_textures_first_bind_slot - binding_table.textures_first_bind_slot;

std::vector<VkDescriptorPoolSize> sizes;
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * max_draw_calls });
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * max_draw_calls });
sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) * max_draw_calls });
rsx::simple_array<VkDescriptorPoolSize> descriptor_type_sizes =
{
{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 },
{ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 },
{ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) },

// Conditional rendering predicate slot; refactor to allow skipping this when not needed
sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 * max_draw_calls });
// Conditional rendering predicate slot; refactor to allow skipping this when not needed
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 }
};
m_descriptor_pool.create(*m_device, descriptor_type_sizes, max_draw_calls);

VkSemaphoreCreateInfo semaphore_info = {};
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
Expand Down Expand Up @@ -665,7 +678,6 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
{
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_wait_semaphore);
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.acquire_signal_semaphore);
ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<u32>(sizes.size()), max_draw_calls, 1);
}

const auto& memory_map = m_device->get_memory_mapping();
Expand Down Expand Up @@ -920,7 +932,6 @@ VKGSRender::~VKGSRender()
{
vkDestroySemaphore((*m_device), ctx.present_wait_semaphore, nullptr);
vkDestroySemaphore((*m_device), ctx.acquire_signal_semaphore, nullptr);
ctx.descriptor_pool.destroy();

ctx.buffer_views_to_clean.clear();
}
Expand All @@ -935,8 +946,8 @@ VKGSRender::~VKGSRender()
m_text_writer.reset();

//Pipeline descriptors
vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr);
vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);
vkDestroyPipelineLayout(*m_device, m_pipeline_layout, nullptr);
vkDestroyDescriptorSetLayout(*m_device, m_descriptor_layouts, nullptr);

// Queries
m_occlusion_query_manager.reset();
Expand All @@ -952,6 +963,9 @@ VKGSRender::~VKGSRender()
// Global resources
vk::destroy_global_resources();

// Destroy at the end in case of lingering callbacks
m_descriptor_pool.destroy();

// Device handles/contexts
m_swapchain->destroy();
m_instance.destroy();
Expand Down Expand Up @@ -1318,25 +1332,11 @@ void VKGSRender::check_present_status()
}
}

// Ensures the current frame's descriptor pool can satisfy the upcoming draw
// clause. If the pool is exhausted, hard-syncs the GPU and resets the pool —
// resetting while sets may still be in flight would violate the Vulkan spec,
// hence the flush first. (NOTE(review): removed side of the diff — the PR
// replaces per-frame pools with a single auto-recycling pool.)
void VKGSRender::check_descriptors()
{
// Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources
const auto required_descriptors = rsx::method_registers.current_draw_clause.pass_count();
if (!m_current_frame->descriptor_pool.can_allocate(required_descriptors, m_current_frame->used_descriptors))
{
// Should hard sync before resetting descriptors for spec compliance
flush_command_queue(true);

// Pool is drained only after the hard sync guarantees no set is in use.
m_current_frame->descriptor_pool.reset(0);
m_current_frame->used_descriptors = 0;
}
}

VkDescriptorSet VKGSRender::allocate_descriptor_set()
{
if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]]
{
return m_current_frame->descriptor_pool.allocate(descriptor_layouts, VK_TRUE, m_current_frame->used_descriptors++);
return m_descriptor_pool.allocate(m_descriptor_layouts, VK_TRUE);
}
else
{
Expand Down Expand Up @@ -1417,15 +1417,15 @@ void VKGSRender::on_init_thread()
if (!m_overlay_manager)
{
m_frame->hide();
m_shaders_cache->load(nullptr, pipeline_layout);
m_shaders_cache->load(nullptr, m_pipeline_layout);
m_frame->show();
}
else
{
rsx::shader_loading_dialog_native dlg(this);

// TODO: Handle window resize messages during loading on GPUs without OUT_OF_DATE_KHR support
m_shaders_cache->load(&dlg, pipeline_layout);
m_shaders_cache->load(&dlg, m_pipeline_layout);
}
}

Expand Down Expand Up @@ -2012,7 +2012,7 @@ bool VKGSRender::load_program()

// Load current program from cache
std::tie(m_program, m_vertex_prog, m_fragment_prog) = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, m_pipeline_properties,
shadermode != shader_mode::recompiler, true, pipeline_layout);
shadermode != shader_mode::recompiler, true, m_pipeline_layout);

vk::leave_uninterruptible();

Expand Down Expand Up @@ -2271,7 +2271,7 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
data_size = 20;
}

vkCmdPushConstants(*m_current_command_buffer, pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, data_size, draw_info);
vkCmdPushConstants(*m_current_command_buffer, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, data_size, draw_info);

const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset;
auto dst = m_vertex_layout_ring_info.map(data_offset, 128);
Expand Down
6 changes: 3 additions & 3 deletions rpcs3/Emu/RSX/VK/VKGSRender.h
Expand Up @@ -120,8 +120,9 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
volatile vk::host_data_t* m_host_data_ptr = nullptr;
std::unique_ptr<vk::buffer> m_host_object_data;

VkDescriptorSetLayout descriptor_layouts;
VkPipelineLayout pipeline_layout;
vk::descriptor_pool m_descriptor_pool;
VkDescriptorSetLayout m_descriptor_layouts;
VkPipelineLayout m_pipeline_layout;

vk::framebuffer_holder* m_draw_fbo = nullptr;

Expand Down Expand Up @@ -229,7 +230,6 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
void check_heap_status(u32 flags = VK_HEAP_CHECK_ALL);
void check_present_status();

void check_descriptors();
VkDescriptorSet allocate_descriptor_set();

vk::vertex_upload_info upload_vertex_data();
Expand Down
10 changes: 3 additions & 7 deletions rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp
Expand Up @@ -176,8 +176,6 @@ namespace vk
VkSemaphore present_wait_semaphore = VK_NULL_HANDLE;

vk::descriptor_set descriptor_set;
vk::descriptor_pool descriptor_pool;
u32 used_descriptors = 0;

rsx::flags32_t flags = 0;

Expand All @@ -186,7 +184,7 @@ namespace vk
u32 present_image = -1;
command_buffer_chunk* swap_command_buffer = nullptr;

//Heap pointers
// Heap pointers
s64 attrib_heap_ptr = 0;
s64 vtx_env_heap_ptr = 0;
s64 frag_env_heap_ptr = 0;
Expand All @@ -200,14 +198,12 @@ namespace vk

u64 last_frame_sync_time = 0;

//Copy shareable information
// Copy shareable information
void grab_resources(frame_context_t& other)
{
present_wait_semaphore = other.present_wait_semaphore;
acquire_signal_semaphore = other.acquire_signal_semaphore;
descriptor_set.swap(other.descriptor_set);
descriptor_pool = other.descriptor_pool;
used_descriptors = other.used_descriptors;
flags = other.flags;

attrib_heap_ptr = other.attrib_heap_ptr;
Expand All @@ -222,7 +218,7 @@ namespace vk
rasterizer_env_heap_ptr = other.rasterizer_env_heap_ptr;
}

//Exchange storage (non-copyable)
// Exchange storage (non-copyable)
void swap_storage(frame_context_t& other)
{
std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
Expand Down