Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vk: Use a dynamic number of descriptor allocations #10943

Merged
merged 1 commit into from Sep 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 9 additions & 10 deletions rpcs3/Emu/RSX/VK/VKGSRender.cpp
Expand Up @@ -425,22 +425,23 @@ VKGSRender::VKGSRender() : GSRender()
m_occlusion_query_manager->set_control_flags(VK_QUERY_CONTROL_PRECISE_BIT, 0);
}

//Generate frame contexts
// Generate frame contexts
const u32 max_draw_calls = m_device->get_descriptor_max_draw_calls();
const auto& binding_table = m_device->get_pipeline_binding_table();
const u32 num_fs_samplers = binding_table.vertex_textures_first_bind_slot - binding_table.textures_first_bind_slot;

std::vector<VkDescriptorPoolSize> sizes;
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS });
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS });
sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) * DESCRIPTOR_MAX_DRAW_CALLS });
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * max_draw_calls });
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * max_draw_calls });
sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) * max_draw_calls });

// Conditional rendering predicate slot; refactor to allow skipping this when not needed
sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 * DESCRIPTOR_MAX_DRAW_CALLS });
sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 * max_draw_calls });

VkSemaphoreCreateInfo semaphore_info = {};
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;

//VRAM allocation
// VRAM allocation
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000, VK_TRUE);
m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer");
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
Expand Down Expand Up @@ -476,7 +477,7 @@ VKGSRender::VKGSRender() : GSRender()
{
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_wait_semaphore);
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.acquire_signal_semaphore);
ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<u32>(sizes.size()), DESCRIPTOR_MAX_DRAW_CALLS, 1);
ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<u32>(sizes.size()), max_draw_calls, 1);
}

const auto& memory_map = m_device->get_memory_mapping();
Expand Down Expand Up @@ -1063,8 +1064,7 @@ void VKGSRender::check_descriptors()
{
// Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources
const auto required_descriptors = rsx::method_registers.current_draw_clause.pass_count();
ensure(required_descriptors < DESCRIPTOR_MAX_DRAW_CALLS);
if ((required_descriptors + m_current_frame->used_descriptors) > DESCRIPTOR_MAX_DRAW_CALLS)
if (!m_current_frame->descriptor_pool.can_allocate(required_descriptors, m_current_frame->used_descriptors))
{
// Should hard sync before resetting descriptors for spec compliance
flush_command_queue(true);
Expand All @@ -1078,7 +1078,6 @@ VkDescriptorSet VKGSRender::allocate_descriptor_set()
{
if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]]
{
ensure(m_current_frame->used_descriptors < DESCRIPTOR_MAX_DRAW_CALLS);
return m_current_frame->descriptor_pool.allocate(descriptor_layouts, VK_TRUE, m_current_frame->used_descriptors++);
}
else
Expand Down
1 change: 0 additions & 1 deletion rpcs3/Emu/RSX/VK/VKHelpers.h
Expand Up @@ -16,7 +16,6 @@
#include "Emu/RSX/Common/TextureUtils.h"
#include "Emu/RSX/rsx_utils.h"

#define DESCRIPTOR_MAX_DRAW_CALLS 16384
#define OCCLUSION_MAX_POOL_SIZE DESCRIPTOR_MAX_DRAW_CALLS

#define FRAME_PRESENT_TIMEOUT 10000000ull // 10 seconds
Expand Down
14 changes: 8 additions & 6 deletions rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp
Expand Up @@ -373,13 +373,15 @@ namespace vk

// Creates the descriptor pool used by the shader interpreter.
// Both the per-type descriptor capacities and the pool's set limit scale with
// the draw-call limit reported by the render device, rather than with the
// compile-time DESCRIPTOR_MAX_DRAW_CALLS constant.
void shader_interpreter::create_descriptor_pools(const vk::render_device& dev)
{
	const auto max_draw_calls = dev.get_descriptor_max_draw_calls();

	// One entry per descriptor type; the multiplier is the number of
	// descriptors of that type budgeted per draw call.
	std::vector<VkDescriptorPoolSize> sizes;
	sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * max_draw_calls });
	sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * max_draw_calls });
	sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 68 * max_draw_calls });
	sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3 * max_draw_calls });

	// Create the pool exactly once, sized to the runtime limit so that
	// can_allocate() checks against the same bound.
	m_descriptor_pool.create(dev, sizes.data(), ::size32(sizes), max_draw_calls, 2);
}

void shader_interpreter::init(const vk::render_device& dev)
Expand Down Expand Up @@ -513,7 +515,7 @@ namespace vk

VkDescriptorSet shader_interpreter::allocate_descriptor_set()
{
if (m_used_descriptors == DESCRIPTOR_MAX_DRAW_CALLS)
if (!m_descriptor_pool.can_allocate(1u, m_used_descriptors))
{
m_descriptor_pool.reset(0);
m_used_descriptors = 0;
Expand Down
15 changes: 2 additions & 13 deletions rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp
Expand Up @@ -109,7 +109,7 @@ namespace vk
{
ensure(subpool_count);

info.flags = dev.get_descriptor_indexing_support() ? VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT : 0;
info.flags = dev.get_descriptor_update_after_bind_support() ? VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT : 0;
info.maxSets = max_sets;
info.poolSizeCount = size_descriptors_count;
info.pPoolSizes = sizes;
Expand Down Expand Up @@ -139,16 +139,6 @@ namespace vk
m_owner = nullptr;
}

// Reports whether this pool currently holds any device pools
// (true when m_device_pools is non-empty).
// NOTE(review): an inline definition of valid() also appears in descriptors.h
// in this change set - confirm only one definition is kept.
bool descriptor_pool::valid() const
{
return (!m_device_pools.empty());
}

// Implicit conversion to the native handle stored in m_current_pool_handle.
// NOTE(review): an inline definition of this operator also appears in
// descriptors.h in this change set - confirm only one definition is kept.
descriptor_pool::operator VkDescriptorPool()
{
return m_current_pool_handle;
}

void descriptor_pool::reset(VkDescriptorPoolResetFlags flags)
{
m_descriptor_set_cache.clear();
Expand Down Expand Up @@ -194,11 +184,10 @@ namespace vk

if (use_cache)
{
ensure(used_count < info.maxSets);
const auto alloc_size = std::min<u32>(info.maxSets - used_count, max_cache_size);

ensure(alloc_size);
ensure(m_descriptor_set_cache.empty());

alloc_info.descriptorSetCount = alloc_size;
alloc_info.pSetLayouts = m_allocation_request_cache.data();

Expand Down
8 changes: 5 additions & 3 deletions rpcs3/Emu/RSX/VK/vkutils/descriptors.h
Expand Up @@ -20,11 +20,13 @@ namespace vk
void destroy();
void reset(VkDescriptorPoolResetFlags flags);

bool valid() const;
operator VkDescriptorPool();

VkDescriptorSet allocate(VkDescriptorSetLayout layout, VkBool32 use_cache, u32 used_count);

// Implicit conversion to the native handle stored in m_current_pool_handle.
operator VkDescriptorPool() { return m_current_pool_handle; }
// True when at least one device pool is held.
FORCE_INLINE bool valid() const { return (!m_device_pools.empty()); }
// Set limit recorded in the pool's create info.
FORCE_INLINE u32 max_sets() const { return info.maxSets; }
// True when `required_count` additional sets fit on top of `used_count` without
// exceeding info.maxSets. Written as two subtractive comparisons so that the
// check cannot wrap around for large u32 inputs.
FORCE_INLINE bool can_allocate(u32 required_count, u32 used_count) const { return required_count <= info.maxSets && used_count <= (info.maxSets - required_count); }

private:
const vk::render_device* m_owner = nullptr;
VkDescriptorPoolCreateInfo info = {};
Expand Down
61 changes: 59 additions & 2 deletions rpcs3/Emu/RSX/VK/vkutils/device.cpp
Expand Up @@ -85,13 +85,65 @@ namespace vk
surface_capabilities_2_support = instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
}

void physical_device::get_physical_device_properties(bool allow_extensions)
{
vkGetPhysicalDeviceMemoryProperties(dev, &memory_properties);

if (!allow_extensions)
{
vkGetPhysicalDeviceProperties(dev, &props);
return;
}

supported_extensions instance_extensions(supported_extensions::instance);
if (!instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME))
{
vkGetPhysicalDeviceProperties(dev, &props);
}
else
{
VkPhysicalDeviceProperties2KHR properties2;
properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
properties2.pNext = nullptr;

VkPhysicalDeviceDescriptorIndexingPropertiesEXT descriptor_indexing_props{};

if (descriptor_indexing_support)
{
descriptor_indexing_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT;
descriptor_indexing_props.pNext = properties2.pNext;
properties2.pNext = &descriptor_indexing_props;
}

auto _vkGetPhysicalDeviceProperties2KHR = reinterpret_cast<PFN_vkGetPhysicalDeviceProperties2KHR>(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceProperties2KHR"));
ensure(_vkGetPhysicalDeviceProperties2KHR);

_vkGetPhysicalDeviceProperties2KHR(dev, &properties2);
props = properties2.properties;

if (descriptor_indexing_support)
{
if (descriptor_indexing_props.maxUpdateAfterBindDescriptorsInAllPools < 800'000)
{
rsx_log.error("Physical device does not support enough descriptors for deferred updates to work effectively. Deferred updates are disabled.");
descriptor_update_after_bind_mask = 0;
}
else if (descriptor_indexing_props.maxUpdateAfterBindDescriptorsInAllPools < 2'000'000)
{
rsx_log.warning("Physical device reports a low amount of allowed deferred descriptor updates. Draw call threshold will be lowered accordingly.");
descriptor_max_draw_calls = 8192;
}
}
}
}

void physical_device::create(VkInstance context, VkPhysicalDevice pdev, bool allow_extensions)
{
dev = pdev;
parent = context;
vkGetPhysicalDeviceProperties(pdev, &props);
vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);

get_physical_device_features(allow_extensions);
get_physical_device_properties(allow_extensions);

rsx_log.always()("Found vulkan-compatible GPU: '%s' running on driver %s", get_name(), get_driver_version());

Expand Down Expand Up @@ -714,6 +766,11 @@ namespace vk
return pgpu->descriptor_update_after_bind_mask;
}

// Returns the descriptor draw-call limit stored on the physical device
// referenced by pgpu (pgpu->descriptor_max_draw_calls).
u32 render_device::get_descriptor_max_draw_calls() const
{
return pgpu->descriptor_max_draw_calls;
}

mem_allocator_base* render_device::get_allocator() const
{
return m_allocator.get();
Expand Down
5 changes: 5 additions & 0 deletions rpcs3/Emu/RSX/VK/vkutils/device.h
Expand Up @@ -9,6 +9,8 @@
#include <vector>
#include <unordered_map>

#define DESCRIPTOR_MAX_DRAW_CALLS 16384

namespace vk
{
struct gpu_formats_support
Expand Down Expand Up @@ -62,10 +64,12 @@ namespace vk
bool descriptor_indexing_support = false;

u64 descriptor_update_after_bind_mask = 0;
u32 descriptor_max_draw_calls = DESCRIPTOR_MAX_DRAW_CALLS;

friend class render_device;
private:
void get_physical_device_features(bool allow_extensions);
void get_physical_device_properties(bool allow_extensions);

public:

Expand Down Expand Up @@ -147,6 +151,7 @@ namespace vk
bool get_descriptor_indexing_support() const;

u64 get_descriptor_update_after_bind_support() const;
u32 get_descriptor_max_draw_calls() const;

VkQueue get_present_queue() const;
VkQueue get_graphics_queue() const;
Expand Down