Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rsx: Maintenance fixes [2] #12716

Merged
merged 3 commits into from Sep 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions rpcs3/Emu/RSX/Common/simple_array.hpp
Expand Up @@ -109,10 +109,12 @@ namespace rsx
_capacity = size;
}

void resize(u32 size)
template <typename T> requires UnsignedInt<T>
void resize(T size)
{
reserve(size);
_size = size;
const auto new_size = static_cast<u32>(size);
reserve(new_size);
_size = new_size;
}

void push_back(const Ty& val)
Expand Down
32 changes: 16 additions & 16 deletions rpcs3/Emu/RSX/RSXThread.cpp
Expand Up @@ -1605,6 +1605,12 @@ namespace rsx

void thread::on_framebuffer_options_changed(u32 opt)
{
if (m_rtts_dirty)
{
// Nothing to do
return;
}

auto evaluate_depth_buffer_state = [&]()
{
m_framebuffer_layout.zeta_write_enabled =
Expand Down Expand Up @@ -1678,12 +1684,6 @@ namespace rsx
return false;
};

if (m_rtts_dirty)
{
// Nothing to do
return;
}

switch (opt)
{
case NV4097_SET_DEPTH_TEST_ENABLE:
Expand Down Expand Up @@ -3523,7 +3523,7 @@ namespace rsx
const usz avg_frame_time = diffs / 59;

u32 lowered_delay = 0;
u32 highered_delay = 0;
u32 raised_delay = 0;
bool can_reevaluate = true;
u64 prev_preempt_count = umax;

Expand All @@ -3543,7 +3543,7 @@ namespace rsx
}
else if (prev_preempt_count < frame_times[i].preempt_count)
{
highered_delay++;
raised_delay++;
}

if (i > frame_times.size() - 30)
Expand All @@ -3556,7 +3556,7 @@ namespace rsx
prev_preempt_count = frame_times[i].preempt_count;
}

preempt_count = std::min<u32>(frame_times.back().preempt_count, max_preempt_count);
preempt_count = std::min<u64>(frame_times.back().preempt_count, max_preempt_count);

u32 fails = 0;
u32 hard_fails = 0;
Expand All @@ -3576,7 +3576,7 @@ namespace rsx
{
if (diff_of_diff >= avg_frame_time / 3)
{
highered_delay++;
raised_delay++;
hard_fails++;

if (i == frame_times.size())
Expand Down Expand Up @@ -3632,7 +3632,7 @@ namespace rsx
}
else
{
preempt_count = std::min<u32>(preempt_count + 4, max_preempt_count);
preempt_count = std::min<u64>(preempt_count + 4, max_preempt_count);
}
}
else
Expand All @@ -3641,26 +3641,26 @@ namespace rsx
}
}
// Sudden FPS drop detection
else if ((fails > 13 || hard_fails > 2 || !(abs_dst(fps_10, 300) < 20 || abs_dst(fps_10, 600) < 30 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10) < 30 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10 / 2) < 20)) && lowered_delay < highered_delay && is_last_frame_a_fail)
else if ((fails > 13 || hard_fails > 2 || !(abs_dst(fps_10, 300) < 20 || abs_dst(fps_10, 600) < 30 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10) < 30 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10 / 2) < 20)) && lowered_delay < raised_delay && is_last_frame_a_fail)
{
lower_preemption_count();
}

perf_log.trace("CPU preemption control: reeval=%d, preempt_count=%d, fails=%d, hard=%d, avg_frame_time=%d, highered=%d, lowered=%d, taken=%u", can_reevaluate, preempt_count, fails, hard_fails, avg_frame_time, highered_delay, lowered_delay, ::g_lv2_preempts_taken.load());
perf_log.trace("CPU preemption control: reeval=%d, preempt_count=%llu, fails=%u, hard=%u, avg_frame_time=%llu, highered=%u, lowered=%u, taken=%u", can_reevaluate, preempt_count, fails, hard_fails, avg_frame_time, raised_delay, lowered_delay, ::g_lv2_preempts_taken.load());

if (hard_measures_taken)
{
preempt_fail_old_preempt_count = std::max<u32>(preempt_fail_old_preempt_count, std::min<u32>(frame_times.back().preempt_count, max_preempt_count));
preempt_fail_old_preempt_count = std::max<u64>(preempt_fail_old_preempt_count, std::min<u64>(frame_times.back().preempt_count, max_preempt_count));
}
else if (preempt_fail_old_preempt_count)
{
perf_log.error("Lowering current preemption count significantly due to a performance drop, if this issue persists frequently consider lowering max preemptions count to 'new-count' or lower. (old-count=%d, new-count=%d)", preempt_fail_old_preempt_count, preempt_count);
perf_log.error("Lowering current preemption count significantly due to a performance drop, if this issue persists frequently consider lowering max preemptions count to 'new-count' or lower. (old-count=%llu, new-count=%llu)", preempt_fail_old_preempt_count, preempt_count);
preempt_fail_old_preempt_count = 0;
}

const u64 tsc_diff = (current_tsc - frame_times.back().tsc);
const u64 time_diff = (current_time - frame_times.back().timestamp);
const u64 preempt_diff = tsc_diff * (1'000'000 / 30) / (time_diff * std::max<u32>(preempt_count, 1));
const u64 preempt_diff = tsc_diff * (1'000'000 / 30) / (time_diff * std::max<u64>(preempt_count, 1ull));

if (!preempt_count)
{
Expand Down
29 changes: 14 additions & 15 deletions rpcs3/Emu/RSX/RSXThread.h
Expand Up @@ -467,6 +467,19 @@ namespace rsx

struct sampled_image_descriptor_base;

// Plain record pairing a command value with a timestamp.
// Used as the element type of the `recovered_fifo_cmds_history` queue.
struct desync_fifo_cmd_info
{
u32 cmd; // Command value; presumably the FIFO command involved in a desync recovery - confirm against the code that fills the queue
u64 timestamp; // Time associated with the command; clock source and units are not visible here - TODO confirm
};

// One frame timing sample. Samples are stored in the `frame_times` deque and
// read back by the CPU preemption control logic.
struct frame_time_t
{
u64 preempt_count; // Preemption count for this sample; compared between consecutive samples and clamped against the max preempt count by the consumer
u64 timestamp; // Time value of the sample; subtracted from the current time to get an elapsed interval (units not visible here - TODO confirm)
u64 tsc; // Counter value of the sample; subtracted from the current TSC value by the consumer (presumably a CPU timestamp-counter reading - confirm)
};

class thread : public cpu_thread
{
u64 timestamp_ctrl = 0;
Expand Down Expand Up @@ -681,24 +694,10 @@ namespace rsx
atomic_t<bool> sync_point_request = false;
bool in_begin_end = false;

// Plain record pairing a command value with a timestamp.
// Used as the element type of the `recovered_fifo_cmds_history` queue declared just below.
struct desync_fifo_cmd_info
{
u32 cmd; // Command value; presumably the FIFO command involved in a desync recovery - confirm against the code that fills the queue
u64 timestamp; // Time associated with the command; clock source and units are not visible here - TODO confirm
};

std::queue<desync_fifo_cmd_info> recovered_fifo_cmds_history;

struct frame_time_t
{
u64 preempt_count;
kd-11 marked this conversation as resolved.
Show resolved Hide resolved
u64 timestamp;
u64 tsc;
};

std::deque<frame_time_t> frame_times;
u32 prevent_preempt_increase_tickets = 0;
u32 preempt_fail_old_preempt_count = 0;
u64 preempt_fail_old_preempt_count = 0;

atomic_t<s32> async_tasks_pending{ 0 };

Expand Down
4 changes: 2 additions & 2 deletions rpcs3/Emu/RSX/VK/VKRenderPass.cpp
Expand Up @@ -153,9 +153,9 @@ namespace vk

for (u32 i = 0, layout_offset = 0; i < 5; ++i, layout_offset += 3)
{
if (const auto layout = VkImageLayout((layout_blob >> layout_offset) & 0x7))
if (const auto layout_encoding = (layout_blob >> layout_offset) & 0x7)
{
result.push_back(layout);
result.push_back(decode_layout(layout_encoding));
}
else
{
Expand Down
9 changes: 7 additions & 2 deletions rpcs3/Emu/RSX/VK/VKRenderTargets.h
Expand Up @@ -103,6 +103,12 @@ namespace vk
return {};
}

// If we have driver support for FBO loops, set the usage flag for it.
if (vk::get_current_renderer()->get_framebuffer_loops_support())
{
return { VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, 0 };
}

// Workarounds to force transition to GENERAL to decompress.
// Fixes corruption in FBO loops for ANV and RADV.
switch (vk::get_driver_vendor())
Expand All @@ -117,8 +123,7 @@ namespace vk
break;
case driver_vendor::AMD:
case driver_vendor::RADV:
if ((vk::get_chip_family() >= chip_class::AMD_navi1x) &&
!vk::get_current_renderer()->get_framebuffer_loops_support())
if (vk::get_chip_family() >= chip_class::AMD_navi1x)
{
// Only needed for GFX10+
return { 0, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT };
Expand Down
1 change: 1 addition & 0 deletions rpcs3/Emu/RSX/VK/VulkanAPI.h
Expand Up @@ -27,6 +27,7 @@
#define VK_EXT_attachment_feedback_loop_layout 1
#define VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME "VK_EXT_attachment_feedback_loop_layout"
#define VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT static_cast<VkImageLayout>(1000339000)
#define VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT 0x00080000
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT static_cast<VkStructureType>(1000339000)

typedef struct VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT {
Expand Down
14 changes: 14 additions & 0 deletions rpcs3/Emu/RSX/VK/vkutils/device.cpp
Expand Up @@ -451,6 +451,11 @@ namespace vk
requested_extensions.push_back(VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME);
}

if (pgpu->framebuffer_loops_support)
{
requested_extensions.push_back(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME);
}

enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
Expand Down Expand Up @@ -616,6 +621,15 @@ namespace vk
device.pNext = &indexing_features;
}

VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT fbo_loop_features{};
if (pgpu->framebuffer_loops_support)
{
fbo_loop_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT;
fbo_loop_features.attachmentFeedbackLoopLayout = VK_TRUE;
fbo_loop_features.pNext = const_cast<void*>(device.pNext);
device.pNext = &fbo_loop_features;
}

CHECK_RESULT_EX(vkCreateDevice(*pgpu, &device, nullptr, &dev), message_on_error);

// Initialize queues
Expand Down
7 changes: 4 additions & 3 deletions rpcs3/Emu/RSX/rsx_methods.cpp
Expand Up @@ -507,7 +507,7 @@ namespace rsx

if (fifo_span.size() < rcount)
{
rcount = fifo_span.size();
rcount = ::size32(fifo_span);
}

if (rsx->m_graphics_state & rsx::pipeline_state::transform_constants_dirty)
Expand Down Expand Up @@ -560,7 +560,7 @@ namespace rsx

if (fifo_span.size() < rcount)
{
rcount = fifo_span.size();
rcount = ::size32(fifo_span);
}

copy_data_swap_u32(&rsx::method_registers.transform_program[load_pos * 4 + index % 4], fifo_span.data(), rcount);
Expand Down Expand Up @@ -1030,7 +1030,7 @@ namespace rsx

if (fifo_span.size() < count)
{
count = fifo_span.size();
count = ::size32(fifo_span);
}

// Skip "handled methods"
Expand Down Expand Up @@ -3382,6 +3382,7 @@ namespace rsx
bind(NV4097_SET_SURFACE_COLOR_BOFFSET, nv4097::set_surface_dirty_bit);
bind(NV4097_SET_SURFACE_COLOR_COFFSET, nv4097::set_surface_dirty_bit);
bind(NV4097_SET_SURFACE_COLOR_DOFFSET, nv4097::set_surface_dirty_bit);
bind(NV4097_SET_SURFACE_COLOR_TARGET, nv4097::set_surface_dirty_bit);
bind(NV4097_SET_SURFACE_ZETA_OFFSET, nv4097::set_surface_dirty_bit);
bind(NV4097_SET_CONTEXT_DMA_COLOR_A, nv4097::set_surface_dirty_bit);
bind(NV4097_SET_CONTEXT_DMA_COLOR_B, nv4097::set_surface_dirty_bit);
Expand Down