Strict Rendering Mode (SRM) fix for Tales of Vesperia. Move old surface copy before texture upload. #4350

Merged · 1 commit · Mar 29, 2018
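The change itself is a straight move: the strict-rendering-mode block that clears recycled surfaces and copies a surface's old_contents into it now runs right after vertex upload and before texture loading, instead of after the texture upload and draw-state setup. A minimal, self-contained sketch of that ordering (illustrative only, not RPCS3 code; surface_t, merge_old_contents and end_sketch are placeholder names):

#include <vector>

struct surface_t
{
    surface_t* old_contents = nullptr; // previous surface this one recycled
    bool cleared = false;
};

static void merge_old_contents(surface_t* s)
{
    if (s->old_contents)
    {
        // In the real renderers this is a GPU blit (glCopyImageSubData on GL,
        // vkCmdCopyImage on Vulkan) guarded by a format-compatibility check.
        s->cleared = true;
        s->old_contents = nullptr;
    }
}

static void end_sketch(std::vector<surface_t*>& bound_rtts, surface_t* ds, bool strict_rendering_mode)
{
    // 1. Vertex upload (set_vertex_buffer() / the VK vertex upload path).

    // 2. New position of the block this PR moves: while the driver digests the
    //    vertex data, clear recycled surfaces and inherit old_contents.
    if (strict_rendering_mode)
    {
        for (surface_t* rtt : bound_rtts)
            merge_old_contents(rtt);
        if (ds)
            merge_old_contents(ds);
    }

    // 3. Texture load: lookups that hit a recycled render target now see the
    //    inherited data instead of an uninitialized surface.

    // 4. Draw state update and draw submission (the block above used to run
    //    here, after the texture upload).
}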
214 changes: 107 additions & 107 deletions rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -205,6 +205,113 @@ void GLGSRender::end()
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
auto upload_info = set_vertex_buffer();

//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
auto copy_rtt_contents = [](gl::render_target *surface)
{
if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format())
{
//Copy data from old contents onto this one
//1. Clip a rectangular region defining the data
//2. Perform a GPU blit
u16 parent_w = surface->old_contents->width();
u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;

std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
surface->set_cleared();
}
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible

surface->old_contents = nullptr;
};

//Check if we have any 'recycled' surfaces in memory and if so, clear them
std::vector<int> buffers_to_clear;
bool clear_all_color = true;
bool clear_depth = false;

for (int index = 0; index < 4; index++)
{
if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
{
if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
clear_all_color = false;
else
buffers_to_clear.push_back(index);
}
}

gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
{
clear_depth = true;
}

//Temporarily disable pixel tests
glDisable(GL_SCISSOR_TEST);

if (clear_depth || buffers_to_clear.size() > 0)
{
GLenum mask = 0;

if (clear_depth)
{
gl_state.depth_mask(GL_TRUE);
gl_state.clear_depth(1.0);
gl_state.clear_stencil(255);
mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}

if (clear_all_color)
mask |= GL_COLOR_BUFFER_BIT;

glClear(mask);

if (buffers_to_clear.size() > 0 && !clear_all_color)
{
GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
//It is impossible for the render target to be type A or B here (clear all would have been flagged)
for (auto &i : buffers_to_clear)
glClearBufferfv(draw_fbo.id(), i, colors);
}

if (clear_depth)
gl_state.depth_mask(rsx::method_registers.depth_write_enabled());

ds->set_cleared();
}

if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() &&
ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8)
{
m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id());
ds->old_contents = nullptr;
}

if (g_cfg.video.strict_rendering_mode)
{
if (ds && ds->old_contents != nullptr)
copy_rtt_contents(ds);

for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface);
}
}
}
else
{
// Old contents are one use only. Keep the depth conversion check from firing over and over
if (ds) ds->old_contents = nullptr;
}

glEnable(GL_SCISSOR_TEST);

//Load textures
{
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
@@ -359,113 +466,6 @@ void GLGSRender::end()

update_draw_state();

//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
auto copy_rtt_contents = [](gl::render_target *surface)
{
if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format())
{
//Copy data from old contents onto this one
//1. Clip a rectangular region defining the data
//2. Perform a GPU blit
u16 parent_w = surface->old_contents->width();
u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;

std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
surface->set_cleared();
}
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible

surface->old_contents = nullptr;
};

//Check if we have any 'recycled' surfaces in memory and if so, clear them
std::vector<int> buffers_to_clear;
bool clear_all_color = true;
bool clear_depth = false;

for (int index = 0; index < 4; index++)
{
if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
{
if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
clear_all_color = false;
else
buffers_to_clear.push_back(index);
}
}

gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
{
clear_depth = true;
}

//Temporarily disable pixel tests
glDisable(GL_SCISSOR_TEST);

if (clear_depth || buffers_to_clear.size() > 0)
{
GLenum mask = 0;

if (clear_depth)
{
gl_state.depth_mask(GL_TRUE);
gl_state.clear_depth(1.0);
gl_state.clear_stencil(255);
mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}

if (clear_all_color)
mask |= GL_COLOR_BUFFER_BIT;

glClear(mask);

if (buffers_to_clear.size() > 0 && !clear_all_color)
{
GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
//It is impossible for the render target to be type A or B here (clear all would have been flagged)
for (auto &i: buffers_to_clear)
glClearBufferfv(draw_fbo.id(), i, colors);
}

if (clear_depth)
gl_state.depth_mask(rsx::method_registers.depth_write_enabled());

ds->set_cleared();
}

if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() &&
ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8)
{
m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id());
ds->old_contents = nullptr;
}

if (g_cfg.video.strict_rendering_mode)
{
if (ds && ds->old_contents != nullptr)
copy_rtt_contents(ds);

for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface);
}
}
}
else
{
// Old contents are one use only. Keep the depth conversion check from firing over and over
if (ds) ds->old_contents = nullptr;
}

glEnable(GL_SCISSOR_TEST);

std::chrono::time_point<steady_clock> draw_start = steady_clock::now();

if (g_cfg.video.debug_output)
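In both backends the copy extent comes from rsx::clip_region, which clamps the blit rectangle to the area the old and new surfaces have in common, so a surface recycled at a different size only inherits the overlapping region. A stand-in for that clamping, assuming the zero offsets used in the diff (illustrative only; the real helper also returns source and destination offsets, discarded via std::ignore above):

#include <algorithm>
#include <cstdint>
#include <utility>

// Illustrative clamp, not the RPCS3 implementation: limit the copy extent to
// the area covered by both the old (parent) surface and the new surface.
static std::pair<uint16_t, uint16_t> clip_copy_extent(
    uint16_t parent_w, uint16_t parent_h, uint16_t dst_w, uint16_t dst_h)
{
    const uint16_t copy_w = std::min(parent_w, dst_w);
    const uint16_t copy_h = std::min(parent_h, dst_h);
    return { copy_w, copy_h };
}

// Example: a 1024x1024 target recycled from a 1280x720 surface inherits only
// the top-left 1024x720 region.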
150 changes: 76 additions & 74 deletions rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -1072,6 +1072,82 @@ void VKGSRender::end()
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();

std::chrono::time_point<steady_clock> textures_start = vertex_end;


auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);

//Check for data casts
if (ds && ds->old_contents)
{
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
{
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);

ds->old_contents = nullptr;
ds->dirty = false;
}
else if (!g_cfg.video.strict_rendering_mode)
{
//Clear this to avoid dereferencing stale ptr
ds->old_contents = nullptr;
}
}

if (g_cfg.video.strict_rendering_mode)
{
auto copy_rtt_contents = [&](vk::render_target* surface)
{
if (surface->info.format == surface->old_contents->info.format)
{
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;

const u16 parent_w = surface->old_contents->width();
const u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;

std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);

VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
VkImageLayout old_layout = surface->current_layout;

vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);

VkImageCopy copy_rgn;
copy_rgn.srcOffset = { 0, 0, 0 };
copy_rgn.dstOffset = { 0, 0, 0 };
copy_rgn.dstSubresource = { aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { aspect, 0, 0, 1 };
copy_rgn.extent = { copy_w, copy_h, 1 };

vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, &copy_rgn);
vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range);

surface->dirty = false;
}
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible

surface->old_contents = nullptr;
};

//Prepare surfaces if needed
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface);
}
}

if (ds && ds->old_contents)
{
copy_rtt_contents(ds);
}
}

//Load textures
{
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
@@ -1288,80 +1364,6 @@ void VKGSRender::end()
//Only textures are synchronized tightly with the GPU and they have been read back above
vk::enter_uninterruptible();

auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);

//Check for data casts
if (ds && ds->old_contents)
{
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
{
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);

ds->old_contents = nullptr;
ds->dirty = false;
}
else if (!g_cfg.video.strict_rendering_mode)
{
//Clear this to avoid dereferencing stale ptr
ds->old_contents = nullptr;
}
}

if (g_cfg.video.strict_rendering_mode)
{
auto copy_rtt_contents = [&](vk::render_target* surface)
{
if (surface->info.format == surface->old_contents->info.format)
{
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;

const u16 parent_w = surface->old_contents->width();
const u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;

std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);

VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
VkImageLayout old_layout = surface->current_layout;

vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);

VkImageCopy copy_rgn;
copy_rgn.srcOffset = { 0, 0, 0 };
copy_rgn.dstOffset = { 0, 0, 0 };
copy_rgn.dstSubresource = { aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { aspect, 0, 0, 1 };
copy_rgn.extent = { copy_w, copy_h, 1 };

vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, &copy_rgn);
vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range);

surface->dirty = false;
}
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible

surface->old_contents = nullptr;
};

//Prepare surfaces if needed
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface);
}
}

if (ds && ds->old_contents)
{
copy_rtt_contents(ds);
}
}

u32 occlusion_id = 0;
if (m_occlusion_query_active)
{