Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vk: Enable use of a passthrough DMA layer if supported #9613

Merged
merged 14 commits into from Jan 24, 2021
Merged
142 changes: 109 additions & 33 deletions rpcs3/Emu/RSX/Common/TextureUtils.cpp
Expand Up @@ -296,6 +296,55 @@ struct copy_rgb655_block_swizzled

namespace
{
/**
* Generates copy instructions required to build the texture GPU side without actually copying anything.
* Returns a set of addresses and data lengths to use. This can be used to generate a GPU task to avoid CPU doing the heavy lifting.
*/
std::vector<rsx::memory_transfer_cmd>
build_transfer_cmds(const void* src, u16 block_size_in_bytes, u16 width_in_block, u16 row_count, u16 depth, u8 border, u32 dst_pitch_in_block, u32 src_pitch_in_block)
{
std::vector<rsx::memory_transfer_cmd> result;

if (src_pitch_in_block == dst_pitch_in_block && !border)
{
// Fast copy
rsx::memory_transfer_cmd cmd;
cmd.src = src;
cmd.dst = nullptr;
cmd.length = src_pitch_in_block * block_size_in_bytes * row_count * depth;
return { cmd };
}

const u32 width_in_bytes = width_in_block * block_size_in_bytes;
const u32 src_pitch_in_bytes = src_pitch_in_block * block_size_in_bytes;
const u32 dst_pitch_in_bytes = dst_pitch_in_block * block_size_in_bytes;

const u32 h_porch = border * block_size_in_bytes;
const u32 v_porch = src_pitch_in_bytes * border;

auto src_ = static_cast<const char*>(src) + h_porch;
auto dst_ = static_cast<const char*>(nullptr);

for (int layer = 0; layer < depth; ++layer)
{
// Front
src_ += v_porch;

for (int row = 0; row < row_count; ++row)
{
rsx::memory_transfer_cmd cmd{ dst_, src_, width_in_bytes };
result.push_back(cmd);
src_ += src_pitch_in_bytes;
dst_ += dst_pitch_in_bytes;
}

// Back
src_ += v_porch;
}

return result;
}

/**
* Texture upload template.
*
Expand Down Expand Up @@ -533,7 +582,7 @@ namespace rsx
return get_subresources_layout_impl(texture);
}

texture_memory_info upload_texture_subresource(gsl::span<std::byte> dst_buffer, const rsx::subresource_layout& src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps)
texture_memory_info upload_texture_subresource(gsl::span<std::byte> dst_buffer, const rsx::subresource_layout& src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps)
{
u16 w = src_layout.width_in_block;
u16 h = src_layout.height_in_block;
Expand Down Expand Up @@ -644,6 +693,11 @@ namespace rsx
// Remove the VTC tiling to support ATI and Vulkan.
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
}
else if (caps.supports_zero_copy)
{
result.require_upload = true;
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), 8, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
}
else
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
Expand All @@ -661,6 +715,11 @@ namespace rsx
// Remove the VTC tiling to support ATI and Vulkan.
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
}
else if (caps.supports_zero_copy)
{
result.require_upload = true;
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), 16, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
}
else
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
Expand All @@ -677,56 +736,73 @@ namespace rsx
if (word_size == 1)
{
if (is_swizzled)
{
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
}
else if (caps.supports_zero_copy)
{
result.require_upload = true;
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
}
else if (caps.supports_byteswap)
else
{
result.require_swap = true;
result.element_size = word_size;
result.block_length = words_per_block;

if (word_size == 2)
bool require_cpu_swizzle = !caps.supports_hw_deswizzle;
bool require_cpu_byteswap = !caps.supports_byteswap;

if (is_swizzled && caps.supports_hw_deswizzle)
{
if (is_swizzled)
if (word_size == 4 || (((word_size * words_per_block) & 3) == 0))
{
if (((word_size * words_per_block) & 3) == 0 && caps.supports_hw_deswizzle)
{
result.require_deswizzle = true;
}
result.require_deswizzle = true;
}

if (is_swizzled && !result.require_deswizzle)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
{
require_cpu_swizzle = true;
}
}
else if (word_size == 4)

if (!require_cpu_byteswap && !require_cpu_swizzle)
{
result.require_deswizzle = (is_swizzled && caps.supports_hw_deswizzle);
result.require_swap = true;

if (is_swizzled && !caps.supports_hw_deswizzle)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
if (caps.supports_zero_copy)
{
result.require_upload = true;
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else if (word_size == 2)
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else if (word_size == 4)
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
}
}
else
{
if (word_size == 2)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else if (word_size == 4)
else
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
if (word_size == 2)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else if (word_size == 4)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
}
}
}
Expand Down
13 changes: 12 additions & 1 deletion rpcs3/Emu/RSX/Common/TextureUtils.h
Expand Up @@ -112,19 +112,30 @@ namespace rsx
u32 pitch_in_block;
};

// Describes one memory copy of `length` bytes from `src` to `dst` that is recorded instead of being executed immediately.
// NOTE: build_transfer_cmds (TextureUtils.cpp) initializes this positionally as { dst, src, length }; keep the member order.
struct memory_transfer_cmd
{
// Destination of the copy. build_transfer_cmds starts this at nullptr and advances it by the destination row pitch,
// so it presumably encodes an offset into the destination rather than a real address - confirm against the consumer.
const void* dst;
// Address of the source data to copy from.
const void* src;
// Number of bytes to copy.
u32 length;
};

// Value returned by upload_texture_subresource.
// NOTE(review): the flags presumably tell the calling backend which processing steps it still has to perform - confirm against callers.
struct texture_memory_info
{
// Set from the format's word size in upload_texture_subresource.
int element_size;
// Set from the format's words-per-block count in upload_texture_subresource.
int block_length;
// Set when the uploader did not byteswap the data on the CPU (capabilities report byteswap support).
bool require_swap;
// Set for swizzled input when the capabilities report hardware deswizzle support and the block layout qualifies.
bool require_deswizzle;
// Set together with deferred_cmds when the zero-copy path was taken.
bool require_upload;

// Transfer commands produced by build_transfer_cmds on the zero-copy path.
std::vector<memory_transfer_cmd> deferred_cmds;
};

// Describes what the calling backend can handle itself, as consumed by upload_texture_subresource.
// NOTE: callers initialize this positionally (e.g. `caps{ true, false, false, false, 4 }` in GLTexture.cpp); keep the member order.
struct texture_uploader_capabilities
{
// When false, upload_texture_subresource byteswaps the data on the CPU.
bool supports_byteswap;
// NOTE(review): usage is not visible in this view; presumably lets VTC-tiled data skip CPU-side detiling - confirm.
bool supports_vtc_decoding;
// When false, swizzled input is deswizzled on the CPU.
bool supports_hw_deswizzle;
// When true, eligible uploads return deferred transfer commands instead of copying into the destination buffer.
bool supports_zero_copy;
// Passed to get_row_pitch_in_block when computing the destination row pitch.
usz alignment;
};

Expand All @@ -143,7 +154,7 @@ namespace rsx
std::vector<subresource_layout> get_subresources_layout(const rsx::fragment_texture &texture);
std::vector<subresource_layout> get_subresources_layout(const rsx::vertex_texture &texture);

texture_memory_info upload_texture_subresource(gsl::span<std::byte> dst_buffer, const subresource_layout &src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps);
texture_memory_info upload_texture_subresource(gsl::span<std::byte> dst_buffer, const subresource_layout &src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps);

u8 get_format_block_size_in_bytes(int format);
u8 get_format_block_size_in_texel(int format);
Expand Down
11 changes: 3 additions & 8 deletions rpcs3/Emu/RSX/GL/GLTexture.cpp
Expand Up @@ -67,7 +67,7 @@ namespace gl
case CELL_GCM_TEXTURE_A1R5G5B5: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV);
case CELL_GCM_TEXTURE_A4R4G4B4: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4);
case CELL_GCM_TEXTURE_R5G6B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5);
case CELL_GCM_TEXTURE_A8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8);
case CELL_GCM_TEXTURE_A8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV);
case CELL_GCM_TEXTURE_G8B8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_R6G5B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5);
case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8);
Expand All @@ -81,7 +81,7 @@ namespace gl
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return std::make_tuple(GL_RGBA, GL_FLOAT);
case CELL_GCM_TEXTURE_X32_FLOAT: return std::make_tuple(GL_RED, GL_FLOAT);
case CELL_GCM_TEXTURE_D1R5G5B5: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV);
case CELL_GCM_TEXTURE_D8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8);
case CELL_GCM_TEXTURE_D8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV);
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return std::make_tuple(GL_RG, GL_HALF_FLOAT);
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE);
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE);
Expand Down Expand Up @@ -645,7 +645,7 @@ namespace gl
const std::vector<rsx::subresource_layout> &input_layouts,
bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<std::byte>& staging_buffer)
{
rsx::texture_uploader_capabilities caps{ true, false, false, 4 };
rsx::texture_uploader_capabilities caps{ true, false, false, false, 4 };

pixel_unpack_settings unpack_settings;
unpack_settings.row_length(0).alignment(4);
Expand Down Expand Up @@ -714,11 +714,6 @@ namespace gl

switch (gl_type)
{
case GL_UNSIGNED_INT_8_8_8_8:
// NOTE: GL_UNSIGNED_INT_8_8_8_8 is already a swapped type
// TODO: Remove reliance on format and type checks when compute acceleration is implemented
apply_settings = false;
break;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
// Multi-channel format uploaded one byte at a time. This is due to poor driver support for formats like GL_UNSIGNED SHORT_8_8
Expand Down