diff --git a/backends/vulkan/runtime/api/containers/StagingBuffer.h b/backends/vulkan/runtime/api/containers/StagingBuffer.h index a24728470b0..66c607e178c 100644 --- a/backends/vulkan/runtime/api/containers/StagingBuffer.h +++ b/backends/vulkan/runtime/api/containers/StagingBuffer.h @@ -14,6 +14,8 @@ #include +#include + namespace vkcompute { namespace api { @@ -55,6 +57,10 @@ class StagingBuffer final { return vulkan_buffer_; } + inline void* data() { + return vulkan_buffer_.allocation_info().pMappedData; + } + inline size_t numel() { return numel_; } @@ -62,6 +68,30 @@ class StagingBuffer final { inline size_t nbytes() { return nbytes_; } + + inline void copy_from(const void* src, const size_t nbytes) { + VK_CHECK_COND(nbytes <= nbytes_); + memcpy(data(), src, nbytes); + vmaFlushAllocation( + vulkan_buffer_.vma_allocator(), + vulkan_buffer_.allocation(), + 0u, + VK_WHOLE_SIZE); + } + + inline void copy_to(void* dst, const size_t nbytes) { + VK_CHECK_COND(nbytes <= nbytes_); + vmaInvalidateAllocation( + vulkan_buffer_.vma_allocator(), + vulkan_buffer_.allocation(), + 0u, + VK_WHOLE_SIZE); + memcpy(dst, data(), nbytes); + } + + inline void set_staging_zeros() { + memset(data(), 0, nbytes_); + } }; } // namespace api diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index a8f57f57d2a..c22241940f8 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -401,7 +401,7 @@ void ComputeGraph::copy_into_staging( const size_t numel) { StagingPtr staging = get_staging(idx); size_t nbytes = numel * vkapi::element_size(staging->dtype()); - copy_ptr_to_staging(data, *staging, nbytes); + staging->copy_from(data, nbytes); } void ComputeGraph::copy_from_staging( @@ -410,7 +410,7 @@ void ComputeGraph::copy_from_staging( const size_t numel) { StagingPtr staging = get_staging(idx); size_t nbytes = numel * vkapi::element_size(staging->dtype()); - copy_staging_to_ptr(*staging, data, nbytes); + staging->copy_to(data, nbytes); } void ComputeGraph::prepare() { diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp index a9c2f6c9b6a..61b24cd409b 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp @@ -53,8 +53,7 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { if (graph->val_is_none(tref_)) { size_t numel = utils::multiply_integers(packed->sizes()); api::StagingBuffer staging(graph->context(), packed->dtype(), numel); - size_t nbytes = numel * vkapi::element_size(packed->dtype()); - set_staging_zeros(staging, nbytes); + staging.set_staging_zeros(); return staging; } @@ -62,7 +61,7 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { size_t numel = utils::multiply_integers(tref->sizes); api::StagingBuffer staging(graph->context(), tref->dtype, numel); size_t nbytes = numel * vkapi::element_size(tref->dtype); - copy_ptr_to_staging(tref->data, staging, nbytes); + staging.copy_from(tref->data, nbytes); return staging; } diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp index 9cb715e202a..8804bcf2ef6 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp @@ -13,88 +13,8 @@ #include -#include - namespace vkcompute { -template -void memcpy_to_mapping_impl( - const void* src, - vkapi::MemoryMap& dst_mapping, - const size_t nbytes) { - T* data_ptr = dst_mapping.template data(); - memcpy(data_ptr, reinterpret_cast(src), nbytes); -} - -template -void memcpy_from_mapping_impl( - vkapi::MemoryMap& src_mapping, - void* dst, - const size_t nbytes) { - T* data_ptr = src_mapping.template data(); - memcpy(reinterpret_cast(dst), data_ptr, nbytes); -} - -void memcpy_to_mapping( - const void* src, - vkapi::MemoryMap& dst_mapping, - const size_t nbytes, - const vkapi::ScalarType dtype) { -#define DTYPE_CASE(ctype, vkformat, name) \ - case vkapi::ScalarType::name: \ - memcpy_to_mapping_impl(src, dst_mapping, nbytes); \ - break; - - switch (dtype) { - VK_FORALL_SCALAR_TYPES(DTYPE_CASE) - default: - VK_THROW("Unrecognized dtype!"); - } -#undef DTYPE_CASE -} - -void memcpy_from_mapping( - vkapi::MemoryMap& src_mapping, - void* dst, - const size_t nbytes, - const vkapi::ScalarType dtype) { -#define DTYPE_CASE(ctype, vkformat, name) \ - case vkapi::ScalarType::name: \ - memcpy_from_mapping_impl(src_mapping, dst, nbytes); \ - break; - - switch (dtype) { - VK_FORALL_SCALAR_TYPES(DTYPE_CASE) - default: - VK_THROW("Unrecognized dtype!"); - } -#undef DTYPE_CASE -} - -void copy_ptr_to_staging( - const void* src, - api::StagingBuffer& staging, - const size_t nbytes) { - vkapi::MemoryMap mapping(staging.buffer(), vkapi::MemoryAccessType::WRITE); - mapping.invalidate(); - memcpy_to_mapping(src, mapping, nbytes, staging.dtype()); -} - -void copy_staging_to_ptr( - api::StagingBuffer& staging, - void* dst, - const size_t nbytes) { - vkapi::MemoryMap mapping(staging.buffer(), vkapi::MemoryAccessType::READ); - mapping.invalidate(); - memcpy_from_mapping(mapping, dst, nbytes, staging.dtype()); -} - -void set_staging_zeros(api::StagingBuffer& staging, const size_t nbytes) { - vkapi::MemoryMap mapping(staging.buffer(), vkapi::MemoryAccessType::WRITE); - uint8_t* data_ptr = mapping.template data(); - memset(data_ptr, 0, staging.nbytes()); -} - vkapi::ShaderInfo get_nchw_to_tensor_shader( const api::vTensor& v_dst, const bool int8_buffer_enabled) { diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h index f16c52ecf33..8d63958a738 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h @@ -12,25 +12,6 @@ namespace vkcompute { -// -// Functions to copy data into and out of a staging buffer -// - -void copy_ptr_to_staging( - const void* src, - api::StagingBuffer& staging, - const size_t nbytes); -void copy_staging_to_ptr( - api::StagingBuffer& staging, - void* dst, - const size_t nbytes); - -void set_staging_zeros(api::StagingBuffer& staging, const size_t nbytes); - -// -// Functions to get shaders -// - vkapi::ShaderInfo get_nchw_to_tensor_shader( const api::vTensor& v_dst, bool int8_buffer_enabled = true); diff --git a/backends/vulkan/runtime/vk_api/memory/Allocation.cpp b/backends/vulkan/runtime/vk_api/memory/Allocation.cpp index b07bb2862d3..d4e0fc9702e 100644 --- a/backends/vulkan/runtime/vk_api/memory/Allocation.cpp +++ b/backends/vulkan/runtime/vk_api/memory/Allocation.cpp @@ -30,6 +30,7 @@ Allocation::Allocation() create_info{}, allocator(VK_NULL_HANDLE), allocation(VK_NULL_HANDLE), + allocation_info({}), is_copy_(false) {} Allocation::Allocation( @@ -40,6 +41,7 @@ Allocation::Allocation( create_info(create_info), allocator(vma_allocator), allocation(VK_NULL_HANDLE), + allocation_info({}), is_copy_(false) { VK_CHECK(vmaAllocateMemory( allocator, &memory_requirements, &create_info, &allocation, nullptr)); @@ -50,6 +52,7 @@ Allocation::Allocation(const Allocation& other) noexcept create_info(other.create_info), allocator(other.allocator), allocation(other.allocation), + allocation_info(other.allocation_info), is_copy_(true) {} Allocation::Allocation(Allocation&& other) noexcept @@ -57,8 +60,10 @@ Allocation::Allocation(Allocation&& other) noexcept create_info(other.create_info), allocator(other.allocator), allocation(other.allocation), + allocation_info(other.allocation_info), is_copy_(other.is_copy_) { other.allocation = VK_NULL_HANDLE; + other.allocation_info = {}; } Allocation& Allocation::operator=(Allocation&& other) noexcept { @@ -68,9 +73,11 @@ Allocation& Allocation::operator=(Allocation&& other) noexcept { create_info = other.create_info; allocator = other.allocator; allocation = other.allocation; + allocation_info = other.allocation_info; is_copy_ = other.is_copy_; other.allocation = tmp_allocation; + other.allocation_info = {}; return *this; } diff --git a/backends/vulkan/runtime/vk_api/memory/Allocation.h b/backends/vulkan/runtime/vk_api/memory/Allocation.h index cec6f61e766..44e8277a35c 100644 --- a/backends/vulkan/runtime/vk_api/memory/Allocation.h +++ b/backends/vulkan/runtime/vk_api/memory/Allocation.h @@ -62,6 +62,8 @@ struct Allocation final { VmaAllocator allocator; // Handles to the allocated memory VmaAllocation allocation; + // Information about the allocated memory + VmaAllocationInfo allocation_info; private: // Indicates whether this class instance is a copy of another class instance, diff --git a/backends/vulkan/runtime/vk_api/memory/Allocator.cpp b/backends/vulkan/runtime/vk_api/memory/Allocator.cpp index b990cf6a119..e814063fa90 100644 --- a/backends/vulkan/runtime/vk_api/memory/Allocator.cpp +++ b/backends/vulkan/runtime/vk_api/memory/Allocator.cpp @@ -142,7 +142,8 @@ VulkanBuffer Allocator::create_staging_buffer(const VkDeviceSize size) { // Staging buffers are accessed by both the CPU and GPU, so set the // appropriate flags to indicate that the host device will be accessing // the data from this buffer. - alloc_create_info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + alloc_create_info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; alloc_create_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; alloc_create_info.preferredFlags = diff --git a/backends/vulkan/runtime/vk_api/memory/Buffer.cpp b/backends/vulkan/runtime/vk_api/memory/Buffer.cpp index 366b45a5e41..5a78dab764d 100644 --- a/backends/vulkan/runtime/vk_api/memory/Buffer.cpp +++ b/backends/vulkan/runtime/vk_api/memory/Buffer.cpp @@ -67,7 +67,7 @@ VulkanBuffer::VulkanBuffer( &allocation_create_info, &handle_, &(memory_.allocation), - nullptr)); + &(memory_.allocation_info))); } else { VmaAllocatorInfo allocator_info{}; vmaGetAllocatorInfo(allocator_, &allocator_info); diff --git a/backends/vulkan/runtime/vk_api/memory/Buffer.h b/backends/vulkan/runtime/vk_api/memory/Buffer.h index 9302048f861..af32ffffa84 100644 --- a/backends/vulkan/runtime/vk_api/memory/Buffer.h +++ b/backends/vulkan/runtime/vk_api/memory/Buffer.h @@ -114,6 +114,10 @@ class VulkanBuffer final { return memory_.allocation; } + inline VmaAllocationInfo allocation_info() const { + return memory_.allocation_info; + } + inline VmaAllocationCreateInfo allocation_create_info() const { return VmaAllocationCreateInfo(memory_.create_info); } diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index 4c2972419d0..a469a44dc1a 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -326,15 +326,15 @@ void record_reference_matmul( void fill_vtensor(api::vTensor& vten, std::vector& data) { api::StagingBuffer staging_buffer(api::context(), vten.dtype(), data.size()); -#define CASE(ctype, name) \ - case vkapi::ScalarType::name: { \ - std::vector data_converted; \ - data_converted.resize(data.size()); \ - for (int i = 0; i < data.size(); ++i) { \ - data_converted[i] = ctype(data[i]); \ - } \ - copy_ptr_to_staging( \ - data_converted.data(), staging_buffer, vten.staging_buffer_nbytes()); \ +#define CASE(ctype, name) \ + case vkapi::ScalarType::name: { \ + std::vector data_converted; \ + data_converted.resize(data.size()); \ + for (int i = 0; i < data.size(); ++i) { \ + data_converted[i] = ctype(data[i]); \ + } \ + staging_buffer.copy_from( \ + data_converted.data(), vten.staging_buffer_nbytes()); \ } break; switch (vten.dtype()) { @@ -424,14 +424,14 @@ void extract_vtensor(api::vTensor& vten, std::vector& data) { api::context()->submit_cmd_to_gpu(fence.get_submit_handle()); fence.wait(); -#define CASE(ctype, name) \ - case vkapi::ScalarType::name: { \ - std::vector data_converted(data.size()); \ - copy_staging_to_ptr( \ - staging_buffer, data_converted.data(), vten.staging_buffer_nbytes()); \ - for (int i = 0; i < data.size(); ++i) { \ - data[i] = float(data_converted[i]); \ - } \ +#define CASE(ctype, name) \ + case vkapi::ScalarType::name: { \ + std::vector data_converted(data.size()); \ + staging_buffer.copy_to( \ + data_converted.data(), vten.staging_buffer_nbytes()); \ + for (int i = 0; i < data.size(); ++i) { \ + data[i] = float(data_converted[i]); \ + } \ } break; switch (vten.dtype()) { diff --git a/backends/vulkan/test/utils/test_utils.h b/backends/vulkan/test/utils/test_utils.h index 3bc12c472db..25163e664bf 100644 --- a/backends/vulkan/test/utils/test_utils.h +++ b/backends/vulkan/test/utils/test_utils.h @@ -132,7 +132,7 @@ fill_staging(api::StagingBuffer& staging, float val, int numel = -1) { } std::vector data(numel); std::fill(data.begin(), data.end(), val); - copy_ptr_to_staging(data.data(), staging, sizeof(float) * numel); + staging.copy_from(data.data(), sizeof(float) * numel); } void fill_vtensor(api::vTensor& vten, std::vector& data); @@ -169,7 +169,7 @@ check_staging_buffer(api::StagingBuffer& staging, float val, int numel = -1) { numel = staging.numel(); } std::vector data(numel); - copy_staging_to_ptr(staging, data.data(), sizeof(float) * numel); + staging.copy_to(data.data(), sizeof(float) * numel); for (size_t i = 0; i < data.size(); ++i) { CHECK_VALUE(data, i, val); diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index a0bfefafa02..c035d5f8b85 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -360,7 +360,7 @@ TEST_F(VulkanComputeAPITest, spec_var_shader_test) { submit_to_gpu(); std::vector data(len); - copy_staging_to_ptr(buffer, data.data(), buffer.nbytes()); + buffer.copy_to(data.data(), buffer.nbytes()); for (size_t i = 0; i < len; ++i) { CHECK_VALUE(data, i, scale * i + offset); @@ -470,7 +470,7 @@ void test_storage_buffer_type(const size_t len) { submit_to_gpu(); std::vector data(len); - copy_staging_to_ptr(buffer, data.data(), buffer.nbytes()); + buffer.copy_to(data.data(), buffer.nbytes()); for (size_t i = 0; i < len; ++i) { CHECK_VALUE(data, i, T(i)); @@ -2132,7 +2132,7 @@ void run_from_gpu_test( submit_to_gpu(); std::vector data_out(staging_buffer.numel()); - copy_staging_to_ptr(staging_buffer, data_out.data(), staging_buffer.nbytes()); + staging_buffer.copy_to(data_out.data(), staging_buffer.nbytes()); for (int i = 0; i < vten.numel(); i++) { CHECK_VALUE(data_out, i, i + offset); @@ -2160,8 +2160,7 @@ void round_trip_test( for (int i = 0; i < staging_buffer_in.numel(); i++) { data_in[i] = T(i * -1); } - copy_ptr_to_staging( - data_in.data(), staging_buffer_in, vten.staging_buffer_nbytes()); + staging_buffer_in.copy_from(data_in.data(), vten.staging_buffer_nbytes()); // Output staging buffer StagingBuffer staging_buffer_out( @@ -2182,8 +2181,7 @@ void round_trip_test( // Extract data from output staging buffer std::vector data_out(staging_buffer_out.numel()); - copy_staging_to_ptr( - staging_buffer_out, data_out.data(), staging_buffer_out.nbytes()); + staging_buffer_out.copy_to(data_out.data(), staging_buffer_out.nbytes()); // All indices should be equal to the input data for (int i = 0; i < vten.numel(); i++) { @@ -2624,8 +2622,7 @@ void test_conv2d( for (int i = 0; i < in_numel; i++) { data_in[i] = i + 1; } - copy_ptr_to_staging( - data_in.data(), staging_buffer_in, sizeof(float) * in_numel); + staging_buffer_in.copy_from(data_in.data(), sizeof(float) * in_numel); // Output staging buffer const int64_t out_numel = @@ -2642,8 +2639,7 @@ void test_conv2d( // Extract data from output staging buffer std::vector data_out(out_numel); - copy_staging_to_ptr( - staging_buffer_out, data_out.data(), sizeof(float) * out_numel); + staging_buffer_out.copy_to(data_out.data(), sizeof(float) * out_numel); // Check data matches results copied from ATen-VK for (int i = 0; i < vten.numel(); i++) {