diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index b3dff832339..597cb8859b0 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -567,6 +567,48 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
   update_metadata();
 }
 
+/*
+ * Transposing the dim order is a bit unintuitive. dim0 and dim1 have swapped
+ * their "identities", so we need to swap the values of dim0 and dim1 wherever
+ * they appear in the dim order vector. Compare this to just swapping the
+ * elements at dim0 and dim1 in the `sizes` vectors.
+ */
+void transpose_dim_order_inplace(
+    std::vector<int64_t>& dim_order,
+    const int64_t dim0,
+    const int64_t dim1) {
+  for (int i = 0; i < dim_order.size(); ++i) {
+    if (dim_order[i] == dim0) {
+      dim_order[i] = dim1;
+    } else if (dim_order[i] == dim1) {
+      dim_order[i] = dim0;
+    }
+  }
+}
+
+void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
+  std::iter_swap(sizes_.begin() + dim0, sizes_.begin() + dim1);
+  if (storage_type() == utils::kBuffer) {
+    transpose_dim_order_inplace(dim_order_, dim0, dim1);
+  } else {
+    const int dim0_whcn = sizes_.size() - 1 - dim0;
+    const int dim1_whcn = sizes_.size() - 1 - dim1;
+    // Cannot transpose batch dimension for texture storage
+    VK_CHECK_COND(dim0_whcn < 3 && dim1_whcn < 3);
+
+    std::iter_swap(
+        axis_map_.begin() + dim0_whcn, axis_map_.begin() + dim1_whcn);
+
+    if (packed_dim_whcn_idx() == dim0_whcn) {
+      memory_layout_ = utils::GPUMemoryLayout(dim1_whcn);
+    }
+    if (packed_dim_whcn_idx() == dim1_whcn) {
+      memory_layout_ = utils::GPUMemoryLayout(dim0_whcn);
+    }
+  }
+  update_metadata();
+}
+
 void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
   sizes_ = new_sizes;
   update_metadata();
diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h
index da20a47d465..f2a1e362fd4 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.h
+++ b/backends/vulkan/runtime/api/containers/Tensor.h
@@ -530,6 +530,11 @@ class vTensor final {
    */
  void virtual_resize(const std::vector<int64_t>& new_sizes);
 
+  /*
+   * Transpose the tensor in-place by updating its metadata.
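+   * No data is moved: for buffer storage the dim order is updated, while for
+   * texture storage the axis map and packed dim are updated. Transposing the
+   * batch dim of a texture-backed tensor is not supported.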
+   */
+  void virtual_transpose(const int64_t dim0, const int64_t dim1);
+
   /*
    * Discard the underlying VkImage or VkBuffer and re-allocate based on new
    * tensor sizes
diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp
index d3bb884e9f1..e70d80d32ce 100644
--- a/backends/vulkan/test/utils/test_utils.cpp
+++ b/backends/vulkan/test/utils/test_utils.cpp
@@ -314,6 +314,44 @@ void record_reference_matmul(
       mat2.strides_ubo());
 }
 
+void record_matmul_texture3d(
+    api::Context* context,
+    api::vTensor& out,
+    api::vTensor& mat1,
+    api::vTensor& mat2) {
+  std::string kernel_name = "matmul_naive";
+  kernel_name.reserve(kShaderNameReserve);
+  add_storage_type_suffix(kernel_name, out.storage_type());
+  add_dtype_suffix(kernel_name, out.dtype());
+
+  utils::uvec3 global_wg_size = out.logical_extents();
+
+  vkapi::PipelineBarrier pipeline_barrier{};
+  api::context()->submit_compute_job(
+      VK_KERNEL_FROM_STR(kernel_name),
+      pipeline_barrier,
+      global_wg_size,
+      {8, 8, 1},
+      {out.packed_dim_whcn_idx(),
+       mat1.packed_dim_whcn_idx(),
+       mat2.packed_dim_whcn_idx()},
+      VK_NULL_HANDLE,
+      0,
+      out.image(
+          pipeline_barrier,
+          vkapi::PipelineStage::COMPUTE,
+          vkapi::MemoryAccessType::WRITE),
+      mat1.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
+      mat2.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
+      out.sizes_ubo(),
+      out.logical_limits_ubo(),
+      out.axis_map_ubo(),
+      mat1.sizes_ubo(),
+      mat1.axis_map_ubo(),
+      mat2.sizes_ubo(),
+      mat2.axis_map_ubo());
+}
+
 //
 // Input & Output Utilities
 //
diff --git a/backends/vulkan/test/utils/test_utils.h b/backends/vulkan/test/utils/test_utils.h
index 25163e664bf..c4e65c7fb64 100644
--- a/backends/vulkan/test/utils/test_utils.h
+++ b/backends/vulkan/test/utils/test_utils.h
@@ -121,6 +121,12 @@ void record_reference_matmul(
     api::vTensor& mat1,
     api::vTensor& mat2);
 
+void record_matmul_texture3d(
+    api::Context* context,
+    api::vTensor& out,
+    api::vTensor& mat1,
+    api::vTensor& mat2);
+
 //
 // Input & Output Utilities
 //
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index 59b3ee42dc8..6dad9974e79 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -258,6 +258,48 @@ TEST_F(VulkanComputeAPITest, calculate_tensor_strides_test) {
   }
 }
 
+TEST_F(VulkanComputeAPITest, virtual_transpose_test) {
+  std::vector<int64_t> sizes = {7, 9, 11, 13};
+  // (dim0, dim1), new_sizes, new_dim_order, new_axis_map, new_packed_dim_idx
+  std::vector<std::vector<std::vector<int64_t>>> test_cases = {
+      {{2, 3}, {7, 9, 13, 11}, {0, 1, 3, 2}, {1, 0, 2, 2}, {1}},
+      {{2, 1}, {7, 11, 9, 13}, {0, 2, 1, 3}, {0, 2, 1, 2}, {0}},
+      {{1, 3}, {7, 13, 11, 9}, {0, 3, 2, 1}, {2, 1, 0, 2}, {2}},
+  };
+
+  for (const auto& test_case : test_cases) {
+    const int dim0 = test_case.at(0).at(0);
+    const int dim1 = test_case.at(0).at(1);
+
+    const auto& expected_sizes = test_case.at(1);
+    const auto& expected_dim_order = test_case.at(2);
+    const auto& expected_axis_map = test_case.at(3);
+    const int expected_packed_dim = test_case.at(4).at(0);
+
+    {
+      vTensor a_buffer = vTensor(
+          context(), sizes, vkapi::kFloat, utils::kBuffer, utils::kWidthPacked);
+
+      a_buffer.virtual_transpose(dim0, dim1);
+      EXPECT_TRUE(a_buffer.sizes() == expected_sizes);
+      EXPECT_TRUE(a_buffer.dim_order() == expected_dim_order);
+    }
+
+    {
+      vTensor a_texture = vTensor(
+          context(),
+          sizes,
+          vkapi::kFloat,
+          utils::kTexture3D,
+          utils::kWidthPacked);
+      a_texture.virtual_transpose(dim0, dim1);
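+      // Texture storage reflects the transpose in the axis map and packed
+      // dim rather than in the dim order.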
+      EXPECT_TRUE(a_texture.sizes() == expected_sizes);
+      EXPECT_TRUE(a_texture.axis_map() == expected_axis_map);
+      EXPECT_TRUE(a_texture.packed_dim_whcn_idx() == expected_packed_dim);
+    }
+  }
+}
+
 TEST_F(VulkanComputeAPITest, vec_test) {
   utils::vec3 v3({1, 2, 3});
   ASSERT_TRUE(v3[0] == 1);
@@ -637,46 +679,58 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) {
   constexpr int N = 17;
   std::vector<int64_t> mat1_sizes = {M, K};
   std::vector<int64_t> mat2_sizes = {N, K};
-  std::vector<int64_t> mat2_t_sizes = {K, N};
   std::vector<int64_t> out_sizes = {M, N};
-  std::vector<int64_t> transposed_dim_order = {1, 0};
-
-  vTensor mat1 = CREATE_FLOAT_BUFFER(mat1_sizes, /*allocate_memory=*/true);
-  vTensor mat2 = CREATE_FLOAT_BUFFER(mat2_sizes, /*allocate_memory=*/true);
-  vTensor out = CREATE_FLOAT_BUFFER(out_sizes, /*allocate_memory=*/true);
-
-  // Generate data
-  std::vector<float> mat1_data =
-      create_random_float_buffer(mat1.staging_buffer_numel());
-  std::vector<float> mat2_data =
-      create_random_float_buffer(mat2.staging_buffer_numel());
-
-  // Create direct view and modify sizes and strides later
-  vTensor mat2_t = vTensor(mat2);
-
-  std::vector<float> mat2_t_data = transpose_matrix(mat2_data, N, K);
-  std::vector<float> ref_out =
-      compute_reference_matmul(mat1_data, mat2_t_data, M, K, N);
-
-  // Fill original tensor with some data
-  fill_vtensor(mat1, mat1_data);
-  fill_vtensor(mat2, mat2_data);
-
-  record_reference_matmul(api::context(), out, mat1, mat2_t);
+  for (const auto storage_type : {utils::kTexture3D, utils::kBuffer}) {
+    vTensor mat1 = vTensor(
+        context(),
+        mat1_sizes,
+        vkapi::kFloat,
+        storage_type,
+        utils::kWidthPacked);
+    vTensor mat2 = vTensor(
+        context(),
+        mat2_sizes,
+        vkapi::kFloat,
+        storage_type,
+        utils::kWidthPacked);
+    vTensor out = vTensor(
+        context(), out_sizes, vkapi::kFloat, storage_type, utils::kWidthPacked);
+
+    // Generate data
+    std::vector<float> mat1_data =
+        create_random_float_buffer(mat1.staging_buffer_numel());
+    std::vector<float> mat2_data =
+        create_random_float_buffer(mat2.staging_buffer_numel());
+
+    // Create direct view and modify sizes and strides later
+    vTensor mat2_t = vTensor(mat2);
+    // Update sizes and strides of mat2_t to be that of a transposed tensor
+    mat2_t.virtual_transpose(0, 1);
+
+    EXPECT_TRUE(mat2_t.gpu_memory_layout() == utils::kHeightPacked);
+
+    std::vector<float> mat2_t_data = transpose_matrix(mat2_data, N, K);
+    std::vector<float> ref_out =
+        compute_reference_matmul(mat1_data, mat2_t_data, M, K, N);
 
-  // Update sizes and strides of mat2_t to be that of a transposed tensor
-  mat2_t.virtual_reconfigure(mat2_t_sizes, transposed_dim_order);
-  EXPECT_TRUE(mat2_t.gpu_memory_layout() == utils::kHeightPacked);
+    // Fill original tensor with some data
+    fill_vtensor(mat1, mat1_data);
+    fill_vtensor(mat2, mat2_data);
 
-  std::vector<float> data_out(out.staging_buffer_numel());
-  // Extract the copy tensor; should contain the data of the original tensor
-  extract_vtensor(out, data_out);
+    if (storage_type == utils::kTexture3D) {
+      record_matmul_texture3d(context(), out, mat1, mat2_t);
+    } else {
+      record_reference_matmul(context(), out, mat1, mat2_t);
+    }
 
-  EXPECT_TRUE(data_out.size() == ref_out.size());
+    std::vector<float> data_out(out.staging_buffer_numel());
+    // Extract the copy tensor; should contain the data of the original tensor
+    extract_vtensor(out, data_out);
 
-  for (size_t i = 0; i < data_out.size(); ++i) {
-    EXPECT_TRUE(check_close(data_out[i], ref_out[i]));
+    for (size_t i = 0; i < ref_out.size(); ++i) {
+      EXPECT_TRUE(check_close(data_out[i], ref_out[i]));
+    }
   }
 }
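The dim order update in `transpose_dim_order_inplace` is the subtle part of this change. Below is a minimal standalone sketch of the same swap logic, using hypothetical sizes and dim order values that are not taken from the diff:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Hypothetical tensor whose dims are laid out as {1, 2, 0}, i.e. dim 1
      // is outermost in memory and dim 0 is innermost.
      std::vector<int64_t> sizes = {2, 3, 4};
      std::vector<int64_t> dim_order = {1, 2, 0};

      const int64_t dim0 = 1;
      const int64_t dim1 = 2;

      // Transposing swaps the *elements* of `sizes` at dim0 and dim1...
      std::iter_swap(sizes.begin() + dim0, sizes.begin() + dim1);

      // ...but swaps the *values* dim0 and dim1 wherever they appear in
      // `dim_order`, since the two dims have exchanged identities.
      for (auto& d : dim_order) {
        if (d == dim0) {
          d = dim1;
        } else if (d == dim1) {
          d = dim0;
        }
      }

      assert((sizes == std::vector<int64_t>{2, 4, 3}));
      assert((dim_order == std::vector<int64_t>{2, 1, 0}));
      return 0;
    }

The underlying memory is untouched; only the labeling of the dims changes, which is what makes the transpose "virtual".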