Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Vulkan command buffer submission rate. #49112

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit. Hold shift + click to select a range.
96a86e3
Optimize Vulkan command buffer submission rate.
Dec 9, 2020
c223eac
Update on "Optimize Vulkan command buffer submission rate."
Dec 9, 2020
f653f59
Update on "Optimize Vulkan command buffer submission rate."
Dec 15, 2020
e79df81
Update on "Optimize Vulkan command buffer submission rate."
Dec 15, 2020
181fe25
Update on "Optimize Vulkan command buffer submission rate."
Dec 16, 2020
7a5b5ab
Update on "Optimize Vulkan command buffer submission rate."
Dec 17, 2020
96b756a
Update on "Optimize Vulkan command buffer submission rate."
Dec 20, 2020
513520d
Update on "Optimize Vulkan command buffer submission rate."
Dec 21, 2020
64fafd0
Update on "Optimize Vulkan command buffer submission rate."
Dec 30, 2020
4f3b9f2
Update on "Optimize Vulkan command buffer submission rate."
Dec 31, 2020
1ce1c68
Update on "Optimize Vulkan command buffer submission rate."
Jan 2, 2021
d2886e1
Update on "Optimize Vulkan command buffer submission rate."
Jan 2, 2021
580b46f
Update on "Optimize Vulkan command buffer submission rate."
Jan 2, 2021
5278b89
Update on "Optimize Vulkan command buffer submission rate."
Jan 6, 2021
8f0746f
Update on "Optimize Vulkan command buffer submission rate."
Jan 6, 2021
2dcb123
Update on "Optimize Vulkan command buffer submission rate."
Jan 7, 2021
ee32d7d
Update on "Optimize Vulkan command buffer submission rate."
Jan 7, 2021
8742826
Update on "Optimize Vulkan command buffer submission rate."
Jan 7, 2021
9c1750d
Update on "Optimize Vulkan command buffer submission rate."
Jan 8, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ cmake_dependent_option(
USE_VALGRIND "Use Valgrind. Only available on Linux." ON
"LINUX" OFF)
option(USE_VULKAN "Use Vulkan GPU backend" OFF)
option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference even on fp32 tensors" ON)
option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference even on fp32 tensors" OFF)
option(USE_VULKAN_RELAXED_PRECISION "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF)
option(USE_VULKAN_SHADERC_RUNTIME "Vulkan - Use runtime shader compilation (needs libshaderc)" OFF)
option(USE_VULKAN_WRAPPER "Vulkan - Dynamically load Vulkan functions" ON)
Expand Down
4 changes: 4 additions & 0 deletions aten/src/ATen/native/vulkan/api/Cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ class Cache final {
Factory factory_;
};

//
// Impl
//

template<typename Factory>
inline Cache<Factory>::Cache(Factory factory)
: factory_(std::move(factory)) {
Expand Down
273 changes: 185 additions & 88 deletions aten/src/ATen/native/vulkan/api/Command.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,25 @@ Command::Buffer::Buffer(const VkCommandBuffer command_buffer)
"Invalid Vulkan command buffer!");
}

// Move constructor. Steals the VkCommandBuffer handle together with the
// cached pipeline/descriptor-set bindings and the pending barrier
// bookkeeping, then invalidates the source object so any further use of
// it trips the debug asserts instead of touching the stolen handle.
Command::Buffer::Buffer(Buffer&& buffer)
  : command_buffer_(std::move(buffer.command_buffer_)),
    bound_(std::move(buffer.bound_)),
    barriers_(std::move(buffer.barriers_)) {
  buffer.invalidate();
}

// Move assignment. Self-assignment safe: transfers the VkCommandBuffer
// handle plus cached bound state and pending barriers, then invalidates
// the source so it can no longer act on the transferred handle.
// Fix: dropped the stray ';' after the if-block (empty statement).
Command::Buffer& Command::Buffer::operator=(Buffer&& buffer) {
  if (&buffer != this) {
    command_buffer_ = std::move(buffer.command_buffer_);
    bound_ = std::move(buffer.bound_);
    barriers_ = std::move(buffer.barriers_);

    buffer.invalidate();
  }

  return *this;
}

void Command::Buffer::Buffer::begin() {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
command_buffer_,
Expand Down Expand Up @@ -107,69 +126,6 @@ void Command::Buffer::Buffer::end() {
VK_CHECK(vkEndCommandBuffer(command_buffer_));
}

// Flushes all barriers accumulated so far into the command buffer as a
// single vkCmdPipelineBarrier call, then clears the pending barrier
// state.  No-op (other than the reset) when no pipeline stages were
// recorded, i.e. when barriers_.stage converts to false.
void Command::Buffer::barrier() {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      command_buffer_,
      "This command buffer is in an invalid state! "
      "Potential reason: This command buffer is moved from.");

  if (barriers_.stage) {
    // Translate the queued buffer barriers into Vulkan's
    // VkBufferMemoryBarrier wire format.  No queue-family ownership
    // transfer is performed (VK_QUEUE_FAMILY_IGNORED on both sides).
    c10::SmallVector<VkBufferMemoryBarrier, 4u> buffer_memory_barriers;

    for (const Resource::Buffer::Barrier& barrier : barriers_.buffers) {
      buffer_memory_barriers.push_back({
        VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        nullptr,
        barrier.memory.src,
        barrier.memory.dst,
        VK_QUEUE_FAMILY_IGNORED,
        VK_QUEUE_FAMILY_IGNORED,
        barrier.object.handle,
        barrier.object.offset,
        barrier.object.range,
      });
    }

    // Translate the queued image barriers.  The subresource range covers
    // the color aspect across all mip levels and array layers.
    c10::SmallVector<VkImageMemoryBarrier, 4u> image_memory_barriers;

    for (const Resource::Image::Barrier& barrier : barriers_.images) {
      image_memory_barriers.push_back({
        VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        nullptr,
        barrier.memory.src,
        barrier.memory.dst,
        barrier.layout.src,
        barrier.layout.dst,
        VK_QUEUE_FAMILY_IGNORED,
        VK_QUEUE_FAMILY_IGNORED,
        barrier.object.handle,
        {
          VK_IMAGE_ASPECT_COLOR_BIT,
          0u,
          VK_REMAINING_MIP_LEVELS,
          0u,
          VK_REMAINING_ARRAY_LAYERS,
        },
      });
    }

    // Record everything in one call so the driver sees a single
    // synchronization point covering the accumulated src/dst stage masks.
    vkCmdPipelineBarrier(
        command_buffer_,
        barriers_.stage.src,
        barriers_.stage.dst,
        0u,
        0u,
        nullptr,
        buffer_memory_barriers.size(),
        buffer_memory_barriers.data(),
        image_memory_barriers.size(),
        image_memory_barriers.data());
  }

  // Reset
  barriers_.reset();
}

void Command::Buffer::barrier(const Pipeline::Barrier& barrier) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
command_buffer_,
Expand Down Expand Up @@ -291,31 +247,86 @@ void Command::Buffer::dispatch(
bound_.pipeline.local_work_group.data[2u]));
}

void Command::Buffer::submit(
const VkQueue queue,
const Resource::Fence fence) {
void Command::Buffer::barrier() {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
command_buffer_,
"This command buffer is in an invalid state! "
"Potential reason: This command buffer is moved from.");

TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
queue,
"Invalid Vulkan queue!");
if (barriers_.stage) {
c10::SmallVector<VkBufferMemoryBarrier, 4u> buffer_memory_barriers;

const VkSubmitInfo submit_info{
VK_STRUCTURE_TYPE_SUBMIT_INFO,
nullptr,
0u,
nullptr,
nullptr,
1u,
&command_buffer_,
0u,
nullptr,
};
for (const Resource::Buffer::Barrier& barrier : barriers_.buffers) {
buffer_memory_barriers.push_back({
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
barrier.memory.src,
barrier.memory.dst,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
barrier.object.handle,
barrier.object.offset,
barrier.object.range,
});
}

c10::SmallVector<VkImageMemoryBarrier, 4u> image_memory_barriers;

VK_CHECK(vkQueueSubmit(queue, 1u, &submit_info, fence.handle()));
for (const Resource::Image::Barrier& barrier : barriers_.images) {
image_memory_barriers.push_back({
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
nullptr,
barrier.memory.src,
barrier.memory.dst,
barrier.layout.src,
barrier.layout.dst,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
barrier.object.handle,
{
VK_IMAGE_ASPECT_COLOR_BIT,
0u,
VK_REMAINING_MIP_LEVELS,
0u,
VK_REMAINING_ARRAY_LAYERS,
},
});
}

vkCmdPipelineBarrier(
command_buffer_,
barriers_.stage.src,
barriers_.stage.dst,
0u,
0u,
nullptr,
buffer_memory_barriers.size(),
buffer_memory_barriers.data(),
image_memory_barriers.size(),
image_memory_barriers.data());
}

// Reset
barriers_.reset();
}

// Marks this command buffer as moved-from / handed off by nulling the
// handle.  Subsequent operations on this object will then fail the
// debug asserts rather than act on a buffer this object no longer owns.
void Command::Buffer::invalidate() {
  command_buffer_ = VK_NULL_HANDLE;
}

// Clears the cached binding state (active pipeline and descriptor set)
// so the next bind/dispatch re-records them into the command buffer.
inline void Command::Buffer::Bound::reset() {
  pipeline = {};
  descriptor_set = VK_NULL_HANDLE;
}

// True when any pipeline stage has been recorded on either side of the
// barrier - i.e. there is pending synchronization work to flush.
inline Command::Buffer::Barrier::Stage::operator bool() const {
  return (src != 0u) || (dst != 0u);
}

// Drops all pending barrier state: the queued buffer and image barrier
// lists, and the accumulated source/destination stage masks.
inline void Command::Buffer::Barrier::reset() {
  buffers.clear();
  images.clear();
  stage = {};
}

Command::Pool::Pool(const GPU& gpu)
Expand All @@ -338,17 +349,19 @@ Command::Pool::Pool(const GPU& gpu)
Command::Pool::Pool(Pool&& pool)
: device_(std::move(pool.device_)),
command_pool_(std::move(pool.command_pool_)),
buffer_(std::move(pool.buffer_)) {
pool.device_ = VK_NULL_HANDLE;
buffer_(std::move(pool.buffer_)),
stream_(std::move(pool.stream_)) {
pool.invalidate();
}

Command::Pool& Command::Pool::operator=(Pool&& pool) {
if (&pool != this) {
device_ = std::move(pool.device_);
command_pool_ = std::move(pool.command_pool_);
buffer_ = std::move(pool.buffer_);
stream_ = std::move(pool.stream_);

pool.device_ = VK_NULL_HANDLE;
pool.invalidate();
};

return *this;
Expand Down Expand Up @@ -383,25 +396,109 @@ Command::Buffer Command::Pool::allocate() {
Configuration::kQuantum);

allocate_command_buffers(
device_,
command_pool_.get(),
buffer_.pool.data() + buffer_.in_use,
Configuration::kQuantum);
device_,
command_pool_.get(),
buffer_.pool.data() + buffer_.in_use,
Configuration::kQuantum);
}

return Buffer(buffer_.pool[buffer_.in_use++]);
}

// Returns the pool's single long-lived command stream, lazily creating
// it on first use (or after it was handed off to the driver): allocates
// a fresh buffer, opens recording, and zeroes the submission counter.
Command::Buffer& Command::Pool::stream() {
  if (stream_.buffer) {
    return stream_.buffer;
  }

  stream_.buffer = allocate();
  stream_.buffer.begin();
  stream_.counter = 0u;

  return stream_.buffer;
}

// Reclaims all command buffer memory in the pool via vkResetCommandPool
// and rewinds the allocation cursor.  Must not be called while the
// command stream is still open (i.e. has not been submitted), since the
// reset would wipe its recorded commands - hence the second assert.
void Command::Pool::purge() {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      device_ && command_pool_,
      "This command pool is in an invalid state! "
      "Potential reason: This command pool is moved from.");

  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      !stream_.buffer,
      "Pending command buffer detected. Make sure all command buffers are "
      "submitted to the queue for execution prior to reclaiming pool memory.");

  // Rewind the cursor; the buffers themselves are recycled by the reset.
  buffer_.in_use = 0u;
  VK_CHECK(vkResetCommandPool(device_, command_pool_.get(), 0u));
}

// Submits the given command buffers to the queue in one vkQueueSubmit.
// Regular (user-managed) buffers are always forwarded.  The pool's own
// command stream gets rate-limited: it is only ended and handed to the
// driver when a fence signals the caller wants results, or when the
// accumulated call count exceeds Configuration::kSubmit; otherwise it
// keeps accumulating work and is skipped this round.
void Command::Pool::submit(
    const VkQueue queue,
    const c10::ArrayRef<const Buffer> buffers,
    const Resource::Fence fence) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      device_ && command_pool_,
      "This command pool is in an invalid state! "
      "Potential reason: This command pool is moved from.");

  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      queue,
      "Invalid Vulkan queue!");

  c10::SmallVector<VkCommandBuffer, Configuration::kReserve> command_buffers;
  command_buffers.reserve(buffers.size());

  for (const Buffer& buffer : buffers) {
    VkCommandBuffer command_buffer = buffer.handle();

    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
        command_buffer,
        "Invalid Vulkan command buffer!");

    // Are we submitting our one and only command stream, or a regular command
    // buffer whose scope is manually maintained by the user? Automatically
    // maintain state and submission rate if the former.

    if (stream_.buffer.handle() == command_buffer) {
      // Hand the stream off to the driver if:
      // - The user has implicitly signaled interest in the results via a fence.
      // - We are over the submission cutoff. We don't want to starve the GPU.

      if (fence || (stream_.counter++ > Configuration::kSubmit)) {
        // End recording and detach: stream() will lazily create a fresh
        // stream buffer on its next use.
        stream_.buffer.end();
        stream_.buffer.invalidate();
      }
      // Skip - Accumulate more calls prior to submission.
      else {
        command_buffer = VK_NULL_HANDLE;
      }
    }

    if (command_buffer) {
      command_buffers.push_back(command_buffer);
    }
  }

  // Only touch the driver if something actually survived the filtering.
  if (!command_buffers.empty()) {
    const VkSubmitInfo submit_info{
      VK_STRUCTURE_TYPE_SUBMIT_INFO,
      nullptr,
      0u,
      nullptr,
      nullptr,
      command_buffers.size(),
      command_buffers.data(),
      0u,
      nullptr,
    };

    VK_CHECK(vkQueueSubmit(queue, 1u, &submit_info, fence.handle()));
  }
}

// Puts the pool into the moved-from state: nulls the device handle and
// releases the command pool handle so the debug asserts above catch any
// further use of this object.
void Command::Pool::invalidate() {
  device_ = VK_NULL_HANDLE;
  command_pool_.reset();
}

} // namespace api
} // namespace vulkan
} // namespace native
Expand Down