From b069a05732a51ccf85ac5c525e37022d589c5573 Mon Sep 17 00:00:00 2001 From: Austin Ho Date: Sun, 12 May 2024 17:54:16 +0000 Subject: [PATCH] #0: Add assertion to ensure writing to issue queue is properly gated by fetch queue Fix corner case in EnqueueTerminateCommand where host was reserving space for both prefetch cmds at once, but fetch queue gets each command as separate entries. Host could have potentially wrapped but device would not have --- tt_metal/impl/dispatch/command_queue.cpp | 29 +++++++++---------- .../impl/dispatch/command_queue_interface.hpp | 6 +++- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp index b7589e10967..913eca885fe 100644 --- a/tt_metal/impl/dispatch/command_queue.cpp +++ b/tt_metal/impl/dispatch/command_queue.cpp @@ -1162,25 +1162,24 @@ EnqueueTerminateCommand::EnqueueTerminateCommand( command_queue_id(command_queue_id), device(device), manager(manager) {} void EnqueueTerminateCommand::process() { - uint32_t cmd_sequence_sizeB = - CQ_PREFETCH_CMD_BARE_MIN_SIZE + // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_TERMINATE - CQ_PREFETCH_CMD_BARE_MIN_SIZE; // CQ_PREFETCH_CMD_TERMINATE + // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_TERMINATE + // CQ_PREFETCH_CMD_TERMINATE + uint32_t cmd_sequence_sizeB = CQ_PREFETCH_CMD_BARE_MIN_SIZE; + // dispatch and prefetch terminate commands each needs to be a separate fetch queue entry void *cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->command_queue_id); - - HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB); - command_sequence.add_dispatch_terminate(); - command_sequence.add_prefetch_terminate(); - + HugepageDeviceCommand dispatch_command_sequence(cmd_region, cmd_sequence_sizeB); + dispatch_command_sequence.add_dispatch_terminate(); this->manager.issue_queue_push_back(cmd_sequence_sizeB, this->command_queue_id); + this->manager.fetch_queue_reserve_back(this->command_queue_id); + this->manager.fetch_queue_write(cmd_sequence_sizeB, this->command_queue_id); - // Command sequence has dispatch and prefetch terminate commands but each needs to be a separate fetch queue entry - TT_ASSERT(cmd_sequence_sizeB % CQ_PREFETCH_CMD_BARE_MIN_SIZE == 0); - uint32_t num_terminate_cmds = cmd_sequence_sizeB / CQ_PREFETCH_CMD_BARE_MIN_SIZE; - for (int terminate_cmd_idx = 0; terminate_cmd_idx < num_terminate_cmds; terminate_cmd_idx++) { - this->manager.fetch_queue_reserve_back(this->command_queue_id); - this->manager.fetch_queue_write(CQ_PREFETCH_CMD_BARE_MIN_SIZE, this->command_queue_id); - } + cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->command_queue_id); + HugepageDeviceCommand prefetch_command_sequence(cmd_region, cmd_sequence_sizeB); + prefetch_command_sequence.add_prefetch_terminate(); + this->manager.issue_queue_push_back(cmd_sequence_sizeB, this->command_queue_id); + this->manager.fetch_queue_reserve_back(this->command_queue_id); + this->manager.fetch_queue_write(cmd_sequence_sizeB, this->command_queue_id); } diff --git a/tt_metal/impl/dispatch/command_queue_interface.hpp b/tt_metal/impl/dispatch/command_queue_interface.hpp index a868adbf4c1..411c01eda21 100644 --- a/tt_metal/impl/dispatch/command_queue_interface.hpp +++ b/tt_metal/impl/dispatch/command_queue_interface.hpp @@ -329,9 +329,9 @@ class SystemMemoryManager { } this->channel_offset = MAX_HUGEPAGE_SIZE * channel; + CoreType core_type = dispatch_core_manager::get(num_hw_cqs).get_dispatch_core_type(device_id); for (uint8_t cq_id = 0; cq_id < num_hw_cqs; cq_id++) { tt_cxy_pair prefetcher_core = dispatch_core_manager::get(num_hw_cqs).prefetcher_core(device_id, channel, cq_id); - CoreType core_type = dispatch_core_manager::get(num_hw_cqs).get_dispatch_core_type(device_id); tt_cxy_pair prefetcher_physical_core = tt_cxy_pair(prefetcher_core.chip, tt::get_physical_core_coordinate(prefetcher_core, core_type)); this->prefetcher_cores[cq_id] = prefetcher_physical_core; @@ -341,6 +341,10 @@ class SystemMemoryManager { this->completion_byte_addrs[cq_id] = completion_tlb_offset + CQ_COMPLETION_READ_PTR % completion_tlb_size; this->cq_interfaces.push_back(SystemMemoryCQInterface(channel, cq_id, this->cq_size)); + // Prefetch queue acts as the sync mechanism to ensure that issue queue has space to write, so issue queue must be as large as the max amount of space the prefetch queue can specify + // Plus 1 to handle wrapping + // Plus 1 to allow us to start writing to issue queue before we reserve space in the prefetch queue + TT_FATAL(dispatch_constants::get(core_type).max_prefetch_command_size() * (dispatch_constants::get(core_type).prefetch_q_entries() + 2) <= this->get_issue_queue_size(cq_id)); this->cq_to_event.push_back(0); this->cq_to_last_completed_event.push_back(0); this->prefetch_q_dev_ptrs[cq_id] = dispatch_constants::PREFETCH_Q_BASE;